/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/CodeGen/IfConversion.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- IfConversion.cpp - Machine code if conversion pass -----------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This file implements the machine instruction level if-conversion pass, which |
11 | | // tries to convert conditional branches into predicated instructions. |
12 | | // |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "BranchFolding.h" |
16 | | #include "llvm/ADT/STLExtras.h" |
17 | | #include "llvm/ADT/ScopeExit.h" |
18 | | #include "llvm/ADT/SmallSet.h" |
19 | | #include "llvm/ADT/SmallVector.h" |
20 | | #include "llvm/ADT/SparseSet.h" |
21 | | #include "llvm/ADT/Statistic.h" |
22 | | #include "llvm/ADT/iterator_range.h" |
23 | | #include "llvm/CodeGen/LivePhysRegs.h" |
24 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
25 | | #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" |
26 | | #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" |
27 | | #include "llvm/CodeGen/MachineFunction.h" |
28 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
29 | | #include "llvm/CodeGen/MachineInstr.h" |
30 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
31 | | #include "llvm/CodeGen/MachineModuleInfo.h" |
32 | | #include "llvm/CodeGen/MachineOperand.h" |
33 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
34 | | #include "llvm/CodeGen/TargetSchedule.h" |
35 | | #include "llvm/IR/DebugLoc.h" |
36 | | #include "llvm/MC/MCRegisterInfo.h" |
37 | | #include "llvm/Pass.h" |
38 | | #include "llvm/Support/BranchProbability.h" |
39 | | #include "llvm/Support/CommandLine.h" |
40 | | #include "llvm/Support/Debug.h" |
41 | | #include "llvm/Support/ErrorHandling.h" |
42 | | #include "llvm/Support/raw_ostream.h" |
43 | | #include "llvm/Target/TargetInstrInfo.h" |
44 | | #include "llvm/Target/TargetLowering.h" |
45 | | #include "llvm/Target/TargetRegisterInfo.h" |
46 | | #include "llvm/Target/TargetSubtargetInfo.h" |
47 | | #include <algorithm> |
48 | | #include <cassert> |
49 | | #include <functional> |
50 | | #include <iterator> |
51 | | #include <memory> |
52 | | #include <utility> |
53 | | #include <vector> |
54 | | |
55 | | using namespace llvm; |
56 | | |
57 | | #define DEBUG_TYPE "if-converter" |
58 | | |
59 | | // Hidden options to help with debugging. |
60 | | static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden); |
61 | | static cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden); |
62 | | static cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden); |
63 | | static cl::opt<bool> DisableSimple("disable-ifcvt-simple", |
64 | | cl::init(false), cl::Hidden); |
65 | | static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false", |
66 | | cl::init(false), cl::Hidden); |
67 | | static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle", |
68 | | cl::init(false), cl::Hidden); |
69 | | static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev", |
70 | | cl::init(false), cl::Hidden); |
71 | | static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false", |
72 | | cl::init(false), cl::Hidden); |
73 | | static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev", |
74 | | cl::init(false), cl::Hidden); |
75 | | static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond", |
76 | | cl::init(false), cl::Hidden); |
77 | | static cl::opt<bool> DisableForkedDiamond("disable-ifcvt-forked-diamond", |
78 | | cl::init(false), cl::Hidden); |
79 | | static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold", |
80 | | cl::init(true), cl::Hidden); |
81 | | |
82 | | STATISTIC(NumSimple, "Number of simple if-conversions performed"); |
83 | | STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed"); |
84 | | STATISTIC(NumTriangle, "Number of triangle if-conversions performed"); |
85 | | STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed"); |
86 | | STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed"); |
87 | | STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed"); |
88 | | STATISTIC(NumDiamonds, "Number of diamond if-conversions performed"); |
89 | | STATISTIC(NumForkedDiamonds, "Number of forked-diamond if-conversions performed"); |
90 | | STATISTIC(NumIfConvBBs, "Number of if-converted blocks"); |
91 | | STATISTIC(NumDupBBs, "Number of duplicated blocks"); |
92 | | STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated"); |
93 | | |
94 | | namespace { |
95 | | |
96 | | class IfConverter : public MachineFunctionPass { |
97 | | enum IfcvtKind { |
98 | | ICNotClassfied, // BB data valid, but not classified. |
99 | | ICSimpleFalse, // Same as ICSimple, but on the false path. |
100 | | ICSimple, // BB is entry of a one-split, no-rejoin sub-CFG. |
101 | | ICTriangleFRev, // Same as ICTriangleFalse, but false path rev condition. |
102 | | ICTriangleRev, // Same as ICTriangle, but true path rev condition. |
103 | | ICTriangleFalse, // Same as ICTriangle, but on the false path. |
104 | | ICTriangle, // BB is entry of a triangle sub-CFG. |
105 | | ICDiamond, // BB is entry of a diamond sub-CFG. |
106 | | ICForkedDiamond // BB is entry of an almost diamond sub-CFG, with a |
107 | | // common tail that can be shared. |
108 | | }; |
109 | | |
110 | | /// One per MachineBasicBlock, this is used to cache the result of the |
111 | | /// if-conversion feasibility analysis. This includes results from |
112 | | /// TargetInstrInfo::analyzeBranch() (i.e. TBB, FBB, and Cond), and its |
113 | | /// classification, and common tail block of its successors (if it's a |
114 | | /// diamond shape), its size, whether it's predicable, and whether any |
115 | | /// instruction can clobber the 'would-be' predicate. |
116 | | /// |
117 | | /// IsDone - True if BB is not to be considered for ifcvt. |
118 | | /// IsBeingAnalyzed - True if BB is currently being analyzed. |
119 | | /// IsAnalyzed - True if BB has been analyzed (info is still valid). |
120 | | /// IsEnqueued - True if BB has been enqueued to be ifcvt'ed. |
121 | | /// IsBrAnalyzable - True if analyzeBranch() returns false. |
122 | | /// HasFallThrough - True if BB may fallthrough to the following BB. |
123 | | /// IsUnpredicable - True if BB is known to be unpredicable. |
124 | | /// ClobbersPred - True if BB could modify predicates (e.g. has |
125 | | /// cmp, call, etc.) |
126 | | /// NonPredSize - Number of non-predicated instructions. |
127 | | /// ExtraCost - Extra cost for multi-cycle instructions. |
128 | | /// ExtraCost2 - Some instructions are slower when predicated |
129 | | /// BB - Corresponding MachineBasicBlock. |
130 | | /// TrueBB / FalseBB- See analyzeBranch(). |
131 | | /// BrCond - Conditions for end of block conditional branches. |
132 | | /// Predicate - Predicate used in the BB. |
133 | | struct BBInfo { |
134 | | bool IsDone : 1; |
135 | | bool IsBeingAnalyzed : 1; |
136 | | bool IsAnalyzed : 1; |
137 | | bool IsEnqueued : 1; |
138 | | bool IsBrAnalyzable : 1; |
139 | | bool IsBrReversible : 1; |
140 | | bool HasFallThrough : 1; |
141 | | bool IsUnpredicable : 1; |
142 | | bool CannotBeCopied : 1; |
143 | | bool ClobbersPred : 1; |
144 | | unsigned NonPredSize = 0; |
145 | | unsigned ExtraCost = 0; |
146 | | unsigned ExtraCost2 = 0; |
147 | | MachineBasicBlock *BB = nullptr; |
148 | | MachineBasicBlock *TrueBB = nullptr; |
149 | | MachineBasicBlock *FalseBB = nullptr; |
150 | | SmallVector<MachineOperand, 4> BrCond; |
151 | | SmallVector<MachineOperand, 4> Predicate; |
152 | | |
153 | | BBInfo() : IsDone(false), IsBeingAnalyzed(false), |
154 | | IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false), |
155 | | IsBrReversible(false), HasFallThrough(false), |
156 | | IsUnpredicable(false), CannotBeCopied(false), |
157 | 127k | ClobbersPred(false) {} |
158 | | }; |
159 | | |
160 | | /// Record information about pending if-conversions to attempt: |
161 | | /// BBI - Corresponding BBInfo. |
162 | | /// Kind - Type of block. See IfcvtKind. |
163 | | /// NeedSubsumption - True if the to-be-predicated BB has already been |
164 | | /// predicated. |
165 | | /// NumDups - Number of instructions that would be duplicated due |
166 | | /// to this if-conversion. (For diamonds, the number of |
167 | | /// identical instructions at the beginnings of both |
168 | | /// paths). |
169 | | /// NumDups2 - For diamonds, the number of identical instructions |
170 | | /// at the ends of both paths. |
171 | | struct IfcvtToken { |
172 | | BBInfo &BBI; |
173 | | IfcvtKind Kind; |
174 | | unsigned NumDups; |
175 | | unsigned NumDups2; |
176 | | bool NeedSubsumption : 1; |
177 | | bool TClobbersPred : 1; |
178 | | bool FClobbersPred : 1; |
179 | | |
180 | | IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0, |
181 | | bool tc = false, bool fc = false) |
182 | | : BBI(b), Kind(k), NumDups(d), NumDups2(d2), NeedSubsumption(s), |
183 | 5.14k | TClobbersPred(tc), FClobbersPred(fc) {} |
184 | | }; |
185 | | |
186 | | /// Results of if-conversion feasibility analysis indexed by basic block |
187 | | /// number. |
188 | | std::vector<BBInfo> BBAnalysis; |
189 | | TargetSchedModel SchedModel; |
190 | | |
191 | | const TargetLoweringBase *TLI; |
192 | | const TargetInstrInfo *TII; |
193 | | const TargetRegisterInfo *TRI; |
194 | | const MachineBranchProbabilityInfo *MBPI; |
195 | | MachineRegisterInfo *MRI; |
196 | | |
197 | | LivePhysRegs Redefs; |
198 | | |
199 | | bool PreRegAlloc; |
200 | | bool MadeChange; |
201 | | int FnNum = -1; |
202 | | std::function<bool(const MachineFunction &)> PredicateFtor; |
203 | | |
204 | | public: |
205 | | static char ID; |
206 | | |
207 | | IfConverter(std::function<bool(const MachineFunction &)> Ftor = nullptr) |
208 | 6.81k | : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) { |
209 | 6.81k | initializeIfConverterPass(*PassRegistry::getPassRegistry()); |
210 | 6.81k | } |
211 | | |
212 | 6.79k | void getAnalysisUsage(AnalysisUsage &AU) const override { |
213 | 6.79k | AU.addRequired<MachineBlockFrequencyInfo>(); |
214 | 6.79k | AU.addRequired<MachineBranchProbabilityInfo>(); |
215 | 6.79k | MachineFunctionPass::getAnalysisUsage(AU); |
216 | 6.79k | } |
217 | | |
218 | | bool runOnMachineFunction(MachineFunction &MF) override; |
219 | | |
220 | 6.79k | MachineFunctionProperties getRequiredProperties() const override { |
221 | 6.79k | return MachineFunctionProperties().set( |
222 | 6.79k | MachineFunctionProperties::Property::NoVRegs); |
223 | 6.79k | } |
224 | | |
225 | | private: |
226 | | bool reverseBranchCondition(BBInfo &BBI) const; |
227 | | bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups, |
228 | | BranchProbability Prediction) const; |
229 | | bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, |
230 | | bool FalseBranch, unsigned &Dups, |
231 | | BranchProbability Prediction) const; |
232 | | bool CountDuplicatedInstructions( |
233 | | MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB, |
234 | | MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE, |
235 | | unsigned &Dups1, unsigned &Dups2, |
236 | | MachineBasicBlock &TBB, MachineBasicBlock &FBB, |
237 | | bool SkipUnconditionalBranches) const; |
238 | | bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, |
239 | | unsigned &Dups1, unsigned &Dups2, |
240 | | BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const; |
241 | | bool ValidForkedDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, |
242 | | unsigned &Dups1, unsigned &Dups2, |
243 | | BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const; |
244 | | void AnalyzeBranches(BBInfo &BBI); |
245 | | void ScanInstructions(BBInfo &BBI, |
246 | | MachineBasicBlock::iterator &Begin, |
247 | | MachineBasicBlock::iterator &End, |
248 | | bool BranchUnpredicable = false) const; |
249 | | bool RescanInstructions( |
250 | | MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB, |
251 | | MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE, |
252 | | BBInfo &TrueBBI, BBInfo &FalseBBI) const; |
253 | | void AnalyzeBlock(MachineBasicBlock &MBB, |
254 | | std::vector<std::unique_ptr<IfcvtToken>> &Tokens); |
255 | | bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond, |
256 | | bool isTriangle = false, bool RevBranch = false, |
257 | | bool hasCommonTail = false); |
258 | | void AnalyzeBlocks(MachineFunction &MF, |
259 | | std::vector<std::unique_ptr<IfcvtToken>> &Tokens); |
260 | | void InvalidatePreds(MachineBasicBlock &MBB); |
261 | | bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind); |
262 | | bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind); |
263 | | bool IfConvertDiamondCommon(BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI, |
264 | | unsigned NumDups1, unsigned NumDups2, |
265 | | bool TClobbersPred, bool FClobbersPred, |
266 | | bool RemoveBranch, bool MergeAddEdges); |
267 | | bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, |
268 | | unsigned NumDups1, unsigned NumDups2, |
269 | | bool TClobbers, bool FClobbers); |
270 | | bool IfConvertForkedDiamond(BBInfo &BBI, IfcvtKind Kind, |
271 | | unsigned NumDups1, unsigned NumDups2, |
272 | | bool TClobbers, bool FClobbers); |
273 | | void PredicateBlock(BBInfo &BBI, |
274 | | MachineBasicBlock::iterator E, |
275 | | SmallVectorImpl<MachineOperand> &Cond, |
276 | | SmallSet<unsigned, 4> *LaterRedefs = nullptr); |
277 | | void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, |
278 | | SmallVectorImpl<MachineOperand> &Cond, |
279 | | bool IgnoreBr = false); |
280 | | void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true); |
281 | | |
282 | | bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, |
283 | | unsigned Cycle, unsigned Extra, |
284 | 17.0k | BranchProbability Prediction) const { |
285 | 16.7k | return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra, |
286 | 16.7k | Prediction); |
287 | 17.0k | } |
288 | | |
289 | | bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, |
290 | | unsigned TCycle, unsigned TExtra, |
291 | | MachineBasicBlock &FBB, |
292 | | unsigned FCycle, unsigned FExtra, |
293 | 479 | BranchProbability Prediction) const { |
294 | 474 | return TCycle > 0 && FCycle > 0 && |
295 | 473 | TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra, |
296 | 473 | Prediction); |
297 | 479 | } |
298 | | |
299 | | /// Returns true if Block ends without a terminator. |
300 | 63.9k | bool blockAlwaysFallThrough(BBInfo &BBI) const { |
301 | 42.1k | return BBI.IsBrAnalyzable && BBI.TrueBB == nullptr; |
302 | 63.9k | } |
303 | | |
304 | | /// Used to sort if-conversion candidates. |
305 | | static bool IfcvtTokenCmp(const std::unique_ptr<IfcvtToken> &C1, |
306 | 3.14k | const std::unique_ptr<IfcvtToken> &C2) { |
307 | 3.14k | int Incr1 = (C1->Kind == ICDiamond) |
308 | 3.14k | ? -(int)(C1->NumDups + C1->NumDups2)30 : (int)C1->NumDups3.11k ; |
309 | 3.14k | int Incr2 = (C2->Kind == ICDiamond) |
310 | 3.14k | ? -(int)(C2->NumDups + C2->NumDups2)189 : (int)C2->NumDups2.95k ; |
311 | 3.14k | if (Incr1 > Incr2) |
312 | 269 | return true; |
313 | 2.87k | else if (2.87k Incr1 == Incr22.87k ) { |
314 | 2.74k | // Favors subsumption. |
315 | 2.74k | if (!C1->NeedSubsumption && 2.74k C2->NeedSubsumption2.73k ) |
316 | 0 | return true; |
317 | 2.74k | else if (2.74k C1->NeedSubsumption == C2->NeedSubsumption2.74k ) { |
318 | 2.74k | // Favors diamond over triangle, etc. |
319 | 2.74k | if ((unsigned)C1->Kind < (unsigned)C2->Kind) |
320 | 1.26k | return true; |
321 | 1.47k | else if (1.47k C1->Kind == C2->Kind1.47k ) |
322 | 1.01k | return C1->BBI.BB->getNumber() < C2->BBI.BB->getNumber(); |
323 | 590 | } |
324 | 2.87k | } |
325 | 590 | return false; |
326 | 590 | } |
327 | | }; |
328 | | |
329 | | } // end anonymous namespace |
330 | | |
331 | | char IfConverter::ID = 0; |
332 | | |
333 | | char &llvm::IfConverterID = IfConverter::ID; |
334 | | |
335 | 36.7k | INITIALIZE_PASS_BEGIN36.7k (IfConverter, DEBUG_TYPE, "If Converter", false, false)
|
336 | 36.7k | INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) |
337 | 36.7k | INITIALIZE_PASS_END(IfConverter, DEBUG_TYPE, "If Converter", false, false) |
338 | | |
339 | 32.2k | bool IfConverter::runOnMachineFunction(MachineFunction &MF) { |
340 | 32.2k | if (skipFunction(*MF.getFunction()) || 32.2k (PredicateFtor && 32.2k !PredicateFtor(MF)15.8k )) |
341 | 1.09k | return false; |
342 | 31.1k | |
343 | 31.1k | const TargetSubtargetInfo &ST = MF.getSubtarget(); |
344 | 31.1k | TLI = ST.getTargetLowering(); |
345 | 31.1k | TII = ST.getInstrInfo(); |
346 | 31.1k | TRI = ST.getRegisterInfo(); |
347 | 31.1k | BranchFolder::MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>()); |
348 | 31.1k | MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); |
349 | 31.1k | MRI = &MF.getRegInfo(); |
350 | 31.1k | SchedModel.init(ST.getSchedModel(), &ST, TII); |
351 | 31.1k | |
352 | 31.1k | if (!TII31.1k ) return false0 ; |
353 | 31.1k | |
354 | 31.1k | PreRegAlloc = MRI->isSSA(); |
355 | 31.1k | |
356 | 31.1k | bool BFChange = false; |
357 | 31.1k | if (!PreRegAlloc31.1k ) { |
358 | 31.1k | // Tail merging tends to expose more if-conversion opportunities. |
359 | 31.1k | BranchFolder BF(true, false, MBFI, *MBPI); |
360 | 31.1k | BFChange = BF.OptimizeFunction(MF, TII, ST.getRegisterInfo(), |
361 | 31.1k | getAnalysisIfAvailable<MachineModuleInfo>()); |
362 | 31.1k | } |
363 | 31.1k | |
364 | 31.1k | DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'" |
365 | 31.1k | << MF.getName() << "\'"); |
366 | 31.1k | |
367 | 31.1k | if (FnNum < IfCvtFnStart || 31.1k (IfCvtFnStop != -1 && 31.0k FnNum > IfCvtFnStop0 )) { |
368 | 52 | DEBUG(dbgs() << " skipped\n"); |
369 | 52 | return false; |
370 | 52 | } |
371 | 31.0k | DEBUG31.0k (dbgs() << "\n"); |
372 | 31.0k | |
373 | 31.0k | MF.RenumberBlocks(); |
374 | 31.0k | BBAnalysis.resize(MF.getNumBlockIDs()); |
375 | 31.0k | |
376 | 31.0k | std::vector<std::unique_ptr<IfcvtToken>> Tokens; |
377 | 31.0k | MadeChange = false; |
378 | 31.0k | unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + |
379 | 31.0k | NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds; |
380 | 34.1k | while (IfCvtLimit == -1 || 34.1k (int)NumIfCvts < IfCvtLimit6 ) { |
381 | 34.1k | // Do an initial analysis for each basic block and find all the potential |
382 | 34.1k | // candidates to perform if-conversion. |
383 | 34.1k | bool Change = false; |
384 | 34.1k | AnalyzeBlocks(MF, Tokens); |
385 | 39.3k | while (!Tokens.empty()39.3k ) { |
386 | 5.14k | std::unique_ptr<IfcvtToken> Token = std::move(Tokens.back()); |
387 | 5.14k | Tokens.pop_back(); |
388 | 5.14k | BBInfo &BBI = Token->BBI; |
389 | 5.14k | IfcvtKind Kind = Token->Kind; |
390 | 5.14k | unsigned NumDups = Token->NumDups; |
391 | 5.14k | unsigned NumDups2 = Token->NumDups2; |
392 | 5.14k | |
393 | 5.14k | // If the block has been evicted out of the queue or it has already been |
394 | 5.14k | // marked dead (due to it being predicated), then skip it. |
395 | 5.14k | if (BBI.IsDone) |
396 | 185 | BBI.IsEnqueued = false; |
397 | 5.14k | if (!BBI.IsEnqueued) |
398 | 1.28k | continue; |
399 | 3.86k | |
400 | 3.86k | BBI.IsEnqueued = false; |
401 | 3.86k | |
402 | 3.86k | bool RetVal = false; |
403 | 3.86k | switch (Kind) { |
404 | 0 | default: 0 llvm_unreachable0 ("Unexpected!"); |
405 | 2.43k | case ICSimple: |
406 | 2.43k | case ICSimpleFalse: { |
407 | 2.43k | bool isFalse = Kind == ICSimpleFalse; |
408 | 2.43k | if ((isFalse && 2.43k DisableSimpleF587 ) || (!isFalse && 2.43k DisableSimple1.84k )) break0 ; |
409 | 2.43k | DEBUG2.43k (dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? |
410 | 2.43k | " false" : "") |
411 | 2.43k | << "): BB#" << BBI.BB->getNumber() << " (" |
412 | 2.43k | << ((Kind == ICSimpleFalse) |
413 | 2.43k | ? BBI.FalseBB->getNumber() |
414 | 2.43k | : BBI.TrueBB->getNumber()) << ") "); |
415 | 2.43k | RetVal = IfConvertSimple(BBI, Kind); |
416 | 2.43k | DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); |
417 | 2.43k | if (RetVal2.43k ) { |
418 | 2.28k | if (isFalse2.28k ) ++NumSimpleFalse498 ; |
419 | 1.78k | else ++NumSimple; |
420 | 2.28k | } |
421 | 2.43k | break; |
422 | 2.43k | } |
423 | 1.26k | case ICTriangle: |
424 | 1.26k | case ICTriangleRev: |
425 | 1.26k | case ICTriangleFalse: |
426 | 1.26k | case ICTriangleFRev: { |
427 | 1.26k | bool isFalse = Kind == ICTriangleFalse; |
428 | 1.26k | bool isRev = (Kind == ICTriangleRev || Kind == ICTriangleFRev); |
429 | 1.26k | if (DisableTriangle && 1.26k !isFalse0 && !isRev0 ) break0 ; |
430 | 1.26k | if (1.26k DisableTriangleR && 1.26k !isFalse0 && isRev0 ) break0 ; |
431 | 1.26k | if (1.26k DisableTriangleF && 1.26k isFalse0 && !isRev0 ) break0 ; |
432 | 1.26k | if (1.26k DisableTriangleFR && 1.26k isFalse0 && isRev0 ) break0 ; |
433 | 1.26k | DEBUG1.26k (dbgs() << "Ifcvt (Triangle"); |
434 | 1.26k | if (isFalse) |
435 | 1.26k | DEBUG(dbgs() << " false"); |
436 | 1.26k | if (isRev) |
437 | 1.26k | DEBUG(dbgs() << " rev"); |
438 | 1.26k | DEBUG(dbgs() << "): BB#" << BBI.BB->getNumber() << " (T:" |
439 | 1.26k | << BBI.TrueBB->getNumber() << ",F:" |
440 | 1.26k | << BBI.FalseBB->getNumber() << ") "); |
441 | 1.26k | RetVal = IfConvertTriangle(BBI, Kind); |
442 | 1.26k | DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); |
443 | 1.26k | if (RetVal1.26k ) { |
444 | 1.26k | if (isFalse1.26k ) { |
445 | 1.06k | if (isRev1.06k ) ++NumTriangleFRev0 ; |
446 | 1.06k | else ++NumTriangleFalse; |
447 | 1.26k | } else { |
448 | 195 | if (isRev195 ) ++NumTriangleRev164 ; |
449 | 31 | else ++NumTriangle; |
450 | 195 | } |
451 | 1.26k | } |
452 | 1.26k | break; |
453 | 1.26k | } |
454 | 168 | case ICDiamond: |
455 | 168 | if (DisableDiamond168 ) break0 ; |
456 | 168 | DEBUG168 (dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" |
457 | 168 | << BBI.TrueBB->getNumber() << ",F:" |
458 | 168 | << BBI.FalseBB->getNumber() << ") "); |
459 | 168 | RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2, |
460 | 168 | Token->TClobbersPred, |
461 | 168 | Token->FClobbersPred); |
462 | 168 | DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); |
463 | 168 | if (RetVal168 ) ++NumDiamonds167 ; |
464 | 168 | break; |
465 | 2 | case ICForkedDiamond: |
466 | 2 | if (DisableForkedDiamond2 ) break0 ; |
467 | 2 | DEBUG2 (dbgs() << "Ifcvt (Forked Diamond): BB#" |
468 | 2 | << BBI.BB->getNumber() << " (T:" |
469 | 2 | << BBI.TrueBB->getNumber() << ",F:" |
470 | 2 | << BBI.FalseBB->getNumber() << ") "); |
471 | 2 | RetVal = IfConvertForkedDiamond(BBI, Kind, NumDups, NumDups2, |
472 | 2 | Token->TClobbersPred, |
473 | 2 | Token->FClobbersPred); |
474 | 2 | DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); |
475 | 2 | if (RetVal2 ) ++NumForkedDiamonds2 ; |
476 | 2.43k | break; |
477 | 3.86k | } |
478 | 3.86k | |
479 | 3.86k | if (3.86k RetVal && 3.86k MRI->tracksLiveness()3.71k ) |
480 | 3.67k | recomputeLivenessFlags(*BBI.BB); |
481 | 3.86k | |
482 | 3.86k | Change |= RetVal; |
483 | 3.86k | |
484 | 3.86k | NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev + |
485 | 3.86k | NumTriangleFalse + NumTriangleFRev + NumDiamonds; |
486 | 3.86k | if (IfCvtLimit != -1 && 3.86k (int)NumIfCvts >= IfCvtLimit0 ) |
487 | 0 | break; |
488 | 5.14k | } |
489 | 34.1k | |
490 | 34.1k | if (34.1k !Change34.1k ) |
491 | 31.0k | break; |
492 | 3.09k | MadeChange |= Change; |
493 | 3.09k | } |
494 | 31.0k | |
495 | 31.0k | Tokens.clear(); |
496 | 31.0k | BBAnalysis.clear(); |
497 | 31.0k | |
498 | 31.0k | if (MadeChange && 31.0k IfCvtBranchFold2.91k ) { |
499 | 2.91k | BranchFolder BF(false, false, MBFI, *MBPI); |
500 | 2.91k | BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), |
501 | 2.91k | getAnalysisIfAvailable<MachineModuleInfo>()); |
502 | 2.91k | } |
503 | 31.0k | |
504 | 31.0k | MadeChange |= BFChange; |
505 | 31.0k | return MadeChange; |
506 | 32.2k | } |
507 | | |
508 | | /// BB has a fallthrough. Find its 'false' successor given its 'true' successor. |
509 | | static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, |
510 | 26.2k | MachineBasicBlock *TrueBB) { |
511 | 40.1k | for (MachineBasicBlock *SuccBB : BB->successors()) { |
512 | 40.1k | if (SuccBB != TrueBB) |
513 | 26.2k | return SuccBB; |
514 | 2 | } |
515 | 2 | return nullptr; |
516 | 2 | } |
517 | | |
518 | | /// Reverse the condition of the branch at the end of the block. Swap the block's |
519 | | /// 'true' and 'false' successors. |
520 | 240 | bool IfConverter::reverseBranchCondition(BBInfo &BBI) const { |
521 | 240 | DebugLoc dl; // FIXME: this is nowhere |
522 | 240 | if (!TII->reverseBranchCondition(BBI.BrCond)240 ) { |
523 | 240 | TII->removeBranch(*BBI.BB); |
524 | 240 | TII->insertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl); |
525 | 240 | std::swap(BBI.TrueBB, BBI.FalseBB); |
526 | 240 | return true; |
527 | 240 | } |
528 | 0 | return false; |
529 | 0 | } |
530 | | |
531 | | /// Returns the next block in the function's block ordering. If it is the end, |
532 | | /// returns nullptr. |
533 | 17.7k | static inline MachineBasicBlock *getNextBlock(MachineBasicBlock &MBB) { |
534 | 17.7k | MachineFunction::iterator I = MBB.getIterator(); |
535 | 17.7k | MachineFunction::iterator E = MBB.getParent()->end(); |
536 | 17.7k | if (++I == E) |
537 | 2.61k | return nullptr; |
538 | 15.1k | return &*I; |
539 | 15.1k | } |
540 | | |
541 | | /// Returns true if the 'true' block (along with its predecessor) forms a valid |
542 | | /// simple shape for ifcvt. It also returns, in 'Dups', the number of |
543 | | /// instructions that the ifcvt would need to duplicate if performed. |
544 | | bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups, |
545 | 48.1k | BranchProbability Prediction) const { |
546 | 48.1k | Dups = 0; |
547 | 48.1k | if (TrueBBI.IsBeingAnalyzed || 48.1k TrueBBI.IsDone46.5k ) |
548 | 2.41k | return false; |
549 | 45.7k | |
550 | 45.7k | if (45.7k TrueBBI.IsBrAnalyzable45.7k ) |
551 | 36.0k | return false; |
552 | 9.66k | |
553 | 9.66k | if (9.66k TrueBBI.BB->pred_size() > 19.66k ) { |
554 | 6.02k | if (TrueBBI.CannotBeCopied || |
555 | 5.98k | !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize, |
556 | 5.98k | Prediction)) |
557 | 3.06k | return false; |
558 | 2.96k | Dups = TrueBBI.NonPredSize; |
559 | 2.96k | } |
560 | 9.66k | |
561 | 6.60k | return true; |
562 | 48.1k | } |
563 | | |
564 | | /// Returns true if the 'true' and 'false' blocks (along with their common |
565 | | /// predecessor) form a valid triangle shape for ifcvt. If 'FalseBranch' is |
566 | | /// true, it checks if 'true' block's false branch branches to the 'false' block |
567 | | /// rather than the other way around. It also returns, in 'Dups', the number of |
568 | | /// instructions that the ifcvt would need to duplicate if performed. |
569 | | bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, |
570 | | bool FalseBranch, unsigned &Dups, |
571 | 96.2k | BranchProbability Prediction) const { |
572 | 96.2k | Dups = 0; |
573 | 96.2k | if (TrueBBI.IsBeingAnalyzed || 96.2k TrueBBI.IsDone93.0k ) |
574 | 4.82k | return false; |
575 | 91.4k | |
576 | 91.4k | if (91.4k TrueBBI.BB->pred_size() > 191.4k ) { |
577 | 32.7k | if (TrueBBI.CannotBeCopied) |
578 | 210 | return false; |
579 | 32.5k | |
580 | 32.5k | unsigned Size = TrueBBI.NonPredSize; |
581 | 32.5k | if (TrueBBI.IsBrAnalyzable32.5k ) { |
582 | 20.5k | if (TrueBBI.TrueBB && 20.5k TrueBBI.BrCond.empty()12.6k ) |
583 | 20.5k | // Ends with an unconditional branch. It will be removed. |
584 | 3.07k | --Size; |
585 | 17.5k | else { |
586 | 17.5k | MachineBasicBlock *FExit = FalseBranch |
587 | 17.5k | ? TrueBBI.TrueBB8.75k : TrueBBI.FalseBB8.75k ; |
588 | 17.5k | if (FExit) |
589 | 17.5k | // Require a conditional branch |
590 | 9.57k | ++Size; |
591 | 17.5k | } |
592 | 20.5k | } |
593 | 32.5k | if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, Prediction)) |
594 | 20.9k | return false; |
595 | 11.6k | Dups = Size; |
596 | 11.6k | } |
597 | 91.4k | |
598 | 70.3k | MachineBasicBlock *TExit = FalseBranch ? 70.3k TrueBBI.FalseBB35.1k : TrueBBI.TrueBB35.1k ; |
599 | 70.3k | if (!TExit && 70.3k blockAlwaysFallThrough(TrueBBI)41.8k ) { |
600 | 23.9k | MachineFunction::iterator I = TrueBBI.BB->getIterator(); |
601 | 23.9k | if (++I == TrueBBI.BB->getParent()->end()) |
602 | 2.29k | return false; |
603 | 21.6k | TExit = &*I; |
604 | 21.6k | } |
605 | 68.0k | return TExit && 68.0k TExit == FalseBBI.BB50.1k ; |
606 | 96.2k | } |
607 | | |
608 | | /// Count duplicated instructions and move the iterators to show where they |
609 | | /// are. |
610 | | /// @param TIB True Iterator Begin |
611 | | /// @param FIB False Iterator Begin |
612 | | /// These two iterators initially point to the first instruction of the two |
613 | | /// blocks, and finally point to the first non-shared instruction. |
614 | | /// @param TIE True Iterator End |
615 | | /// @param FIE False Iterator End |
616 | | /// These two iterators initially point to End() for the two blocks and |
617 | | /// finally point to the first shared instruction in the tail. |
618 | | /// Upon return [TIB, TIE), and [FIB, FIE) mark the un-duplicated portions of |
619 | | /// two blocks. |
620 | | /// @param Dups1 count of duplicated instructions at the beginning of the 2 |
621 | | /// blocks. |
622 | | /// @param Dups2 count of duplicated instructions at the end of the 2 blocks. |
623 | | /// @param SkipUnconditionalBranches if true, skip over unconditional branches |
624 | | /// at the end of the blocks instead of requiring them to be identical. True is |
625 | | /// passed when the blocks are analyzable to allow for fallthrough to be |
626 | | /// handled. |
627 | | /// @return false if the shared portion prevents if conversion. |
628 | | bool IfConverter::CountDuplicatedInstructions( |
629 | | MachineBasicBlock::iterator &TIB, |
630 | | MachineBasicBlock::iterator &FIB, |
631 | | MachineBasicBlock::iterator &TIE, |
632 | | MachineBasicBlock::iterator &FIE, |
633 | | unsigned &Dups1, unsigned &Dups2, |
634 | | MachineBasicBlock &TBB, MachineBasicBlock &FBB, |
635 | 1.44k | bool SkipUnconditionalBranches) const { |
636 | 1.48k | while (TIB != TIE && 1.48k FIB != FIE1.48k ) { |
637 | 1.48k | // Skip debug instructions (e.g. dbg_value) in both blocks. These do not count. |
638 | 1.48k | TIB = skipDebugInstructionsForward(TIB, TIE); |
639 | 1.48k | FIB = skipDebugInstructionsForward(FIB, FIE); |
640 | 1.48k | if (TIB == TIE || 1.48k FIB == FIE1.48k ) |
641 | 0 | break; |
642 | 1.48k | if (1.48k !TIB->isIdenticalTo(*FIB)1.48k ) |
643 | 1.43k | break; |
644 | 47 | // A pred-clobbering instruction in the shared portion prevents |
645 | 47 | // if-conversion, so report failure to the caller. |
646 | 47 | std::vector<MachineOperand> PredDefs; |
647 | 47 | if (TII->DefinesPredicate(*TIB, PredDefs)) |
648 | 7 | return false; |
649 | 40 | // Identical branches are stepped over but are not counted in Dups1. |
650 | 40 | if (40 !TIB->isBranch()40 ) |
651 | 38 | ++Dups1; |
652 | 1.48k | ++TIB; |
653 | 1.48k | ++FIB; |
654 | 1.48k | } |
655 | 1.44k | |
656 | 1.44k | // If the shared prefix already consumed all of either block, we are done. |
657 | 1.43k | if (1.43k TIB == TIE || 1.43k FIB == FIE1.43k ) |
658 | 3 | return true; |
659 | 1.43k | // Now, in preparation for counting duplicate instructions at the ends of the |
660 | 1.43k | // blocks, switch to reverse_iterators. Note that getReverse() returns an |
661 | 1.43k | // iterator that points to the same instruction, unlike std::reverse_iterator. |
662 | 1.43k | // We have to do our own shifting so that we get the same range. TIE and FIE are converted back to forward iterators before returning. |
663 | 1.43k | MachineBasicBlock::reverse_iterator RTIE = std::next(TIE.getReverse()); |
664 | 1.43k | MachineBasicBlock::reverse_iterator RFIE = std::next(FIE.getReverse()); |
665 | 1.43k | const MachineBasicBlock::reverse_iterator RTIB = std::next(TIB.getReverse()); |
666 | 1.43k | const MachineBasicBlock::reverse_iterator RFIB = std::next(FIB.getReverse()); |
667 | 1.43k | |
668 | 1.43k | if (!TBB.succ_empty() || 1.43k !FBB.succ_empty()505 ) { |
669 | 962 | if (SkipUnconditionalBranches962 ) { |
670 | 1.02k | while (RTIE != RTIB && 1.02k RTIE->isUnconditionalBranch()1.02k ) |
671 | 103 | ++RTIE; |
672 | 1.81k | while (RFIE != RFIB && 1.81k RFIE->isUnconditionalBranch()1.81k ) |
673 | 899 | ++RFIE; |
674 | 920 | } |
675 | 962 | } |
676 | 1.43k | |
677 | 1.43k | // Count duplicate instructions at the ends of the blocks (into Dups2). |
678 | 1.84k | while (RTIE != RTIB && 1.84k RFIE != RFIB1.84k ) { |
679 | 1.82k | // Skip debug instructions (e.g. dbg_value). These do not count. |
680 | 1.82k | // Note that these are reverse iterators going forward. |
681 | 1.82k | RTIE = skipDebugInstructionsForward(RTIE, RTIB); |
682 | 1.82k | RFIE = skipDebugInstructionsForward(RFIE, RFIB); |
683 | 1.82k | if (RTIE == RTIB || 1.82k RFIE == RFIB1.82k ) |
684 | 0 | break; |
685 | 1.82k | if (1.82k !RTIE->isIdenticalTo(*RFIE)1.82k ) |
686 | 1.42k | break; |
687 | 408 | // We have to verify that any branch instructions are the same, and then we |
688 | 408 | // don't count them toward the number of duplicate instructions. |
689 | 408 | if (408 !RTIE->isBranch()408 ) |
690 | 348 | ++Dups2; |
691 | 1.82k | ++RTIE; |
692 | 1.82k | ++RFIE; |
693 | 1.82k | } |
694 | 1.44k | TIE = std::next(RTIE.getReverse()); |
695 | 1.44k | FIE = std::next(RFIE.getReverse()); |
696 | 1.44k | return true; |
697 | 1.44k | } |
698 | | |
699 | | /// RescanInstructions - Run ScanInstructions on the given ranges of a pair of blocks. |
700 | | /// @param TIB - True Iterator Begin, points to first non-shared instruction |
701 | | /// @param FIB - False Iterator Begin, points to first non-shared instruction |
702 | | /// @param TIE - True Iterator End, points past last non-shared instruction |
703 | | /// @param FIE - False Iterator End, points past last non-shared instruction |
704 | | /// @param TrueBBI - BBInfo to update for the true block. |
705 | | /// @param FalseBBI - BBInfo to update for the false block. |
706 | | /// @returns - false if either range is unpredicable (any branch in the range counts |
707 | | /// as unpredicable here) or if both ranges clobber the predicate; true otherwise. |
708 | | bool IfConverter::RescanInstructions( |
709 | | MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB, |
710 | | MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE, |
711 | 1.43k | BBInfo &TrueBBI, BBInfo &FalseBBI) const { |
712 | 1.43k | bool BranchUnpredicable = true; |
713 | 1.43k | TrueBBI.IsUnpredicable = FalseBBI.IsUnpredicable = false; |
714 | 1.43k | ScanInstructions(TrueBBI, TIB, TIE, BranchUnpredicable); |
715 | 1.43k | if (TrueBBI.IsUnpredicable) |
716 | 673 | return false; |
717 | 764 | ScanInstructions(FalseBBI, FIB, FIE, BranchUnpredicable); |
718 | 764 | if (FalseBBI.IsUnpredicable) |
719 | 235 | return false; |
720 | 529 | if (529 TrueBBI.ClobbersPred && 529 FalseBBI.ClobbersPred62 ) |
721 | 50 | return false; |
722 | 479 | return true; |
723 | 479 | } |
724 | | |
725 | | #ifndef NDEBUG |
 | | /// Debug-only check that MBB1 and MBB2 end with matching branch instructions. |
 | | /// Walks both blocks backwards from their last instruction, skipping debug |
 | | /// instructions, and asserts that every branch found in one block is identical |
 | | /// to the instruction at the same position in the other block. The walk stops |
 | | /// at the first position where neither instruction is a branch, or when either |
 | | /// block runs out of instructions. |
726 | | static void verifySameBranchInstructions( |
727 | | MachineBasicBlock *MBB1, |
728 | | MachineBasicBlock *MBB2) { |
729 | | const MachineBasicBlock::reverse_iterator B1 = MBB1->rend(); |
730 | | const MachineBasicBlock::reverse_iterator B2 = MBB2->rend(); |
731 | | MachineBasicBlock::reverse_iterator E1 = MBB1->rbegin(); |
732 | | MachineBasicBlock::reverse_iterator E2 = MBB2->rbegin(); |
733 | | while (E1 != B1 && E2 != B2) { |
 | | // skipDebugInstructionsForward returns the advanced iterator (this is how |
 | | // CountDuplicatedInstructions uses it), so the result must be assigned |
 | | // back; otherwise debug instructions are never actually skipped. |
734 | | E1 = skipDebugInstructionsForward(E1, B1); |
735 | | E2 = skipDebugInstructionsForward(E2, B2); |
736 | | if (E1 == B1 && E2 == B2) |
737 | | break; |
738 | | |
 | | // Exactly one block ran out: the other must not still hold a branch. |
739 | | if (E1 == B1) { |
740 | | assert(!E2->isBranch() && "Branch mis-match, one block is empty."); |
741 | | break; |
742 | | } |
743 | | if (E2 == B2) { |
744 | | assert(!E1->isBranch() && "Branch mis-match, one block is empty."); |
745 | | break; |
746 | | } |
747 | | |
748 | | if (E1->isBranch() || E2->isBranch()) |
749 | | assert(E1->isIdenticalTo(*E2) && |
750 | | "Branch mis-match, branch instructions don't match."); |
751 | | else |
752 | | break; |
753 | | ++E1; |
754 | | ++E2; |
755 | | } |
756 | | } |
757 | | #endif |
758 | | |
759 | | /// ValidForkedDiamond - Returns true if the 'true' and 'false' blocks (along |
760 | | /// with their common predecessor) would form a diamond if a common tail block |
761 | | /// were extracted. |
762 | | /// While not strictly a diamond, this pattern would form a diamond if |
763 | | /// tail-merging had merged the shared tails. |
764 | | /// EBB |
765 | | /// _/ \_ |
766 | | /// | | |
767 | | /// TBB FBB |
768 | | /// / \ / \ |
769 | | /// FalseBB TrueBB FalseBB |
770 | | /// Currently only handles analyzable branches. |
771 | | /// Specifically excludes actual diamonds to avoid overlap. On success, Dups1 and Dups2 hold the shared instruction counts and TrueBBICalc/FalseBBICalc describe the rescanned non-shared ranges. |
772 | | bool IfConverter::ValidForkedDiamond( |
773 | | BBInfo &TrueBBI, BBInfo &FalseBBI, |
774 | | unsigned &Dups1, unsigned &Dups2, |
775 | 23.5k | BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const { |
776 | 23.5k | Dups1 = Dups2 = 0; |
777 | 23.5k | if (TrueBBI.IsBeingAnalyzed || 23.5k TrueBBI.IsDone22.1k || |
778 | 23.5k | FalseBBI.IsBeingAnalyzed21.7k || FalseBBI.IsDone21.5k ) |
779 | 2.40k | return false; |
780 | 21.1k | |
781 | 21.1k | if (21.1k !TrueBBI.IsBrAnalyzable || 21.1k !FalseBBI.IsBrAnalyzable14.6k ) |
782 | 7.71k | return false; |
783 | 13.4k | // Don't IfConvert blocks that can't be folded into their predecessor. |
784 | 13.4k | if (13.4k TrueBBI.BB->pred_size() > 1 || 13.4k FalseBBI.BB->pred_size() > 16.37k ) |
785 | 7.70k | return false; |
786 | 5.76k | |
787 | 5.76k | // This function is specifically looking for conditional tails, as |
788 | 5.76k | // unconditional tails are already handled by the standard diamond case. |
789 | 5.76k | if (5.76k TrueBBI.BrCond.size() == 0 || |
790 | 2.80k | FalseBBI.BrCond.size() == 0) |
791 | 4.65k | return false; |
792 | 1.10k | |
793 | 1.10k | MachineBasicBlock *TT = TrueBBI.TrueBB; |
794 | 1.10k | MachineBasicBlock *TF = TrueBBI.FalseBB; |
795 | 1.10k | MachineBasicBlock *FT = FalseBBI.TrueBB; |
796 | 1.10k | MachineBasicBlock *FF = FalseBBI.FalseBB; |
797 | 1.10k | |
798 | 1.10k | if (!TT) |
799 | 0 | TT = getNextBlock(*TrueBBI.BB); |
800 | 1.10k | if (!TF) |
801 | 0 | TF = getNextBlock(*TrueBBI.BB); |
802 | 1.10k | if (!FT) |
803 | 0 | FT = getNextBlock(*FalseBBI.BB); |
804 | 1.10k | if (!FF) |
805 | 0 | FF = getNextBlock(*FalseBBI.BB); |
806 | 1.10k | |
807 | 1.10k | if (!TT || 1.10k !TF1.10k ) |
808 | 0 | return false; |
809 | 1.10k | |
810 | 1.10k | // Check successors. Both blocks must branch to the same pair of targets, in either order. If they don't match, bail. |
811 | 1.10k | if (1.10k !((TT == FT && 1.10k TF == FF120 ) || (TF == FT && 1.10k TT == FF159 ))) |
812 | 1.06k | return false; |
813 | 42 | |
814 | 42 | bool FalseReversed = false; |
815 | 42 | if (TF == FT && 42 TT == FF38 ) { |
816 | 38 | // If the branches are opposing, but we can't reverse, don't do it. |
817 | 38 | if (!FalseBBI.IsBrReversible) |
818 | 0 | return false; |
819 | 38 | FalseReversed = true; |
820 | 38 | reverseBranchCondition(FalseBBI); |
821 | 38 | } |
822 | 42 | auto UnReverseOnExit = make_scope_exit([&]() 42 { |
823 | 42 | if (FalseReversed) |
824 | 38 | reverseBranchCondition(FalseBBI); |
825 | 42 | }); |
826 | 42 | |
827 | 42 | // Count duplicate instructions at the beginning and end of the true and false blocks. |
828 | 42 | MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); |
829 | 42 | MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); |
830 | 42 | MachineBasicBlock::iterator TIE = TrueBBI.BB->end(); |
831 | 42 | MachineBasicBlock::iterator FIE = FalseBBI.BB->end(); |
832 | 42 | if(!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2, |
833 | 42 | *TrueBBI.BB, *FalseBBI.BB, |
834 | 42 | /* SkipUnconditionalBranches */ true)) |
835 | 0 | return false; |
836 | 42 | |
837 | 42 | TrueBBICalc.BB = TrueBBI.BB; |
838 | 42 | FalseBBICalc.BB = FalseBBI.BB; |
839 | 42 | if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc)) |
840 | 40 | return false; |
841 | 2 | |
842 | 2 | // The size is used to decide whether to if-convert, and the shared portions |
843 | 2 | // are subtracted off. Because of the subtraction, we just use the size that |
844 | 2 | // was calculated by the original ScanInstructions, as it is correct. |
845 | 2 | TrueBBICalc.NonPredSize = TrueBBI.NonPredSize; |
846 | 2 | FalseBBICalc.NonPredSize = FalseBBI.NonPredSize; |
847 | 2 | return true; |
848 | 2 | } |
849 | | |
850 | | /// ValidDiamond - Returns true if the 'true' and 'false' blocks (along |
851 | | /// with their common predecessor) form a valid diamond shape for ifcvt. On success, Dups1 and Dups2 hold the shared instruction counts and TrueBBICalc/FalseBBICalc describe the rescanned non-shared ranges. |
852 | | bool IfConverter::ValidDiamond( |
853 | | BBInfo &TrueBBI, BBInfo &FalseBBI, |
854 | | unsigned &Dups1, unsigned &Dups2, |
855 | 24.0k | BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const { |
856 | 24.0k | Dups1 = Dups2 = 0; |
857 | 24.0k | if (TrueBBI.IsBeingAnalyzed || 24.0k TrueBBI.IsDone22.6k || |
858 | 24.0k | FalseBBI.IsBeingAnalyzed22.2k || FalseBBI.IsDone22.0k ) |
859 | 2.40k | return false; |
860 | 21.6k | |
861 | 21.6k | MachineBasicBlock *TT = TrueBBI.TrueBB; |
862 | 21.6k | MachineBasicBlock *FT = FalseBBI.TrueBB; |
863 | 21.6k | |
864 | 21.6k | if (!TT && 21.6k blockAlwaysFallThrough(TrueBBI)13.0k ) |
865 | 6.58k | TT = getNextBlock(*TrueBBI.BB); |
866 | 21.6k | if (!FT && 21.6k blockAlwaysFallThrough(FalseBBI)8.90k ) |
867 | 6.77k | FT = getNextBlock(*FalseBBI.BB); |
868 | 21.6k | if (TT != FT) |
869 | 17.9k | return false; |
870 | 3.67k | if (3.67k !TT && 3.67k (TrueBBI.IsBrAnalyzable || 1.48k FalseBBI.IsBrAnalyzable931 )) |
871 | 557 | return false; |
872 | 3.11k | if (3.11k TrueBBI.BB->pred_size() > 1 || 3.11k FalseBBI.BB->pred_size() > 12.37k ) |
873 | 911 | return false; |
874 | 2.20k | |
875 | 2.20k | // FIXME: Allow true block to have an early exit? |
876 | 2.20k | if (2.20k TrueBBI.FalseBB || 2.20k FalseBBI.FalseBB1.88k ) |
877 | 805 | return false; |
878 | 1.40k | |
879 | 1.40k | // Count duplicate instructions at the beginning and end of the true and |
880 | 1.40k | // false blocks. |
881 | 1.40k | // Skip unconditional branches only if we are considering an analyzable |
882 | 1.40k | // diamond. Otherwise the branches must be the same. |
883 | 1.40k | bool SkipUnconditionalBranches = |
884 | 882 | TrueBBI.IsBrAnalyzable && FalseBBI.IsBrAnalyzable; |
885 | 1.40k | MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); |
886 | 1.40k | MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); |
887 | 1.40k | MachineBasicBlock::iterator TIE = TrueBBI.BB->end(); |
888 | 1.40k | MachineBasicBlock::iterator FIE = FalseBBI.BB->end(); |
889 | 1.40k | if(!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2, |
890 | 1.40k | *TrueBBI.BB, *FalseBBI.BB, |
891 | 1.40k | SkipUnconditionalBranches)) |
892 | 7 | return false; |
893 | 1.39k | |
894 | 1.39k | TrueBBICalc.BB = TrueBBI.BB; |
895 | 1.39k | FalseBBICalc.BB = FalseBBI.BB; |
896 | 1.39k | if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc)) |
897 | 918 | return false; |
898 | 477 | // The size is used to decide whether to if-convert, and the shared portions |
899 | 477 | // are subtracted off. Because of the subtraction, we just use the size that |
900 | 477 | // was calculated by the original ScanInstructions, as it is correct. |
901 | 477 | TrueBBICalc.NonPredSize = TrueBBI.NonPredSize; |
902 | 477 | FalseBBICalc.NonPredSize = FalseBBI.NonPredSize; |
903 | 477 | return true; |
904 | 477 | } |
905 | | |
906 | | /// AnalyzeBranches - Look at the branches at the end of a block to determine if |
907 | | /// the block is predicable. Fills in TrueBB, FalseBB, BrCond, IsBrAnalyzable, IsBrReversible and HasFallThrough of BBI; does nothing if BBI.IsDone is set. |
908 | 88.2k | void IfConverter::AnalyzeBranches(BBInfo &BBI) { |
909 | 88.2k | if (BBI.IsDone) |
910 | 3.50k | return; |
911 | 84.7k | |
912 | 84.7k | BBI.TrueBB = BBI.FalseBB = nullptr; |
913 | 84.7k | BBI.BrCond.clear(); |
914 | 84.7k | BBI.IsBrAnalyzable = |
915 | 84.7k | !TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond); |
916 | 84.7k | SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); |
917 | 84.7k | BBI.IsBrReversible = (RevCond.size() == 0) || |
918 | 27.7k | !TII->reverseBranchCondition(RevCond); |
919 | 52.4k | BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr; |
920 | 84.7k | |
921 | 84.7k | if (BBI.BrCond.size()84.7k ) { |
922 | 27.7k | // No false branch. This BB must end with a conditional branch and a |
923 | 27.7k | // fallthrough, so look the false block up with findFalseBlock. |
924 | 27.7k | if (!BBI.FalseBB) |
925 | 26.2k | BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB); |
926 | 27.7k | if (!BBI.FalseBB27.7k ) { |
927 | 2 | // Malformed bcc? True and false blocks are the same? Either way, give up on this block. |
928 | 2 | BBI.IsUnpredicable = true; |
929 | 2 | } |
930 | 27.7k | } |
931 | 88.2k | } |
932 | | |
933 | | /// ScanInstructions - Scan the instructions in [Begin, End) to determine if |
934 | | /// the block is predicable. In most cases, that means all the instructions |
935 | | /// in the range are isPredicable(). Also checks if the range contains any |
936 | | /// instruction which can clobber a predicate (e.g. condition code register). |
937 | | /// If so, the block is not predicable unless it's the last instruction. When BranchUnpredicable is set, any branch in the range also makes the block unpredicable. Does nothing if BBI is already done or unpredicable. |
938 | | void IfConverter::ScanInstructions(BBInfo &BBI, |
939 | | MachineBasicBlock::iterator &Begin, |
940 | | MachineBasicBlock::iterator &End, |
941 | 90.4k | bool BranchUnpredicable) const { |
942 | 90.4k | if (BBI.IsDone || 90.4k BBI.IsUnpredicable86.9k ) |
943 | 6.05k | return; |
944 | 84.4k | |
945 | 84.4k | bool AlreadyPredicated = !BBI.Predicate.empty(); |
946 | 84.4k | |
947 | 84.4k | BBI.NonPredSize = 0; |
948 | 84.4k | BBI.ExtraCost = 0; |
949 | 84.4k | BBI.ExtraCost2 = 0; |
950 | 84.4k | BBI.ClobbersPred = false; |
951 | 198k | for (MachineInstr &MI : make_range(Begin, End)) { |
952 | 198k | if (MI.isDebugValue()) |
953 | 55 | continue; |
954 | 198k | |
955 | 198k | // It's unsafe to duplicate convergent instructions in this context, so set |
956 | 198k | // BBI.CannotBeCopied to true if MI is convergent. To see why, consider the |
957 | 198k | // following CFG, which is subject to our "simple" transformation. |
958 | 198k | // |
959 | 198k | // BB0 // if (c1) goto BB1; else goto BB2; |
960 | 198k | // / \ |
961 | 198k | // BB1 | |
962 | 198k | // | BB2 // if (c2) goto TBB; else goto FBB; |
963 | 198k | // | / | |
964 | 198k | // | / | |
965 | 198k | // TBB | |
966 | 198k | // | | |
967 | 198k | // | FBB |
968 | 198k | // | |
969 | 198k | // exit |
970 | 198k | // |
971 | 198k | // Suppose we want to move TBB's contents up into BB1 and BB2 (in BB1 they'd |
972 | 198k | // be unconditional, and in BB2, they'd be predicated upon c2), and suppose |
973 | 198k | // TBB contains a convergent instruction. This is safe iff doing so does |
974 | 198k | // not add a control-flow dependency to the convergent instruction -- i.e., |
975 | 198k | // it's safe iff the set of control flows that leads us to the convergent |
976 | 198k | // instruction does not get smaller after the transformation. |
977 | 198k | // |
978 | 198k | // Originally we executed TBB if c1 || c2. After the transformation, there |
979 | 198k | // are two copies of TBB's instructions. We get to the first if c1, and we |
980 | 198k | // get to the second if !c1 && c2. |
981 | 198k | // |
982 | 198k | // There are clearly fewer ways to satisfy the condition "c1" than |
983 | 198k | // "c1 || c2". Since we've shrunk the set of control flows which lead to |
984 | 198k | // our convergent instruction, the transformation is unsafe. |
985 | 198k | if (198k MI.isNotDuplicable() || 198k MI.isConvergent()191k ) |
986 | 6.84k | BBI.CannotBeCopied = true; |
987 | 198k | |
988 | 198k | bool isPredicated = TII->isPredicated(MI); |
989 | 134k | bool isCondBr = BBI.IsBrAnalyzable && MI.isConditionalBranch(); |
990 | 198k | |
991 | 198k | if (BranchUnpredicable && 198k MI.isBranch()5.51k ) { |
992 | 13 | BBI.IsUnpredicable = true; |
993 | 13 | return; |
994 | 13 | } |
995 | 198k | |
996 | 198k | // A conditional branch is not predicable, but it may be eliminated, so it is skipped rather than counted. |
997 | 198k | if (198k isCondBr198k ) |
998 | 15.4k | continue; |
999 | 183k | |
1000 | 183k | if (183k !isPredicated183k ) { |
1001 | 179k | BBI.NonPredSize++; |
1002 | 179k | unsigned ExtraPredCost = TII->getPredicationCost(MI); |
1003 | 179k | unsigned NumCycles = SchedModel.computeInstrLatency(&MI, false); |
1004 | 179k | if (NumCycles > 1) |
1005 | 29.0k | BBI.ExtraCost += NumCycles-1; |
1006 | 179k | BBI.ExtraCost2 += ExtraPredCost; |
1007 | 183k | } else if (4.11k !AlreadyPredicated4.11k ) { |
1008 | 1.68k | // FIXME: This instruction is already predicated before the |
1009 | 1.68k | // if-conversion pass. It's probably something like a conditional move. |
1010 | 1.68k | // Mark this block unpredicable for now. |
1011 | 1.68k | BBI.IsUnpredicable = true; |
1012 | 1.68k | return; |
1013 | 1.68k | } |
1014 | 181k | |
1015 | 181k | if (181k BBI.ClobbersPred && 181k !isPredicated9.56k ) { |
1016 | 7.27k | // Predicate modification instruction should end the block (except for |
1017 | 7.27k | // already predicated instructions and end of block branches). |
1018 | 7.27k | // Predicate may have been modified, the subsequent (currently) |
1019 | 7.27k | // unpredicated instructions cannot be correctly predicated. |
1020 | 7.27k | BBI.IsUnpredicable = true; |
1021 | 7.27k | return; |
1022 | 7.27k | } |
1023 | 174k | |
1024 | 174k | // FIXME: Make use of PredDefs? e.g. ADDC, SUBC sets predicates but are |
1025 | 174k | // still potentially predicable. |
1026 | 174k | std::vector<MachineOperand> PredDefs; |
1027 | 174k | if (TII->DefinesPredicate(MI, PredDefs)) |
1028 | 26.8k | BBI.ClobbersPred = true; |
1029 | 174k | |
1030 | 174k | if (!TII->isPredicable(MI)174k ) { |
1031 | 37.7k | BBI.IsUnpredicable = true; |
1032 | 37.7k | return; |
1033 | 37.7k | } |
1034 | 37.6k | } |
1035 | 90.4k | } |
1036 | | |
1037 | | /// Determine if the block is a suitable candidate to be predicated by the |
1038 | | /// specified predicate. |
1039 | | /// @param BBI BBInfo for the block to check |
1040 | | /// @param Pred Predicate array for the branch that leads to BBI |
1041 | | /// @param isTriangle true if the Analysis is for a triangle |
1042 | | /// @param RevBranch true if Reverse(Pred) leads to BBI (e.g. BBI is the false |
1043 | | /// case) |
1044 | | /// @param hasCommonTail true if BBI shares a tail with a sibling block that |
1045 | | /// contains any instruction that would make the block unpredicable. |
1046 | | bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, |
1047 | | SmallVectorImpl<MachineOperand> &Pred, |
1048 | | bool isTriangle, bool RevBranch, |
1049 | 13.4k | bool hasCommonTail) { |
1050 | 13.4k | // If the block is dead or unpredicable, then it cannot be predicated. |
1051 | 13.4k | // Two blocks may share a common unpredicable tail, but this doesn't prevent |
1052 | 13.4k | // them from being if-converted. The non-shared portion is assumed to have |
1053 | 13.4k | // been checked. |
1054 | 13.4k | if (BBI.IsDone || 13.4k (BBI.IsUnpredicable && 13.4k !hasCommonTail6.92k )) |
1055 | 6.90k | return false; |
1056 | 6.53k | |
1057 | 6.53k | // If it is already predicated but we couldn't analyze its terminator, the |
1058 | 6.53k | // latter might fallthrough, but we can't determine where to. |
1059 | 6.53k | // Conservatively avoid if-converting again. |
1060 | 6.53k | if (6.53k BBI.Predicate.size() && 6.53k !BBI.IsBrAnalyzable100 ) |
1061 | 0 | return false; |
1062 | 6.53k | |
1063 | 6.53k | // If it is already predicated, check if the new predicate subsumes |
1064 | 6.53k | // its predicate. |
1065 | 6.53k | if (6.53k BBI.Predicate.size() && 6.53k !TII->SubsumesPredicate(Pred, BBI.Predicate)100 ) |
1066 | 80 | return false; |
1067 | 6.45k | |
1068 | 6.45k | if (6.45k !hasCommonTail && 6.45k BBI.BrCond.size()5.49k ) { |
1069 | 1.10k | if (!isTriangle) |
1070 | 0 | return false; |
1071 | 1.10k | |
1072 | 1.10k | // Test predicate subsumption: the block's own branch condition (reversed when RevBranch is set) must subsume the reverse of Pred. |
1073 | 1.10k | SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end()); |
1074 | 1.10k | SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end()); |
1075 | 1.10k | if (RevBranch1.10k ) { |
1076 | 283 | if (TII->reverseBranchCondition(Cond)) |
1077 | 3 | return false; |
1078 | 1.10k | } |
1079 | 1.10k | if (1.10k TII->reverseBranchCondition(RevPred) || |
1080 | 1.10k | !TII->SubsumesPredicate(Cond, RevPred)) |
1081 | 514 | return false; |
1082 | 5.93k | } |
1083 | 5.93k | |
1084 | 5.93k | return true; |
1085 | 5.93k | } |
1086 | | |
1087 | | /// Analyze the structure of the sub-CFG starting from the specified block, using an explicit stack of blocks rather than recursion. |
1088 | | /// Record its successors and whether it looks like an if-conversion candidate; every candidate pattern found is appended to Tokens. |
1089 | | void IfConverter::AnalyzeBlock( |
1090 | 107k | MachineBasicBlock &MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) { |
1091 | 107k | struct BBState { |
1092 | 156k | BBState(MachineBasicBlock &MBB) : MBB(&MBB), SuccsAnalyzed(false) {} |
1093 | 107k | MachineBasicBlock *MBB; |
1094 | 107k | |
1095 | 107k | /// This flag is true if MBB's successors have been analyzed. |
1096 | 107k | bool SuccsAnalyzed; |
1097 | 107k | }; |
1098 | 107k | |
1099 | 107k | // Push MBB to the stack. |
1100 | 107k | SmallVector<BBState, 16> BBStack(1, MBB); |
1101 | 107k | |
1102 | 288k | while (!BBStack.empty()288k ) { |
1103 | 180k | BBState &State = BBStack.back(); |
1104 | 180k | MachineBasicBlock *BB = State.MBB; |
1105 | 180k | BBInfo &BBI = BBAnalysis[BB->getNumber()]; |
1106 | 180k | |
1107 | 180k | if (!State.SuccsAnalyzed180k ) { |
1108 | 156k | if (BBI.IsAnalyzed || 156k BBI.IsBeingAnalyzed89.8k ) { |
1109 | 67.8k | BBStack.pop_back(); |
1110 | 67.8k | continue; |
1111 | 67.8k | } |
1112 | 88.2k | |
1113 | 88.2k | BBI.BB = BB; |
1114 | 88.2k | BBI.IsBeingAnalyzed = true; |
1115 | 88.2k | |
1116 | 88.2k | AnalyzeBranches(BBI); |
1117 | 88.2k | MachineBasicBlock::iterator Begin = BBI.BB->begin(); |
1118 | 88.2k | MachineBasicBlock::iterator End = BBI.BB->end(); |
1119 | 88.2k | ScanInstructions(BBI, Begin, End); |
1120 | 88.2k | |
1121 | 88.2k | // Unanalyzable or ends with fallthrough or unconditional branch, or is |
1122 | 88.2k | // not considered for ifcvt anymore. |
1123 | 88.2k | if (!BBI.IsBrAnalyzable || 88.2k BBI.BrCond.empty()54.7k || BBI.IsDone28.9k ) { |
1124 | 60.6k | BBI.IsBeingAnalyzed = false; |
1125 | 60.6k | BBI.IsAnalyzed = true; |
1126 | 60.6k | BBStack.pop_back(); |
1127 | 60.6k | continue; |
1128 | 60.6k | } |
1129 | 27.6k | |
1130 | 27.6k | // Do not ifcvt if either path is a back edge to the entry block of the pattern (BB itself). |
1131 | 27.6k | if (27.6k BBI.TrueBB == BB || 27.6k BBI.FalseBB == BB24.1k ) { |
1132 | 3.53k | BBI.IsBeingAnalyzed = false; |
1133 | 3.53k | BBI.IsAnalyzed = true; |
1134 | 3.53k | BBStack.pop_back(); |
1135 | 3.53k | continue; |
1136 | 3.53k | } |
1137 | 24.1k | |
1138 | 24.1k | // Do not ifcvt if true and false fallthrough blocks are the same (no false block was found). |
1139 | 24.1k | if (24.1k !BBI.FalseBB24.1k ) { |
1140 | 2 | BBI.IsBeingAnalyzed = false; |
1141 | 2 | BBI.IsAnalyzed = true; |
1142 | 2 | BBStack.pop_back(); |
1143 | 2 | continue; |
1144 | 2 | } |
1145 | 24.1k | |
1146 | 24.1k | // Push the False and True blocks to the stack; the True block ends up on top and is visited first. |
1147 | 24.1k | State.SuccsAnalyzed = true; |
1148 | 24.1k | BBStack.push_back(*BBI.FalseBB); |
1149 | 24.1k | BBStack.push_back(*BBI.TrueBB); |
1150 | 24.1k | continue; |
1151 | 24.1k | } |
1152 | 24.1k | |
1153 | 24.1k | BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; |
1154 | 24.1k | BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; |
1155 | 24.1k | |
1156 | 24.1k | if (TrueBBI.IsDone && 24.1k FalseBBI.IsDone409 ) { |
1157 | 31 | BBI.IsBeingAnalyzed = false; |
1158 | 31 | BBI.IsAnalyzed = true; |
1159 | 31 | BBStack.pop_back(); |
1160 | 31 | continue; |
1161 | 31 | } |
1162 | 24.0k | |
1163 | 24.0k | SmallVector<MachineOperand, 4> |
1164 | 24.0k | RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); |
1165 | 24.0k | bool CanRevCond = !TII->reverseBranchCondition(RevCond); |
1166 | 24.0k | |
1167 | 24.0k | unsigned Dups = 0; |
1168 | 24.0k | unsigned Dups2 = 0; |
1169 | 24.0k | bool TNeedSub = !TrueBBI.Predicate.empty(); |
1170 | 24.0k | bool FNeedSub = !FalseBBI.Predicate.empty(); |
1171 | 24.0k | bool Enqueued = false; |
1172 | 24.0k | |
1173 | 24.0k | BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB); |
1174 | 24.0k | |
1175 | 24.0k | if (CanRevCond24.0k ) { |
1176 | 24.0k | BBInfo TrueBBICalc, FalseBBICalc; |
1177 | 479 | auto feasibleDiamond = [&]() { |
1178 | 479 | bool MeetsSize = MeetIfcvtSizeLimit( |
1179 | 479 | *TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) + |
1180 | 479 | TrueBBICalc.ExtraCost), TrueBBICalc.ExtraCost2, |
1181 | 479 | *FalseBBI.BB, (FalseBBICalc.NonPredSize - (Dups + Dups2) + |
1182 | 479 | FalseBBICalc.ExtraCost), FalseBBICalc.ExtraCost2, |
1183 | 479 | Prediction); |
1184 | 479 | bool TrueFeasible = FeasibilityAnalysis(TrueBBI, BBI.BrCond, |
1185 | 479 | /* isTriangle */ false, /* RevBranch */ false, |
1186 | 479 | /* hasCommonTail */ true); |
1187 | 479 | bool FalseFeasible = FeasibilityAnalysis(FalseBBI, RevCond, |
1188 | 479 | /* isTriangle */ false, /* RevBranch */ false, |
1189 | 479 | /* hasCommonTail */ true); |
1190 | 479 | return MeetsSize && TrueFeasible170 && FalseFeasible170 ; |
1191 | 479 | }; |
1192 | 24.0k | |
1193 | 24.0k | if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2, |
1194 | 24.0k | TrueBBICalc, FalseBBICalc)) { |
1195 | 477 | if (feasibleDiamond()477 ) { |
1196 | 168 | // Diamond: |
1197 | 168 | // EBB |
1198 | 168 | // / \_ |
1199 | 168 | // | | |
1200 | 168 | // TBB FBB |
1201 | 168 | // \ / |
1202 | 168 | // TailBB |
1203 | 168 | // Note TailBB can be empty. |
1204 | 168 | Tokens.push_back(llvm::make_unique<IfcvtToken>( |
1205 | 168 | BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2, |
1206 | 168 | (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred)); |
1207 | 168 | Enqueued = true; |
1208 | 168 | } |
1209 | 24.0k | } else if (23.5k ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2, |
1210 | 23.5k | TrueBBICalc, FalseBBICalc)) { |
1211 | 2 | if (feasibleDiamond()2 ) { |
1212 | 2 | // ForkedDiamond: |
1213 | 2 | // if TBB and FBB have a common tail that includes their conditional |
1214 | 2 | // branch instructions, then we can If Convert this pattern. |
1215 | 2 | // EBB |
1216 | 2 | // _/ \_ |
1217 | 2 | // | | |
1218 | 2 | // TBB FBB |
1219 | 2 | // / \ / \ |
1220 | 2 | // FalseBB TrueBB FalseBB |
1221 | 2 | // |
1222 | 2 | Tokens.push_back(llvm::make_unique<IfcvtToken>( |
1223 | 2 | BBI, ICForkedDiamond, TNeedSub | FNeedSub, Dups, Dups2, |
1224 | 2 | (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred)); |
1225 | 2 | Enqueued = true; |
1226 | 2 | } |
1227 | 23.5k | } |
1228 | 24.0k | } |
1229 | 24.0k | |
1230 | 24.0k | if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) && |
1231 | 154 | MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, |
1232 | 154 | TrueBBI.ExtraCost2, Prediction) && |
1233 | 24.0k | FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)97 ) { |
1234 | 31 | // Triangle: |
1235 | 31 | // EBB |
1236 | 31 | // | \_ |
1237 | 31 | // | | |
1238 | 31 | // | TBB |
1239 | 31 | // | / |
1240 | 31 | // FBB |
1241 | 31 | Tokens.push_back( |
1242 | 31 | llvm::make_unique<IfcvtToken>(BBI, ICTriangle, TNeedSub, Dups)); |
1243 | 31 | Enqueued = true; |
1244 | 31 | } |
1245 | 24.0k | |
1246 | 24.0k | if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction) && |
1247 | 17 | MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, |
1248 | 17 | TrueBBI.ExtraCost2, Prediction) && |
1249 | 24.0k | FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)10 ) { |
1250 | 1 | Tokens.push_back( |
1251 | 1 | llvm::make_unique<IfcvtToken>(BBI, ICTriangleRev, TNeedSub, Dups)); |
1252 | 1 | Enqueued = true; |
1253 | 1 | } |
1254 | 24.0k | |
1255 | 24.0k | if (ValidSimple(TrueBBI, Dups, Prediction) && |
1256 | 4.24k | MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, |
1257 | 4.24k | TrueBBI.ExtraCost2, Prediction) && |
1258 | 24.0k | FeasibilityAnalysis(TrueBBI, BBI.BrCond)3.27k ) { |
1259 | 2.15k | // Simple (split, no rejoin): |
1260 | 2.15k | // EBB |
1261 | 2.15k | // | \_ |
1262 | 2.15k | // | | |
1263 | 2.15k | // | TBB---> exit |
1264 | 2.15k | // | |
1265 | 2.15k | // FBB |
1266 | 2.15k | Tokens.push_back( |
1267 | 2.15k | llvm::make_unique<IfcvtToken>(BBI, ICSimple, TNeedSub, Dups)); |
1268 | 2.15k | Enqueued = true; |
1269 | 2.15k | } |
1270 | 24.0k | |
1271 | 24.0k | if (CanRevCond24.0k ) { |
1272 | 24.0k | // Try the other path: the same triangle and simple patterns with the false block as the one to predicate, using RevCond and the complementary probability. |
1273 | 24.0k | if (ValidTriangle(FalseBBI, TrueBBI, false, Dups, |
1274 | 24.0k | Prediction.getCompl()) && |
1275 | 5.62k | MeetIfcvtSizeLimit(*FalseBBI.BB, |
1276 | 5.62k | FalseBBI.NonPredSize + FalseBBI.ExtraCost, |
1277 | 5.62k | FalseBBI.ExtraCost2, Prediction.getCompl()) && |
1278 | 24.0k | FeasibilityAnalysis(FalseBBI, RevCond, true)3.86k ) { |
1279 | 1.16k | Tokens.push_back(llvm::make_unique<IfcvtToken>(BBI, ICTriangleFalse, |
1280 | 1.16k | FNeedSub, Dups)); |
1281 | 1.16k | Enqueued = true; |
1282 | 1.16k | } |
1283 | 24.0k | |
1284 | 24.0k | if (ValidTriangle(FalseBBI, TrueBBI, true, Dups, |
1285 | 24.0k | Prediction.getCompl()) && |
1286 | 4.66k | MeetIfcvtSizeLimit(*FalseBBI.BB, |
1287 | 4.66k | FalseBBI.NonPredSize + FalseBBI.ExtraCost, |
1288 | 4.66k | FalseBBI.ExtraCost2, Prediction.getCompl()) && |
1289 | 24.0k | FeasibilityAnalysis(FalseBBI, RevCond, true, true)3.32k ) { |
1290 | 922 | Tokens.push_back( |
1291 | 922 | llvm::make_unique<IfcvtToken>(BBI, ICTriangleFRev, FNeedSub, Dups)); |
1292 | 922 | Enqueued = true; |
1293 | 922 | } |
1294 | 24.0k | |
1295 | 24.0k | if (ValidSimple(FalseBBI, Dups, Prediction.getCompl()) && |
1296 | 2.36k | MeetIfcvtSizeLimit(*FalseBBI.BB, |
1297 | 2.36k | FalseBBI.NonPredSize + FalseBBI.ExtraCost, |
1298 | 2.36k | FalseBBI.ExtraCost2, Prediction.getCompl()) && |
1299 | 24.0k | FeasibilityAnalysis(FalseBBI, RevCond)1.90k ) { |
1300 | 711 | Tokens.push_back( |
1301 | 711 | llvm::make_unique<IfcvtToken>(BBI, ICSimpleFalse, FNeedSub, Dups)); |
1302 | 711 | Enqueued = true; |
1303 | 711 | } |
1304 | 24.0k | } |
1305 | 180k | |
1306 | 180k | BBI.IsEnqueued = Enqueued; |
1307 | 180k | BBI.IsBeingAnalyzed = false; |
1308 | 180k | BBI.IsAnalyzed = true; |
1309 | 180k | BBStack.pop_back(); |
1310 | 180k | } |
1311 | 107k | } |
1312 | | |
1313 | | /// Analyze all blocks of MF and collect entries for all if-conversion candidates in Tokens. |
1314 | | void IfConverter::AnalyzeBlocks( |
1315 | 34.1k | MachineFunction &MF, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) { |
1316 | 34.1k | for (MachineBasicBlock &MBB : MF) |
1317 | 107k | AnalyzeBlock(MBB, Tokens); |
1318 | 34.1k | |
1319 | 34.1k | // Sort to favor more complex ifcvt scheme. The sort is stable, so tokens that compare equal keep their discovery order. |
1320 | 34.1k | std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp); |
1321 | 34.1k | } |
1322 | | |
1323 | | /// Returns true if either ToMBB is the next block after MBB or all the |
1324 | | /// intervening blocks are empty (given MBB can fall through to its next block). |
1325 | 3.54k | static bool canFallThroughTo(MachineBasicBlock &MBB, MachineBasicBlock &ToMBB) { |
1326 | 3.54k | MachineFunction::iterator PI = MBB.getIterator(); |
1327 | 3.54k | MachineFunction::iterator I = std::next(PI); |
1328 | 3.54k | MachineFunction::iterator TI = ToMBB.getIterator(); |
1329 | 3.54k | MachineFunction::iterator E = MBB.getParent()->end(); |
1330 | 3.54k | while (I != TI3.54k ) { |
1331 | 624 | // Check isSuccessor to avoid case where the next block is empty, but |
1332 | 624 | // it's not a successor. Also fail if we run off the end of the function. |
1333 | 624 | if (I == E || 624 !I->empty()624 || !PI->isSuccessor(&*I)46 ) |
1334 | 623 | return false; |
1335 | 1 | PI = I++; |
1336 | 1 | } |
1337 | 3.54k | // Finally see if the last I (which is now ToMBB) is indeed a successor to PI. |
1338 | 2.92k | return PI->isSuccessor(&*I); |
1339 | 3.54k | } |
1340 | | |
1341 | | /// Invalidate the BBInfo of every predecessor of \p MBB so it will be re-analyzed to determine if it |
1342 | | /// can be if-converted. If a predecessor is already enqueued, dequeue it (IsEnqueued is cleared). Predecessors that are done, or whose BBInfo::BB is MBB itself, are left untouched. |
1343 | 3.71k | void IfConverter::InvalidatePreds(MachineBasicBlock &MBB) { |
1344 | 2.70k | for (const MachineBasicBlock *Predecessor : MBB.predecessors()) { |
1345 | 2.70k | BBInfo &PBBI = BBAnalysis[Predecessor->getNumber()]; |
1346 | 2.70k | if (PBBI.IsDone || 2.70k PBBI.BB == &MBB2.61k ) |
1347 | 92 | continue; |
1348 | 2.61k | PBBI.IsAnalyzed = false; |
1349 | 2.61k | PBBI.IsEnqueued = false; |
1350 | 2.61k | } |
1351 | 3.71k | } |
1352 | | |
1353 | | /// Inserts an unconditional branch from \p MBB to \p ToMBB through \p TII, passing a null second target and an empty condition. |
1354 | | static void InsertUncondBranch(MachineBasicBlock &MBB, MachineBasicBlock &ToMBB, |
1355 | 716 | const TargetInstrInfo *TII) { |
1356 | 716 | DebugLoc dl; // FIXME: this is nowhere (dl is default-constructed) |
1357 | 716 | SmallVector<MachineOperand, 0> NoCond; |
1358 | 716 | TII->insertBranch(MBB, &ToMBB, nullptr, NoCond, dl); |
1359 | 716 | } |
1360 | | |
1361 | | /// Behaves like LivePhysRegs::stepForward() but also adds implicit uses to MI for all |
1362 | | /// registers it clobbers that were live (or had a live sub-register) before MI. Regmask clobbers additionally get an implicit def. |
1363 | 6.61k | static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) { |
1364 | 6.61k | const TargetRegisterInfo *TRI = MI.getParent()->getParent() |
1365 | 6.61k | ->getSubtarget().getRegisterInfo(); |
1366 | 6.61k | |
1367 | 6.61k | // Before stepping forward past MI, remember which regs were live |
1368 | 6.61k | // before MI. This is needed to set the Undef flag only when reg is |
1369 | 6.61k | // dead. NOTE(review): nothing in this function sets Undef; the set only decides whether an implicit use is added below. |
1370 | 6.61k | SparseSet<unsigned> LiveBeforeMI; |
1371 | 6.61k | LiveBeforeMI.setUniverse(TRI->getNumRegs()); |
1372 | 6.61k | for (unsigned Reg : Redefs) |
1373 | 226k | LiveBeforeMI.insert(Reg); |
1374 | 6.61k | |
1375 | 6.61k | SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Clobbers; |
1376 | 6.61k | Redefs.stepForward(MI, Clobbers); |
1377 | 6.61k | |
1378 | 6.61k | // Now add the implicit uses for each of the clobbered values reported by stepForward. |
1379 | 6.91k | for (auto Clobber : Clobbers) { |
1380 | 6.91k | // FIXME: Const cast here is nasty, but better than making StepForward |
1381 | 6.91k | // take a mutable instruction instead of const. |
1382 | 6.91k | unsigned Reg = Clobber.first; |
1383 | 6.91k | MachineOperand &Op = const_cast<MachineOperand&>(*Clobber.second); |
1384 | 6.91k | MachineInstr *OpMI = Op.getParent(); |
1385 | 6.91k | MachineInstrBuilder MIB(*OpMI->getParent()->getParent(), OpMI); |
1386 | 6.91k | if (Op.isRegMask()6.91k ) { |
1387 | 291 | // First handle regmasks. They clobber any entries in the mask which |
1388 | 291 | // means that we need a def for those registers; a register that was live before MI also gets an implicit use. |
1389 | 291 | if (LiveBeforeMI.count(Reg)) |
1390 | 291 | MIB.addReg(Reg, RegState::Implicit); |
1391 | 291 | |
1392 | 291 | // We also need to add an implicit def of this register for the later |
1393 | 291 | // use to read from. |
1394 | 291 | // For the register allocator to have allocated a register clobbered |
1395 | 291 | // by the call which is used later, it must be the case that |
1396 | 291 | // the call doesn't return. |
1397 | 291 | MIB.addReg(Reg, RegState::Implicit | RegState::Define); |
1398 | 291 | continue; |
1399 | 291 | } |
1400 | 6.62k | if (6.62k LiveBeforeMI.count(Reg)6.62k ) |
1401 | 1.60k | MIB.addReg(Reg, RegState::Implicit); |
1402 | 5.01k | else { |
1403 | 5.01k | bool HasLiveSubReg = false; |
1404 | 5.06k | for (MCSubRegIterator S(Reg, TRI); S.isValid()5.06k ; ++S43 ) { |
1405 | 46 | if (!LiveBeforeMI.count(*S)) |
1406 | 43 | continue; |
1407 | 3 | HasLiveSubReg = true; |
1408 | 3 | break; |
1409 | 3 | } |
1410 | 5.01k | if (HasLiveSubReg) |
1411 | 3 | MIB.addReg(Reg, RegState::Implicit); |
1412 | 5.01k | } |
1413 | 6.91k | } |
1414 | 6.61k | } |
1415 | | |
1416 | | /// If-convert a simple (split, no rejoin) sub-CFG. The true block is predicated into BBI, or the false block (with the branch condition reversed) when \p Kind is ICSimpleFalse. Returns false if the conversion is abandoned. |
1417 | 2.43k | bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { |
1418 | 2.43k | BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; |
1419 | 2.43k | BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; |
1420 | 2.43k | BBInfo *CvtBBI = &TrueBBI; |
1421 | 2.43k | BBInfo *NextBBI = &FalseBBI; |
1422 | 2.43k | |
1423 | 2.43k | SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end()); |
1424 | 2.43k | if (Kind == ICSimpleFalse) |
1425 | 587 | std::swap(CvtBBI, NextBBI); |
1426 | 2.43k | |
1427 | 2.43k | MachineBasicBlock &CvtMBB = *CvtBBI->BB; |
1428 | 2.43k | MachineBasicBlock &NextMBB = *NextBBI->BB; |
1429 | 2.43k | if (CvtBBI->IsDone || |
1430 | 2.43k | (CvtBBI->CannotBeCopied && 2.28k CvtMBB.pred_size() > 10 )) { |
1431 | 148 | // Something has changed. It's no longer safe to predicate this block; clear IsAnalyzed on both infos. |
1432 | 148 | BBI.IsAnalyzed = false; |
1433 | 148 | CvtBBI->IsAnalyzed = false; |
1434 | 148 | return false; |
1435 | 148 | } |
1436 | 2.28k | |
1437 | 2.28k | if (2.28k CvtMBB.hasAddressTaken()2.28k ) |
1438 | 2.28k | // Conservatively abort if-conversion if BB's address is taken. |
1439 | 1 | return false; |
1440 | 2.28k | |
1441 | 2.28k | if (2.28k Kind == ICSimpleFalse2.28k ) |
1442 | 498 | if (498 TII->reverseBranchCondition(Cond)498 ) |
1443 | 0 | llvm_unreachable("Unable to reverse branch condition!"); |
1444 | 2.28k | |
1445 | 2.28k | Redefs.init(*TRI); |
1446 | 2.28k | |
1447 | 2.28k | if (MRI->tracksLiveness()2.28k ) { |
1448 | 2.27k | // Initialize liveins to the first BB. These are potentially redefined by |
1449 | 2.27k | // predicated instructions. |
1450 | 2.27k | Redefs.addLiveIns(CvtMBB); |
1451 | 2.27k | Redefs.addLiveIns(NextMBB); |
1452 | 2.27k | } |
1453 | 2.28k | |
1454 | 2.28k | // Remove the branches from the entry so we can add the contents of the |
1455 | 2.28k | // converted block (CvtMBB) to it. |
1456 | 2.28k | BBI.NonPredSize -= TII->removeBranch(*BBI.BB); |
1457 | 2.28k | |
1458 | 2.28k | if (CvtMBB.pred_size() > 12.28k ) { |
1459 | 1.26k | // CvtMBB has more than one predecessor: copy its instructions, predicate the copies, and add them to |
1460 | 1.26k | // the entry block. |
1461 | 1.26k | CopyAndPredicateBlock(BBI, *CvtBBI, Cond); |
1462 | 1.26k | |
1463 | 1.26k | // Keep the CFG updated. |
1464 | 1.26k | BBI.BB->removeSuccessor(&CvtMBB, true); |
1465 | 2.28k | } else { |
1466 | 1.01k | // CvtMBB has at most one predecessor: predicate its instructions in place. |
1467 | 1.01k | PredicateBlock(*CvtBBI, CvtMBB.end(), Cond); |
1468 | 1.01k | |
1469 | 1.01k | // Merge converted block into entry block. The BB to Cvt edge is removed |
1470 | 1.01k | // by MergeBlocks. |
1471 | 1.01k | MergeBlocks(BBI, *CvtBBI); |
1472 | 1.01k | } |
1473 | 2.28k | |
1474 | 2.28k | bool IterIfcvt = true; |
1475 | 2.28k | if (!canFallThroughTo(*BBI.BB, NextMBB)2.28k ) { |
1476 | 234 | InsertUncondBranch(*BBI.BB, NextMBB, TII); |
1477 | 234 | BBI.HasFallThrough = false; |
1478 | 234 | // Now ifcvt'd block will look like this: |
1479 | 234 | // BB: |
1480 | 234 | // ... |
1481 | 234 | // t, f = cmp |
1482 | 234 | // if t op |
1483 | 234 | // b BBf |
1484 | 234 | // |
1485 | 234 | // We cannot further ifcvt this block because the unconditional branch |
1486 | 234 | // will have to be predicated on the new condition, that will not be |
1487 | 234 | // available if cmp executes. |
1488 | 234 | IterIfcvt = false; |
1489 | 234 | } |
1490 | 2.28k | |
1491 | 2.28k | // Update block info. BB can be iteratively if-converted unless the unconditional branch was inserted above. |
1492 | 2.28k | if (!IterIfcvt) |
1493 | 234 | BBI.IsDone = true; |
1494 | 2.28k | InvalidatePreds(*BBI.BB); |
1495 | 2.28k | CvtBBI->IsDone = true; |
1496 | 2.28k | |
1497 | 2.28k | // FIXME: Must maintain LiveIns. |
1498 | 2.28k | return true; |
1499 | 2.43k | } |
1500 | | |
1501 | | /// If-convert a triangle sub-CFG. For ICTriangleFalse/ICTriangleFRev the false block is the one converted and the branch condition is reversed; for ICTriangleRev/ICTriangleFRev the converted block's own branch is reversed first. Returns false if the conversion is abandoned. |
1502 | 1.26k | bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { |
1503 | 1.26k | BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; |
1504 | 1.26k | BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; |
1505 | 1.26k | BBInfo *CvtBBI = &TrueBBI; |
1506 | 1.26k | BBInfo *NextBBI = &FalseBBI; |
1507 | 1.26k | DebugLoc dl; // FIXME: this is nowhere (dl is default-constructed) |
1508 | 1.26k | |
1509 | 1.26k | SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end()); |
1510 | 1.26k | if (Kind == ICTriangleFalse || 1.26k Kind == ICTriangleFRev195 ) |
1511 | 1.23k | std::swap(CvtBBI, NextBBI); |
1512 | 1.26k | |
1513 | 1.26k | MachineBasicBlock &CvtMBB = *CvtBBI->BB; |
1514 | 1.26k | MachineBasicBlock &NextMBB = *NextBBI->BB; |
1515 | 1.26k | if (CvtBBI->IsDone || |
1516 | 1.26k | (CvtBBI->CannotBeCopied && 1.26k CvtMBB.pred_size() > 10 )) { |
1517 | 0 | // Something has changed. It's no longer safe to predicate this block; clear IsAnalyzed on both infos. |
1518 | 0 | BBI.IsAnalyzed = false; |
1519 | 0 | CvtBBI->IsAnalyzed = false; |
1520 | 0 | return false; |
1521 | 0 | } |
1522 | 1.26k | |
1523 | 1.26k | if (1.26k CvtMBB.hasAddressTaken()1.26k ) |
1524 | 1.26k | // Conservatively abort if-conversion if BB's address is taken. |
1525 | 3 | return false; |
1526 | 1.26k | |
1527 | 1.26k | if (1.26k Kind == ICTriangleFalse || 1.26k Kind == ICTriangleFRev195 ) |
1528 | 1.23k | if (1.23k TII->reverseBranchCondition(Cond)1.23k ) |
1529 | 0 | llvm_unreachable("Unable to reverse branch condition!"); |
1530 | 1.26k | |
1531 | 1.26k | if (1.26k Kind == ICTriangleRev || 1.26k Kind == ICTriangleFRev1.26k ) { |
1532 | 164 | if (reverseBranchCondition(*CvtBBI)164 ) { |
1533 | 164 | // BB has been changed, modify its predecessors (except for this |
1534 | 164 | // one) so they don't get ifcvt'ed based on bad intel. |
1535 | 164 | for (MachineBasicBlock *PBB : CvtMBB.predecessors()) { |
1536 | 164 | if (PBB == BBI.BB) |
1537 | 164 | continue; |
1538 | 0 | BBInfo &PBBI = BBAnalysis[PBB->getNumber()]; |
1539 | 0 | if (PBBI.IsEnqueued0 ) { |
1540 | 0 | PBBI.IsAnalyzed = false; |
1541 | 0 | PBBI.IsEnqueued = false; |
1542 | 0 | } |
1543 | 164 | } |
1544 | 164 | } |
1545 | 164 | } |
1546 | 1.26k | |
1547 | 1.26k | // Initialize liveins to the first BB. These are potentially redefined by |
1548 | 1.26k | // predicated instructions. |
1549 | 1.26k | Redefs.init(*TRI); |
1550 | 1.26k | if (MRI->tracksLiveness()1.26k ) { |
1551 | 1.23k | Redefs.addLiveIns(CvtMBB); |
1552 | 1.23k | Redefs.addLiveIns(NextMBB); |
1553 | 1.23k | } |
1554 | 1.26k | |
1555 | 1.26k | bool HasEarlyExit = CvtBBI->FalseBB != nullptr; |
1556 | 1.26k | BranchProbability CvtNext, CvtFalse, BBNext, BBCvt; |
1557 | 1.26k | |
1558 | 1.26k | if (HasEarlyExit1.26k ) { |
1559 | 538 | // Get probabilities before modifying CvtMBB and BBI.BB. |
1560 | 538 | CvtNext = MBPI->getEdgeProbability(&CvtMBB, &NextMBB); |
1561 | 538 | CvtFalse = MBPI->getEdgeProbability(&CvtMBB, CvtBBI->FalseBB); |
1562 | 538 | BBNext = MBPI->getEdgeProbability(BBI.BB, &NextMBB); |
1563 | 538 | BBCvt = MBPI->getEdgeProbability(BBI.BB, &CvtMBB); |
1564 | 538 | } |
1565 | 1.26k | |
1566 | 1.26k | // Remove the branches from the entry so we can add the contents of the |
1567 | 1.26k | // converted block (CvtMBB) to it. |
1568 | 1.26k | BBI.NonPredSize -= TII->removeBranch(*BBI.BB); |
1569 | 1.26k | |
1570 | 1.26k | if (CvtMBB.pred_size() > 11.26k ) { |
1571 | 24 | // CvtMBB has more than one predecessor: copy its instructions (IgnoreBr is passed as true), predicate the copies, and add them to |
1572 | 24 | // the entry block. |
1573 | 24 | CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true); |
1574 | 1.26k | } else { |
1575 | 1.23k | // Predicate the converted block after removing its branch. |
1576 | 1.23k | CvtBBI->NonPredSize -= TII->removeBranch(CvtMBB); |
1577 | 1.23k | PredicateBlock(*CvtBBI, CvtMBB.end(), Cond); |
1578 | 1.23k | |
1579 | 1.23k | // Now merge the entry of the triangle with the converted block. |
1580 | 1.23k | MergeBlocks(BBI, *CvtBBI, false); |
1581 | 1.23k | } |
1582 | 1.26k | |
1583 | 1.26k | // Keep the CFG updated. |
1584 | 1.26k | BBI.BB->removeSuccessor(&CvtMBB, true); |
1585 | 1.26k | |
1586 | 1.26k | // If the converted block has a 'false' successor, add an exit branch to it. |
1587 | 1.26k | if (HasEarlyExit1.26k ) { |
1588 | 538 | SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(), |
1589 | 538 | CvtBBI->BrCond.end()); |
1590 | 538 | if (TII->reverseBranchCondition(RevCond)) |
1591 | 0 | llvm_unreachable("Unable to reverse branch condition!"); |
1592 | 538 | |
1593 | 538 | // Update the edge probability for both CvtBBI->FalseBB and NextBBI. |
1594 | 538 | // NewNext = New_Prob(BBI.BB, NextMBB) = |
1595 | 538 | // Prob(BBI.BB, NextMBB) + |
1596 | 538 | // Prob(BBI.BB, CvtMBB) * Prob(CvtMBB, NextMBB) |
1597 | 538 | // NewFalse = New_Prob(BBI.BB, CvtBBI->FalseBB) = |
1598 | 538 | // Prob(BBI.BB, CvtMBB) * Prob(CvtMBB, CvtBBI->FalseBB) |
1599 | 538 | auto NewTrueBB = getNextBlock(*BBI.BB); |
1600 | 538 | auto NewNext = BBNext + BBCvt * CvtNext; |
1601 | 538 | auto NewTrueBBIter = find(BBI.BB->successors(), NewTrueBB); |
1602 | 538 | if (NewTrueBBIter != BBI.BB->succ_end()) |
1603 | 186 | BBI.BB->setSuccProbability(NewTrueBBIter, NewNext); |
1604 | 538 | |
1605 | 538 | auto NewFalse = BBCvt * CvtFalse; |
1606 | 538 | TII->insertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl); |
1607 | 538 | BBI.BB->addSuccessor(CvtBBI->FalseBB, NewFalse); |
1608 | 538 | } |
1609 | 1.26k | |
1610 | 1.26k | // Merge in the 'false' block if the 'false' block has no other |
1611 | 1.26k | // predecessors. Otherwise, add an unconditional branch to 'false'. |
1612 | 1.26k | bool FalseBBDead = false; |
1613 | 1.26k | bool IterIfcvt = true; |
1614 | 1.26k | bool isFallThrough = canFallThroughTo(*BBI.BB, NextMBB); |
1615 | 1.26k | if (!isFallThrough1.26k ) { |
1616 | 390 | // Only merge them if the true block does not fallthrough to the false |
1617 | 390 | // block. By not merging them, we make it possible to iteratively |
1618 | 390 | // ifcvt the blocks. |
1619 | 390 | if (!HasEarlyExit && |
1620 | 390 | NextMBB.pred_size() == 138 && !NextBBI->HasFallThrough5 && |
1621 | 390 | !NextMBB.hasAddressTaken()2 ) { |
1622 | 2 | MergeBlocks(BBI, *NextBBI); |
1623 | 2 | FalseBBDead = true; |
1624 | 390 | } else { |
1625 | 388 | InsertUncondBranch(*BBI.BB, NextMBB, TII); |
1626 | 388 | BBI.HasFallThrough = false; |
1627 | 388 | } |
1628 | 390 | // Mixed predicated and unpredicated code. This cannot be iteratively |
1629 | 390 | // predicated. |
1630 | 390 | IterIfcvt = false; |
1631 | 390 | } |
1632 | 1.26k | |
1633 | 1.26k | // Update block info. BB can be iteratively if-converted unless IterIfcvt was cleared above. |
1634 | 1.26k | if (!IterIfcvt) |
1635 | 390 | BBI.IsDone = true; |
1636 | 1.26k | InvalidatePreds(*BBI.BB); |
1637 | 1.26k | CvtBBI->IsDone = true; |
1638 | 1.26k | if (FalseBBDead) |
1639 | 2 | NextBBI->IsDone = true; |
1640 | 1.26k | |
1641 | 1.26k | // FIXME: Must maintain LiveIns. |
1642 | 1.26k | return true; |
1643 | 1.26k | } |
1644 | | |
1645 | | /// Common code shared between diamond conversions. Returns false without converting if either block is done, has more than one predecessor, or has its address taken; returns true once both blocks have been predicated and merged into \p BBI. |
1646 | | /// \p BBI, \p TrueBBI, and \p FalseBBI form the diamond shape. |
1647 | | /// \p NumDups1 - number of shared instructions at the beginning of \p TrueBBI |
1648 | | /// and FalseBBI |
1649 | | /// \p NumDups2 - number of shared instructions at the end of \p TrueBBI |
1650 | | /// and \p FalseBBI |
1651 | | /// \p RemoveBranch - Remove the common branch of the two blocks before |
1652 | | /// predicating. Only false for unanalyzable fallthrough |
1653 | | /// cases. The caller will replace the branch if necessary. |
1654 | | /// \p MergeAddEdges - Add successor edges when merging blocks. Only false for |
1655 | | /// unanalyzable fallthrough. \p TClobbersPred / \p FClobbersPred - presumably whether the true / false block clobbers the predicate (see the ordering comment in the body); used here only to pick which block is predicated first, and both being set is treated as unreachable. |
1656 | | bool IfConverter::IfConvertDiamondCommon( |
1657 | | BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI, |
1658 | | unsigned NumDups1, unsigned NumDups2, |
1659 | | bool TClobbersPred, bool FClobbersPred, |
1660 | 170 | bool RemoveBranch, bool MergeAddEdges) { |
1661 | 170 | |
1662 | 170 | if (TrueBBI.IsDone || 170 FalseBBI.IsDone170 || |
1663 | 170 | TrueBBI.BB->pred_size() > 1170 || FalseBBI.BB->pred_size() > 1170 ) { |
1664 | 0 | // Something has changed. It's no longer safe to predicate these blocks. |
1665 | 0 | BBI.IsAnalyzed = false; |
1666 | 0 | TrueBBI.IsAnalyzed = false; |
1667 | 0 | FalseBBI.IsAnalyzed = false; |
1668 | 0 | return false; |
1669 | 0 | } |
1670 | 170 | |
1671 | 170 | if (170 TrueBBI.BB->hasAddressTaken() || 170 FalseBBI.BB->hasAddressTaken()170 ) |
1672 | 170 | // Conservatively abort if-conversion if either BB has its address taken. |
1673 | 1 | return false; |
1674 | 169 | |
1675 | 169 | // Put the predicated instructions from the 'true' block before the |
1676 | 169 | // instructions from the 'false' block, unless the true block would clobber |
1677 | 169 | // the predicate, in which case, do the opposite. |
1678 | 169 | BBInfo *BBI1 = &TrueBBI; |
1679 | 169 | BBInfo *BBI2 = &FalseBBI; |
1680 | 169 | SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); |
1681 | 169 | if (TII->reverseBranchCondition(RevCond)) |
1682 | 0 | llvm_unreachable("Unable to reverse branch condition!"); |
1683 | 169 | SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond; |
1684 | 169 | SmallVector<MachineOperand, 4> *Cond2 = &RevCond; |
1685 | 169 | |
1686 | 169 | // Figure out the more profitable ordering. When neither side clobbers the predicate, the block with the smaller NonPredSize goes first. |
1687 | 169 | bool DoSwap = false; |
1688 | 169 | if (TClobbersPred && 169 !FClobbersPred0 ) |
1689 | 0 | DoSwap = true; |
1690 | 169 | else if (169 !TClobbersPred && 169 !FClobbersPred169 ) { |
1691 | 168 | if (TrueBBI.NonPredSize > FalseBBI.NonPredSize) |
1692 | 47 | DoSwap = true; |
1693 | 169 | } else if (1 TClobbersPred && 1 FClobbersPred0 ) |
1694 | 0 | llvm_unreachable("Predicate info cannot be clobbered by both sides."); |
1695 | 169 | if (169 DoSwap169 ) { |
1696 | 47 | std::swap(BBI1, BBI2); |
1697 | 47 | std::swap(Cond1, Cond2); |
1698 | 47 | } |
1699 | 169 | |
1700 | 169 | // Remove the conditional branch from entry to the blocks. |
1701 | 169 | BBI.NonPredSize -= TII->removeBranch(*BBI.BB); |
1702 | 169 | |
1703 | 169 | MachineBasicBlock &MBB1 = *BBI1->BB; |
1704 | 169 | MachineBasicBlock &MBB2 = *BBI2->BB; |
1705 | 169 | |
1706 | 169 | // Initialize the Redefs: |
1707 | 169 | // - BB2 live-in regs need implicit uses before being redefined by BB1 |
1708 | 169 | // instructions. |
1709 | 169 | // - BB1 live-out regs need implicit uses before being redefined by BB2 |
1710 | 169 | // instructions. We start with BB1 live-ins so we have the live-out regs |
1711 | 169 | // after tracking the BB1 instructions. |
1712 | 169 | Redefs.init(*TRI); |
1713 | 169 | if (MRI->tracksLiveness()169 ) { |
1714 | 165 | Redefs.addLiveIns(MBB1); |
1715 | 165 | Redefs.addLiveIns(MBB2); |
1716 | 165 | } |
1717 | 169 | |
1718 | 169 | // Remove the duplicated instructions at the beginnings of both paths. |
1719 | 169 | // Skip dbg_value instructions. |
1720 | 169 | MachineBasicBlock::iterator DI1 = MBB1.getFirstNonDebugInstr(); |
1721 | 169 | MachineBasicBlock::iterator DI2 = MBB2.getFirstNonDebugInstr(); |
1722 | 169 | BBI1->NonPredSize -= NumDups1; |
1723 | 169 | BBI2->NonPredSize -= NumDups1; |
1724 | 169 | |
1725 | 169 | // Skip past the dups on each side separately since there may be |
1726 | 169 | // differing dbg_value entries. |
1727 | 178 | for (unsigned i = 0; i < NumDups1178 ; ++DI19 ) { |
1728 | 9 | if (!DI1->isDebugValue()) |
1729 | 9 | ++i; |
1730 | 9 | } |
1731 | 178 | while (NumDups1 != 0178 ) { |
1732 | 9 | ++DI2; |
1733 | 9 | if (!DI2->isDebugValue()) |
1734 | 9 | --NumDups1; |
1735 | 9 | } |
1736 | 169 | |
1737 | 169 | if (MRI->tracksLiveness()169 ) { |
1738 | 9 | for (const MachineInstr &MI : make_range(MBB1.begin(), DI1)) { |
1739 | 9 | SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Dummy; |
1740 | 9 | Redefs.stepForward(MI, Dummy); |
1741 | 9 | } |
1742 | 165 | } |
1743 | 169 | BBI.BB->splice(BBI.BB->end(), &MBB1, MBB1.begin(), DI1); |
1744 | 169 | MBB2.erase(MBB2.begin(), DI2); |
1745 | 169 | |
1746 | 169 | // The branches have been checked to match, so it is safe to remove the branch |
1747 | 169 | // in BB1 and rely on the copy in BB2. |
1748 | | #ifndef NDEBUG |
1749 | | // Unanalyzable branches must match exactly. Check that now. |
1750 | | if (!BBI1->IsBrAnalyzable) |
1751 | | verifySameBranchInstructions(&MBB1, &MBB2); |
1752 | | #endif |
1753 | | BBI1->NonPredSize -= TII->removeBranch(*BBI1->BB); |
1754 | 169 | // Remove duplicated instructions from the end of MBB1. |
1755 | 169 | DI1 = MBB1.end(); |
1756 | 242 | for (unsigned i = 0; i != NumDups2242 ; ) { |
1757 | 73 | // NumDups2 only counted non-dbg_value instructions, so this won't |
1758 | 73 | // run off the head of the list. |
1759 | 73 | assert(DI1 != MBB1.begin()); |
1760 | 73 | --DI1; |
1761 | 73 | // skip dbg_value instructions |
1762 | 73 | if (!DI1->isDebugValue()) |
1763 | 73 | ++i; |
1764 | 73 | } |
1765 | 169 | MBB1.erase(DI1, MBB1.end()); |
1766 | 169 | |
1767 | 169 | DI2 = BBI2->BB->end(); |
1768 | 169 | // The branches have been checked to match. Skip over the branch in the false |
1769 | 169 | // block so that we don't try to predicate it. |
1770 | 169 | if (RemoveBranch) |
1771 | 101 | BBI2->NonPredSize -= TII->removeBranch(*BBI2->BB); |
1772 | 68 | else { |
1773 | 76 | do { |
1774 | 76 | assert(DI2 != MBB2.begin()); |
1775 | 76 | DI2--; |
1776 | 76 | } while (DI2->isBranch() || 76 DI2->isDebugValue()68 ); |
1777 | 68 | DI2++; |
1778 | 68 | } |
1779 | 242 | while (NumDups2 != 0242 ) { |
1780 | 73 | // NumDups2 only counted non-dbg_value instructions, so this won't |
1781 | 73 | // run off the head of the list. |
1782 | 73 | assert(DI2 != MBB2.begin()); |
1783 | 73 | --DI2; |
1784 | 73 | // skip dbg_value instructions |
1785 | 73 | if (!DI2->isDebugValue()) |
1786 | 73 | --NumDups2; |
1787 | 73 | } |
1788 | 169 | |
1789 | 169 | // Remember which registers would later be defined by the false block. |
1790 | 169 | // This allows us not to predicate instructions in the true block that would |
1791 | 169 | // later be re-defined. That is, rather than |
1792 | 169 | // subeq r0, r1, #1 |
1793 | 169 | // addne r0, r1, #1 |
1794 | 169 | // generate: |
1795 | 169 | // sub r0, r1, #1 |
1796 | 169 | // addne r0, r1, #1 |
1797 | 169 | SmallSet<unsigned, 4> RedefsByFalse; |
1798 | 169 | SmallSet<unsigned, 4> ExtUses; |
1799 | 169 | if (TII->isProfitableToUnpredicate(MBB1, MBB2)169 ) { |
1800 | 35 | for (const MachineInstr &FI : make_range(MBB2.begin(), DI2)) { |
1801 | 35 | if (FI.isDebugValue()) |
1802 | 0 | continue; |
1803 | 35 | SmallVector<unsigned, 4> Defs; |
1804 | 152 | for (const MachineOperand &MO : FI.operands()) { |
1805 | 152 | if (!MO.isReg()) |
1806 | 66 | continue; |
1807 | 86 | unsigned Reg = MO.getReg(); |
1808 | 86 | if (!Reg) |
1809 | 46 | continue; |
1810 | 40 | if (40 MO.isDef()40 ) { |
1811 | 14 | Defs.push_back(Reg); |
1812 | 40 | } else if (26 !RedefsByFalse.count(Reg)26 ) { |
1813 | 22 | // These are defined before control flow reaches the 'false' instructions. |
1814 | 22 | // They cannot be modified by the 'true' instructions. |
1815 | 22 | for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); |
1816 | 44 | SubRegs.isValid()44 ; ++SubRegs22 ) |
1817 | 22 | ExtUses.insert(*SubRegs); |
1818 | 26 | } |
1819 | 152 | } |
1820 | 35 | |
1821 | 14 | for (unsigned Reg : Defs) { |
1822 | 14 | if (!ExtUses.count(Reg)14 ) { |
1823 | 11 | for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); |
1824 | 22 | SubRegs.isValid()22 ; ++SubRegs11 ) |
1825 | 11 | RedefsByFalse.insert(*SubRegs); |
1826 | 11 | } |
1827 | 14 | } |
1828 | 35 | } |
1829 | 16 | } |
1830 | 169 | |
1831 | 169 | // Predicate the 'true' block (BBI1; this is the false block if the ordering was swapped above). |
1832 | 169 | PredicateBlock(*BBI1, MBB1.end(), *Cond1, &RedefsByFalse); |
1833 | 169 | |
1834 | 169 | // After predicating BBI1, if there is a predicated terminator in BBI1 and |
1835 | 169 | // a non-predicated in BBI2, then we don't want to predicate the one from |
1836 | 169 | // BBI2. The reason is that if we merged these blocks, we would end up with |
1837 | 169 | // two predicated terminators in the same block. |
1838 | 169 | if (!MBB2.empty() && 169 (DI2 == MBB2.end())169 ) { |
1839 | 94 | MachineBasicBlock::iterator BBI1T = MBB1.getFirstTerminator(); |
1840 | 94 | MachineBasicBlock::iterator BBI2T = MBB2.getFirstTerminator(); |
1841 | 94 | if (BBI1T != MBB1.end() && 94 TII->isPredicated(*BBI1T)1 && |
1842 | 94 | BBI2T != MBB2.end()1 && !TII->isPredicated(*BBI2T)1 ) |
1843 | 1 | --DI2; |
1844 | 94 | } |
1845 | 169 | |
1846 | 169 | // Predicate the 'false' block (BBI2), up to DI2. |
1847 | 169 | PredicateBlock(*BBI2, DI2, *Cond2); |
1848 | 169 | |
1849 | 169 | // Merge both predicated blocks into the entry of the diamond, BBI1 first. |
1850 | 169 | MergeBlocks(BBI, *BBI1, MergeAddEdges); |
1851 | 169 | MergeBlocks(BBI, *BBI2, MergeAddEdges); |
1852 | 169 | return true; |
1853 | 170 | } |
1854 | | |
1855 | | /// If-convert an almost-diamond sub-CFG where the true |
1856 | | /// and false blocks share a common tail. \p Kind is not read in this function. Returns false if IfConvertDiamondCommon declines the conversion. |
1857 | | bool IfConverter::IfConvertForkedDiamond( |
1858 | | BBInfo &BBI, IfcvtKind Kind, |
1859 | | unsigned NumDups1, unsigned NumDups2, |
1860 | 2 | bool TClobbersPred, bool FClobbersPred) { |
1861 | 2 | BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; |
1862 | 2 | BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; |
1863 | 2 | |
1864 | 2 | // Save the debug location of the true block's first terminator (if any) for later. |
1865 | 2 | DebugLoc dl; |
1866 | 2 | MachineBasicBlock::iterator TIE = TrueBBI.BB->getFirstTerminator(); |
1867 | 2 | if (TIE != TrueBBI.BB->end()) |
1868 | 2 | dl = TIE->getDebugLoc(); |
1869 | 2 | // Removing branches from both blocks is safe, because we have already |
1870 | 2 | // determined that both blocks have the same branch instructions. The branch |
1871 | 2 | // will be added back at the end, unpredicated. |
1872 | 2 | if (!IfConvertDiamondCommon( |
1873 | 2 | BBI, TrueBBI, FalseBBI, |
1874 | 2 | NumDups1, NumDups2, |
1875 | 2 | TClobbersPred, FClobbersPred, |
1876 | 2 | /* RemoveBranch */ true, /* MergeAddEdges */ true)) |
1877 | 0 | return false; |
1878 | 2 | |
1879 | 2 | // Add back the branch, rebuilt from TrueBBI's TrueBB/FalseBB/BrCond. |
1880 | 2 | // It uses the debug location saved above from TrueBBI's first terminator. |
1881 | 2 | TII->insertBranch(*BBI.BB, TrueBBI.TrueBB, TrueBBI.FalseBB, |
1882 | 2 | TrueBBI.BrCond, dl); |
1883 | 2 | |
1884 | 2 | // Update block info. |
1885 | 2 | BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true; |
1886 | 2 | InvalidatePreds(*BBI.BB); |
1887 | 2 | |
1888 | 2 | // FIXME: Must maintain LiveIns. |
1889 | 2 | return true; |
1890 | 2 | } |
1891 | | |
1892 | | /// If-convert a diamond sub-CFG and, when possible, fold the tail block in as well. \p Kind is not read in this function. Returns false if IfConvertDiamondCommon declines the conversion. |
1893 | | bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, |
1894 | | unsigned NumDups1, unsigned NumDups2, |
1895 | 168 | bool TClobbersPred, bool FClobbersPred) { |
1896 | 168 | BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; |
1897 | 168 | BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; |
1898 | 168 | MachineBasicBlock *TailBB = TrueBBI.TrueBB; |
1899 | 168 | |
1900 | 168 | // True block must fall through or end with an unanalyzable terminator. |
1901 | 168 | if (!TailBB168 ) { |
1902 | 151 | if (blockAlwaysFallThrough(TrueBBI)) |
1903 | 82 | TailBB = FalseBBI.TrueBB; |
1904 | 151 | assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!"); |
1905 | 151 | } |
1906 | 168 | |
1907 | 168 | if (!IfConvertDiamondCommon( |
1908 | 168 | BBI, TrueBBI, FalseBBI, |
1909 | 168 | NumDups1, NumDups2, |
1910 | 168 | TClobbersPred, FClobbersPred, |
1911 | 168 | /* RemoveBranch */ TrueBBI.IsBrAnalyzable, |
1912 | 168 | /* MergeAddEdges */ TailBB == nullptr)) |
1913 | 1 | return false; |
1914 | 167 | |
1915 | 167 | // If the if-converted block falls through or unconditionally branches into |
1916 | 167 | // the tail block, and the tail block does not have other predecessors, then |
1917 | 167 | // fold the tail block in as well. Otherwise, unless it falls through to the |
1918 | 167 | // tail, add an unconditional branch to it. |
1919 | 167 | if (167 TailBB167 ) { |
1920 | 99 | // We need to remove the edges to the true and false blocks manually since |
1921 | 99 | // we didn't let IfConvertDiamondCommon update the CFG. |
1922 | 99 | BBI.BB->removeSuccessor(TrueBBI.BB); |
1923 | 99 | BBI.BB->removeSuccessor(FalseBBI.BB, true); |
1924 | 99 | |
1925 | 99 | BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()]; |
1926 | 99 | bool CanMergeTail = !TailBBI.HasFallThrough && |
1927 | 44 | !TailBBI.BB->hasAddressTaken(); |
1928 | 99 | // The if-converted block can still have a predicated terminator |
1929 | 99 | // (e.g. a predicated return). If that is the case, we cannot merge |
1930 | 99 | // it with the tail block. |
1931 | 99 | MachineBasicBlock::const_iterator TI = BBI.BB->getFirstTerminator(); |
1932 | 99 | if (TI != BBI.BB->end() && 99 TII->isPredicated(*TI)0 ) |
1933 | 0 | CanMergeTail = false; |
1934 | 99 | // There may still be a fall-through edge from BBI1 or BBI2 to TailBB; |
1935 | 99 | // check if there are any other predecessors besides those. |
1936 | 99 | unsigned NumPreds = TailBB->pred_size(); |
1937 | 99 | if (NumPreds > 1) |
1938 | 80 | CanMergeTail = false; |
1939 | 19 | else if (19 NumPreds == 1 && 19 CanMergeTail19 ) { |
1940 | 5 | MachineBasicBlock::pred_iterator PI = TailBB->pred_begin(); |
1941 | 5 | if (*PI != TrueBBI.BB && 5 *PI != FalseBBI.BB2 ) |
1942 | 0 | CanMergeTail = false; |
1943 | 19 | } |
1944 | 99 | if (CanMergeTail99 ) { |
1945 | 5 | MergeBlocks(BBI, TailBBI); |
1946 | 5 | TailBBI.IsDone = true; |
1947 | 99 | } else { |
1948 | 94 | BBI.BB->addSuccessor(TailBB, BranchProbability::getOne()); |
1949 | 94 | InsertUncondBranch(*BBI.BB, *TailBB, TII); |
1950 | 94 | BBI.HasFallThrough = false; |
1951 | 94 | } |
1952 | 99 | } |
1953 | 168 | |
1954 | 168 | // Update block info: the entry and both arms are marked done. |
1955 | 168 | BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true; |
1956 | 168 | InvalidatePreds(*BBI.BB); |
1957 | 168 | |
1958 | 168 | // FIXME: Must maintain LiveIns. |
1959 | 168 | return true; |
1960 | 168 | } |
1961 | | |
/// Returns true if \p MI can be left unpredicated: MI.isSafeToMove() must accept
/// it (SawStore is preset to true for that query) and every non-zero register
/// operand that MI defines must be present in \p LaterRedefs.
1962 | | static bool MaySpeculate(const MachineInstr &MI, |
1963 | 164 | SmallSet<unsigned, 4> &LaterRedefs) { |
1964 | 164 | bool SawStore = true; |
1965 | 164 | if (!MI.isSafeToMove(nullptr, SawStore)) |
1966 | 45 | return false; |
1967 | 119 | |
1968 | 119 | for (const MachineOperand &MO : MI.operands()) 119 { |
1969 | 148 | if (!MO.isReg()) |
1970 | 14 | continue; |
1971 | 134 | unsigned Reg = MO.getReg(); |
1972 | 134 | if (!Reg) |
1973 | 14 | continue; |
1974 | 120 | if (120 MO.isDef() && 120 !LaterRedefs.count(Reg)119 ) |
1975 | 112 | return false; |
1976 | 7 | } |
1977 | 7 | |
1978 | 7 | return true; |
1979 | 7 | } |
1980 | | |
1981 | | /// Predicate instructions from the start of the block to the specified end with |
1982 | | /// the specified condition. Debug values and already-predicated instructions are skipped. When \p LaterRedefs is non-null, instructions accepted by MaySpeculate are left unpredicated until the first instruction that gets predicated. Afterwards \p Cond is appended to BBI.Predicate, BBI.IsAnalyzed is cleared and BBI.NonPredSize is reset to 0. |
1983 | | void IfConverter::PredicateBlock(BBInfo &BBI, |
1984 | | MachineBasicBlock::iterator E, |
1985 | | SmallVectorImpl<MachineOperand> &Cond, |
1986 | 2.59k | SmallSet<unsigned, 4> *LaterRedefs) { |
1987 | 2.59k | bool AnyUnpred = false; |
1988 | 2.59k | bool MaySpec = LaterRedefs != nullptr; |
1989 | 5.31k | for (MachineInstr &I : make_range(BBI.BB->begin(), E)) { |
1990 | 5.31k | if (I.isDebugValue() || 5.31k TII->isPredicated(I)5.31k ) |
1991 | 5 | continue; |
1992 | 5.30k | // It may be possible not to predicate an instruction if it's the 'true' |
1993 | 5.30k | // side of a diamond and the 'false' side may re-define the instruction's |
1994 | 5.30k | // defs. |
1995 | 5.30k | if (5.30k MaySpec && 5.30k MaySpeculate(I, *LaterRedefs)164 ) { |
1996 | 7 | AnyUnpred = true; |
1997 | 7 | continue; |
1998 | 7 | } |
1999 | 5.29k | // If any instruction is predicated, then every instruction after it must |
2000 | 5.29k | // be predicated. |
2001 | 5.29k | MaySpec = false; |
2002 | 5.29k | if (!TII->PredicateInstruction(I, Cond)5.29k ) { |
2003 | | #ifndef NDEBUG |
2004 | | dbgs() << "Unable to predicate " << I << "!\n"; |
2005 | | #endif |
2006 | 0 | llvm_unreachable(nullptr); |
2007 | 0 | } |
2008 | 5.29k | |
2009 | 5.29k | // If the predicated instruction now redefines a register as the result of |
2010 | 5.29k | // if-conversion, add an implicit use of it (done by UpdatePredRedefs). |
2011 | 5.29k | UpdatePredRedefs(I, Redefs); |
2012 | 5.29k | } |
2013 | 2.59k | |
2014 | 2.59k | BBI.Predicate.append(Cond.begin(), Cond.end()); |
2015 | 2.59k | |
2016 | 2.59k | BBI.IsAnalyzed = false; |
2017 | 2.59k | BBI.NonPredSize = 0; |
2018 | 2.59k | |
2019 | 2.59k | ++NumIfConvBBs; |
2020 | 2.59k | if (AnyUnpred) |
2021 | 7 | ++NumUnpred; |
2022 | 2.59k | } |
2023 | | |
2024 | | /// Clone each instruction of FromBBI.BB, append the clone to the end of ToBBI.BB and predicate it with Cond (unless the original is already predicated or the clone is a debug value). ToBBI.NonPredSize, ExtraCost and ExtraCost2 are updated per copied instruction. |
2025 | | /// If IgnoreBr is true, copying stops at the first branch instruction; otherwise FromBBI.BB's successors, except its fall-through block, are also added as successors of ToBBI.BB. Finally FromBBI.Predicate and Cond are appended to ToBBI.Predicate and NumDupBBs is incremented. |
2026 | | void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, |
2027 | | SmallVectorImpl<MachineOperand> &Cond, |
2028 | 1.29k | bool IgnoreBr) { |
2029 | 1.29k | MachineFunction &MF = *ToBBI.BB->getParent(); |
2030 | 1.29k | |
2031 | 1.29k | MachineBasicBlock &FromMBB = *FromBBI.BB; |
2032 | 1.31k | for (MachineInstr &I : FromMBB) { |
2033 | 1.31k | // Do not copy the end of the block branches: stop at the first branch when IgnoreBr is set. |
2034 | 1.31k | if (IgnoreBr && 1.31k I.isBranch()27 ) |
2035 | 0 | break; |
2036 | 1.31k | |
2037 | 1.31k | MachineInstr *MI = MF.CloneMachineInstr(&I); |
2038 | 1.31k | ToBBI.BB->insert(ToBBI.BB->end(), MI); |
2039 | 1.31k | ToBBI.NonPredSize++; |
2040 | 1.31k | unsigned ExtraPredCost = TII->getPredicationCost(I); |
2041 | 1.31k | unsigned NumCycles = SchedModel.computeInstrLatency(&I, false); |
2042 | 1.31k | if (NumCycles > 1) |
2043 | 76 | ToBBI.ExtraCost += NumCycles-1; |
2044 | 1.31k | ToBBI.ExtraCost2 += ExtraPredCost; |
2045 | 1.31k | |
2046 | 1.31k | if (!TII->isPredicated(I) && 1.31k !MI->isDebugValue()1.31k ) { |
2047 | 1.31k | if (!TII->PredicateInstruction(*MI, Cond)1.31k ) { |
2048 | | #ifndef NDEBUG |
2049 | | dbgs() << "Unable to predicate " << I << "!\n"; |
2050 | | #endif |
2051 | 0 | llvm_unreachable(nullptr); |
2052 | 0 | } |
2053 | 1.31k | } |
2054 | 1.31k | |
2055 | 1.31k | // If the predicated instruction now redefines a register as the result of |
2056 | 1.31k | // if-conversion, add an implicit kill (done by UpdatePredRedefs on the clone). |
2057 | 1.31k | UpdatePredRedefs(*MI, Redefs); |
2058 | 1.31k | } |
2059 | 1.29k | |
2060 | 1.29k | if (1.29k !IgnoreBr1.29k ) { |
2061 | 1.26k | std::vector<MachineBasicBlock *> Succs(FromMBB.succ_begin(), |
2062 | 1.26k | FromMBB.succ_end()); |
2063 | 1.26k | MachineBasicBlock *NBB = getNextBlock(FromMBB); |
2064 | 1.26k | MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB0 : nullptr1.26k ; |
2065 | 1.26k | |
2066 | 0 | for (MachineBasicBlock *Succ : Succs) { |
2067 | 0 | // Fallthrough edge can't be transferred, so it is skipped. |
2068 | 0 | if (Succ == FallThrough) |
2069 | 0 | continue; |
2070 | 0 | ToBBI.BB->addSuccessor(Succ); |
2071 | 0 | } |
2072 | 1.26k | } |
2073 | 1.29k | |
2074 | 1.29k | ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end()); |
2075 | 1.29k | ToBBI.Predicate.append(Cond.begin(), Cond.end()); |
2076 | 1.29k | |
2077 | 1.29k | ToBBI.ClobbersPred |= FromBBI.ClobbersPred; |
2078 | 1.29k | ToBBI.IsAnalyzed = false; |
2079 | 1.29k | |
2080 | 1.29k | ++NumDupBBs; |
2081 | 1.29k | } |
2082 | | |
2083 | | /// Move all instructions from FromBBI.BB into ToBBI.BB (non-terminators first, |
2084 | | /// then terminators). This leaves FromBBI.BB empty, so all of its successor edges |
2085 | | /// except the fall-through edge are removed. If AddEdges is true, i.e., when FromBBI's branch is |
2086 | | /// being moved, those successor edges are added to ToBBI.BB with scaled probabilities and the old edge |
2087 | | /// from ToBBI.BB to FromBBI.BB is removed. The emptied block is moved to the end of the function and FromBBI's predicate, size and cost bookkeeping is folded into ToBBI. |
2088 | 2.59k | void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { |
2089 | 2.59k | MachineBasicBlock &FromMBB = *FromBBI.BB; |
2090 | 2.59k | assert(!FromMBB.hasAddressTaken() && |
2091 | 2.59k | "Removing a BB whose address is taken!"); |
2092 | 2.59k | |
2093 | 2.59k | // In case FromMBB contains terminators (e.g. return instruction), |
2094 | 2.59k | // first move the non-terminator instructions, then the terminators. |
2095 | 2.59k | MachineBasicBlock::iterator FromTI = FromMBB.getFirstTerminator(); |
2096 | 2.59k | MachineBasicBlock::iterator ToTI = ToBBI.BB->getFirstTerminator(); |
2097 | 2.59k | ToBBI.BB->splice(ToTI, &FromMBB, FromMBB.begin(), FromTI); |
2098 | 2.59k | |
2099 | 2.59k | // If FromMBB's first terminator is not predicated, its terminators go at the very end of ToBBI.BB; otherwise they are spliced before ToBBI.BB's first terminator. |
2100 | 2.59k | if (FromTI != FromMBB.end() && 2.59k !TII->isPredicated(*FromTI)1.09k ) |
2101 | 164 | ToTI = ToBBI.BB->end(); |
2102 | 2.59k | ToBBI.BB->splice(ToTI, &FromMBB, FromTI, FromMBB.end()); |
2103 | 2.59k | |
2104 | 2.59k | // Force normalizing the successors' probabilities of ToBBI.BB to convert all |
2105 | 2.59k | // unknown probabilities into known ones. |
2106 | 2.59k | // FIXME: This usage is too tricky and in the future we would like to |
2107 | 2.59k | // eliminate all unknown probabilities in MBB. |
2108 | 2.59k | if (ToBBI.IsBrAnalyzable) |
2109 | 2.59k | ToBBI.BB->normalizeSuccProbs(); |
2110 | 2.59k | |
2111 | 2.59k | SmallVector<MachineBasicBlock *, 4> FromSuccs(FromMBB.succ_begin(), |
2112 | 2.59k | FromMBB.succ_end()); |
2113 | 2.59k | MachineBasicBlock *NBB = getNextBlock(FromMBB); |
2114 | 2.59k | MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB1.43k : nullptr1.16k ; |
2115 | 2.59k | // The edge probability from ToBBI.BB to FromMBB, which is only needed when |
2116 | 2.59k | // AddEdges is true and FromMBB is a successor of ToBBI.BB. It stays zero otherwise. |
2117 | 2.59k | auto To2FromProb = BranchProbability::getZero(); |
2118 | 2.59k | if (AddEdges && 2.59k ToBBI.BB->isSuccessor(&FromMBB)1.16k ) { |
2119 | 1.15k | // Remove the old edge but remember the edge probability so we can calculate |
2120 | 1.15k | // the correct weights on the new edges being added further down. |
2121 | 1.15k | To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, &FromMBB); |
2122 | 1.15k | ToBBI.BB->removeSuccessor(&FromMBB); |
2123 | 1.15k | } |
2124 | 2.59k | |
2125 | 1.97k | for (MachineBasicBlock *Succ : FromSuccs) { |
2126 | 1.97k | // Fallthrough edge can't be transferred, so it stays on FromMBB. |
2127 | 1.97k | if (Succ == FallThrough) |
2128 | 1.33k | continue; |
2129 | 637 | |
2130 | 637 | auto NewProb = BranchProbability::getZero(); |
2131 | 637 | if (AddEdges637 ) { |
2132 | 38 | // Calculate the edge probability for the edge from ToBBI.BB to Succ, |
2133 | 38 | // which is a portion of the edge probability from FromMBB to Succ. The |
2134 | 38 | // portion ratio is the edge probability from ToBBI.BB to FromMBB (if |
2135 | 38 | // FromBBI is a successor of ToBBI.BB. See comment below for exception). |
2136 | 38 | NewProb = MBPI->getEdgeProbability(&FromMBB, Succ); |
2137 | 38 | |
2138 | 38 | // To2FromProb is 0 when FromMBB is not a successor of ToBBI.BB. This |
2139 | 38 | // only happens when if-converting a diamond CFG and FromMBB is the |
2140 | 38 | // tail BB. In this case FromMBB post-dominates ToBBI.BB and hence we |
2141 | 38 | // could just use the probabilities on FromMBB's out-edges when adding |
2142 | 38 | // new successors. |
2143 | 38 | if (!To2FromProb.isZero()) |
2144 | 33 | NewProb *= To2FromProb; |
2145 | 38 | } |
2146 | 637 | |
2147 | 637 | FromMBB.removeSuccessor(Succ); |
2148 | 637 | |
2149 | 637 | if (AddEdges637 ) { |
2150 | 38 | // If the edge from ToBBI.BB to Succ already exists, update the |
2151 | 38 | // probability of this edge by adding NewProb to it. An example is shown |
2152 | 38 | // below, in which A is ToBBI.BB and B is FromMBB. In this case we |
2153 | 38 | // don't have to set C as A's successor as it already is. We only need to |
2154 | 38 | // update the edge probability on A->C. Note that B will not be |
2155 | 38 | // immediately removed from A's successors. It is possible that B->D is |
2156 | 38 | // not removed either if D is a fallthrough of B. Later the edge A->D |
2157 | 38 | // (generated here) and B->D will be combined into one edge. To maintain |
2158 | 38 | // correct edge probability of this combined edge, we need to set the edge |
2159 | 38 | // probability of A->B to zero, which is already done above. The edge |
2160 | 38 | // probability on A->D is calculated by scaling the original probability |
2161 | 38 | // on A->B by the probability of B->D. |
2162 | 38 | // |
2163 | 38 | // Before ifcvt:      After ifcvt (assume B->D is kept): |
2164 | 38 | // |
2165 | 38 | //       A                A |
2166 | 38 | //      /|               /|\ |
2167 | 38 | //     / B              / B| |
2168 | 38 | //    | /|             |  || |
2169 | 38 | //    |/ |             |  |/ |
2170 | 38 | //    C  D             C  D |
2171 | 38 | // |
2172 | 38 | if (ToBBI.BB->isSuccessor(Succ)) |
2173 | 6 | ToBBI.BB->setSuccProbability( |
2174 | 6 | find(ToBBI.BB->successors(), Succ), |
2175 | 6 | MBPI->getEdgeProbability(ToBBI.BB, Succ) + NewProb); |
2176 | 38 | else |
2177 | 32 | ToBBI.BB->addSuccessor(Succ, NewProb); |
2178 | 38 | } |
2179 | 1.97k | } |
2180 | 2.59k | |
2181 | 2.59k | // Move the now empty FromMBB out of the way to the end of the function so |
2182 | 2.59k | // it doesn't interfere with fallthrough checks done by canFallThroughTo(). |
2183 | 2.59k | MachineBasicBlock *Last = &*FromMBB.getParent()->rbegin(); |
2184 | 2.59k | if (Last != &FromMBB) |
2185 | 2.18k | FromMBB.moveAfter(Last); |
2186 | 2.59k | |
2187 | 2.59k | // Normalize the probabilities of ToBBI.BB's successors with all adjustment |
2188 | 2.59k | // we've done above. This is only done when both blocks have analyzable branches. |
2189 | 2.59k | if (ToBBI.IsBrAnalyzable && 2.59k FromBBI.IsBrAnalyzable2.59k ) |
2190 | 1.44k | ToBBI.BB->normalizeSuccProbs(); |
2191 | 2.59k | |
2192 | 2.59k | ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end()); |
2193 | 2.59k | FromBBI.Predicate.clear(); |
2194 | 2.59k | |
2195 | 2.59k | ToBBI.NonPredSize += FromBBI.NonPredSize; |
2196 | 2.59k | ToBBI.ExtraCost += FromBBI.ExtraCost; |
2197 | 2.59k | ToBBI.ExtraCost2 += FromBBI.ExtraCost2; |
2198 | 2.59k | FromBBI.NonPredSize = 0; |
2199 | 2.59k | FromBBI.ExtraCost = 0; |
2200 | 2.59k | FromBBI.ExtraCost2 = 0; |
2201 | 2.59k | |
2202 | 2.59k | ToBBI.ClobbersPred |= FromBBI.ClobbersPred; |
2203 | 2.59k | ToBBI.HasFallThrough = FromBBI.HasFallThrough; |
2204 | 2.59k | ToBBI.IsAnalyzed = false; |
2205 | 2.59k | FromBBI.IsAnalyzed = false; |
2206 | 2.59k | } |
2207 | | |
/// Factory for the if-conversion pass: allocates a new IfConverter, moving Ftor into its constructor, and returns it as a FunctionPass pointer. NOTE(review): Ftor is presumably a per-function filter for the pass and the caller presumably takes ownership of the result; neither is visible here, confirm against the IfConverter constructor and the callers.
2208 | | FunctionPass * |
2209 | 4.12k | llvm::createIfConverter(std::function<bool(const MachineFunction &)> Ftor) { |
2210 | 4.12k | return new IfConverter(std::move(Ftor)); |
2211 | 4.12k | } |