/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/CodeGen/BranchFolding.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- BranchFolding.cpp - Fold machine code branch instructions ----------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This pass forwards branches to unconditional branches to make them branch |
11 | | // directly to the target block. This pass often results in dead MBB's, which |
12 | | // it then removes. |
13 | | // |
14 | | // Note that this pass must be run after register allocation, it cannot handle |
15 | | // SSA form. It also must handle virtual registers for targets that emit virtual |
16 | | // ISA (e.g. NVPTX). |
17 | | // |
18 | | //===----------------------------------------------------------------------===// |
19 | | |
20 | | #include "BranchFolding.h" |
21 | | #include "llvm/ADT/BitVector.h" |
22 | | #include "llvm/ADT/SmallPtrSet.h" |
23 | | #include "llvm/ADT/SmallSet.h" |
24 | | #include "llvm/ADT/SmallVector.h" |
25 | | #include "llvm/ADT/Statistic.h" |
26 | | #include "llvm/ADT/STLExtras.h" |
27 | | #include "llvm/CodeGen/Analysis.h" |
28 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
29 | | #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" |
30 | | #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" |
31 | | #include "llvm/CodeGen/MachineFunction.h" |
32 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
33 | | #include "llvm/CodeGen/MachineInstr.h" |
34 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
35 | | #include "llvm/CodeGen/MachineJumpTableInfo.h" |
36 | | #include "llvm/CodeGen/MachineLoopInfo.h" |
37 | | #include "llvm/CodeGen/MachineModuleInfo.h" |
38 | | #include "llvm/CodeGen/MachineOperand.h" |
39 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
40 | | #include "llvm/CodeGen/TargetPassConfig.h" |
41 | | #include "llvm/IR/DebugInfoMetadata.h" |
42 | | #include "llvm/IR/DebugLoc.h" |
43 | | #include "llvm/IR/Function.h" |
44 | | #include "llvm/MC/MCRegisterInfo.h" |
45 | | #include "llvm/Pass.h" |
46 | | #include "llvm/Support/BlockFrequency.h" |
47 | | #include "llvm/Support/BranchProbability.h" |
48 | | #include "llvm/Support/CommandLine.h" |
49 | | #include "llvm/Support/Debug.h" |
50 | | #include "llvm/Support/ErrorHandling.h" |
51 | | #include "llvm/Support/raw_ostream.h" |
52 | | #include "llvm/Target/TargetInstrInfo.h" |
53 | | #include "llvm/Target/TargetMachine.h" |
54 | | #include "llvm/Target/TargetRegisterInfo.h" |
55 | | #include "llvm/Target/TargetSubtargetInfo.h" |
56 | | #include <cassert> |
57 | | #include <cstddef> |
58 | | #include <iterator> |
59 | | #include <numeric> |
60 | | #include <vector> |
61 | | |
62 | | using namespace llvm; |
63 | | |
64 | | #define DEBUG_TYPE "branch-folder" |
65 | | |
66 | | STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); |
67 | | STATISTIC(NumBranchOpts, "Number of branches optimized"); |
68 | | STATISTIC(NumTailMerge , "Number of block tails merged"); |
69 | | STATISTIC(NumHoist , "Number of times common instructions are hoisted"); |
70 | | STATISTIC(NumTailCalls, "Number of tail calls optimized"); |
71 | | |
72 | | static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge", |
73 | | cl::init(cl::BOU_UNSET), cl::Hidden); |
74 | | |
75 | | // Throttle for huge numbers of predecessors (compile speed problems) |
76 | | static cl::opt<unsigned> |
77 | | TailMergeThreshold("tail-merge-threshold", |
78 | | cl::desc("Max number of predecessors to consider tail merging"), |
79 | | cl::init(150), cl::Hidden); |
80 | | |
81 | | // Heuristic for tail merging (and, inversely, tail duplication). |
82 | | // TODO: This should be replaced with a target query. |
83 | | static cl::opt<unsigned> |
84 | | TailMergeSize("tail-merge-size", |
85 | | cl::desc("Min number of instructions to consider tail merging"), |
86 | | cl::init(3), cl::Hidden); |
87 | | |
88 | | namespace { |
89 | | |
90 | | /// BranchFolderPass - Wrap branch folder in a machine function pass. |
91 | | class BranchFolderPass : public MachineFunctionPass { |
92 | | public: |
93 | | static char ID; |
94 | | |
95 | 32.0k | explicit BranchFolderPass(): MachineFunctionPass(ID) {} |
96 | | |
97 | | bool runOnMachineFunction(MachineFunction &MF) override; |
98 | | |
99 | 32.0k | void getAnalysisUsage(AnalysisUsage &AU) const override { |
100 | 32.0k | AU.addRequired<MachineBlockFrequencyInfo>(); |
101 | 32.0k | AU.addRequired<MachineBranchProbabilityInfo>(); |
102 | 32.0k | AU.addRequired<TargetPassConfig>(); |
103 | 32.0k | MachineFunctionPass::getAnalysisUsage(AU); |
104 | 32.0k | } |
105 | | }; |
106 | | |
107 | | } // end anonymous namespace |
108 | | |
109 | | char BranchFolderPass::ID = 0; |
110 | | char &llvm::BranchFolderPassID = BranchFolderPass::ID; |
111 | | |
112 | | INITIALIZE_PASS(BranchFolderPass, DEBUG_TYPE, |
113 | | "Control Flow Optimizer", false, false) |
114 | | |
115 | 588k | bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { |
116 | 588k | if (skipFunction(*MF.getFunction())) |
117 | 44 | return false; |
118 | 588k | |
119 | 588k | TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); |
120 | 588k | // TailMerge can create jump into if branches that make CFG irreducible for |
121 | 588k | // HW that requires structurized CFG. |
122 | 588k | bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() && |
123 | 586k | PassConfig->getEnableTailMerge(); |
124 | 588k | BranchFolder::MBFIWrapper MBBFreqInfo( |
125 | 588k | getAnalysis<MachineBlockFrequencyInfo>()); |
126 | 588k | BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo, |
127 | 588k | getAnalysis<MachineBranchProbabilityInfo>()); |
128 | 588k | return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(), |
129 | 588k | MF.getSubtarget().getRegisterInfo(), |
130 | 588k | getAnalysisIfAvailable<MachineModuleInfo>()); |
131 | 588k | } |
132 | | |
133 | | BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, |
134 | | MBFIWrapper &FreqInfo, |
135 | | const MachineBranchProbabilityInfo &ProbInfo, |
136 | | unsigned MinTailLength) |
137 | | : EnableHoistCommonCode(CommonHoist), MinCommonTailLength(MinTailLength), |
138 | 795k | MBBFreqInfo(FreqInfo), MBPI(ProbInfo) { |
139 | 795k | if (MinCommonTailLength == 0) |
140 | 622k | MinCommonTailLength = TailMergeSize; |
141 | 795k | switch (FlagEnableTailMerge) { |
142 | 795k | case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break; |
143 | 3 | case cl::BOU_TRUE: EnableTailMerge = true; break; |
144 | 51 | case cl::BOU_FALSE: EnableTailMerge = false; break; |
145 | 795k | } |
146 | 795k | } |
147 | | |
148 | 352k | void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { |
149 | 352k | assert(MBB->pred_empty() && "MBB must be dead!"); |
150 | 352k | DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); |
151 | 352k | |
152 | 352k | MachineFunction *MF = MBB->getParent(); |
153 | 352k | // drop all successors. |
154 | 688k | while (!MBB->succ_empty()) |
155 | 335k | MBB->removeSuccessor(MBB->succ_end()-1); |
156 | 352k | |
157 | 352k | // Avoid matching if this pointer gets reused. |
158 | 352k | TriedMerging.erase(MBB); |
159 | 352k | |
160 | 352k | // Remove the block. |
161 | 352k | MF->erase(MBB); |
162 | 352k | FuncletMembership.erase(MBB); |
163 | 352k | if (MLI) |
164 | 16.3k | MLI->removeBlock(MBB); |
165 | 352k | } |
166 | | |
167 | | bool BranchFolder::OptimizeFunction(MachineFunction &MF, |
168 | | const TargetInstrInfo *tii, |
169 | | const TargetRegisterInfo *tri, |
170 | | MachineModuleInfo *mmi, |
171 | 795k | MachineLoopInfo *mli, bool AfterPlacement) { |
172 | 795k | if (!tii795k ) return false0 ; |
173 | 795k | |
174 | 795k | TriedMerging.clear(); |
175 | 795k | |
176 | 795k | MachineRegisterInfo &MRI = MF.getRegInfo(); |
177 | 795k | AfterBlockPlacement = AfterPlacement; |
178 | 795k | TII = tii; |
179 | 795k | TRI = tri; |
180 | 795k | MMI = mmi; |
181 | 795k | MLI = mli; |
182 | 795k | this->MRI = &MRI; |
183 | 795k | |
184 | 793k | UpdateLiveIns = MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF); |
185 | 795k | if (!UpdateLiveIns) |
186 | 6.07k | MRI.invalidateLiveness(); |
187 | 795k | |
188 | 795k | // Fix CFG. The later algorithms expect it to be right. |
189 | 795k | bool MadeChange = false; |
190 | 7.74M | for (MachineBasicBlock &MBB : MF) { |
191 | 7.74M | MachineBasicBlock *TBB = nullptr, *FBB = nullptr; |
192 | 7.74M | SmallVector<MachineOperand, 4> Cond; |
193 | 7.74M | if (!TII->analyzeBranch(MBB, TBB, FBB, Cond, true)) |
194 | 6.46M | MadeChange |= MBB.CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); |
195 | 7.74M | } |
196 | 795k | |
197 | 795k | // Recalculate funclet membership. |
198 | 795k | FuncletMembership = getFuncletMembership(MF); |
199 | 795k | |
200 | 795k | bool MadeChangeThisIteration = true; |
201 | 1.81M | while (MadeChangeThisIteration1.81M ) { |
202 | 1.02M | MadeChangeThisIteration = TailMergeBlocks(MF); |
203 | 1.02M | // No need to clean up if tail merging does not change anything after the |
204 | 1.02M | // block placement. |
205 | 1.02M | if (!AfterBlockPlacement || 1.02M MadeChangeThisIteration184k ) |
206 | 847k | MadeChangeThisIteration |= OptimizeBranches(MF); |
207 | 1.02M | if (EnableHoistCommonCode) |
208 | 798k | MadeChangeThisIteration |= HoistCommonCode(MF); |
209 | 1.02M | MadeChange |= MadeChangeThisIteration; |
210 | 1.02M | } |
211 | 795k | |
212 | 795k | // See if any jump tables have become dead as the code generator |
213 | 795k | // did its thing. |
214 | 795k | MachineJumpTableInfo *JTI = MF.getJumpTableInfo(); |
215 | 795k | if (!JTI) |
216 | 785k | return MadeChange; |
217 | 9.78k | |
218 | 9.78k | // Walk the function to find jump tables that are live. |
219 | 9.78k | BitVector JTIsLive(JTI->getJumpTables().size()); |
220 | 1.14M | for (const MachineBasicBlock &BB : MF) { |
221 | 1.14M | for (const MachineInstr &I : BB) |
222 | 5.02M | for (const MachineOperand &Op : I.operands()) 5.02M { |
223 | 17.6M | if (!Op.isJTI()17.6M ) continue17.6M ; |
224 | 20.9k | |
225 | 20.9k | // Remember that this JT is live. |
226 | 20.9k | JTIsLive.set(Op.getIndex()); |
227 | 20.9k | } |
228 | 1.14M | } |
229 | 9.78k | |
230 | 9.78k | // Finally, remove dead jump tables. This happens when the |
231 | 9.78k | // indirect jump was unreachable (and thus deleted). |
232 | 20.4k | for (unsigned i = 0, e = JTIsLive.size(); i != e20.4k ; ++i10.6k ) |
233 | 10.6k | if (10.6k !JTIsLive.test(i)10.6k ) { |
234 | 0 | JTI->RemoveJumpTable(i); |
235 | 0 | MadeChange = true; |
236 | 0 | } |
237 | 795k | |
238 | 795k | return MadeChange; |
239 | 795k | } |
240 | | |
241 | | //===----------------------------------------------------------------------===// |
242 | | // Tail Merging of Blocks |
243 | | //===----------------------------------------------------------------------===// |
244 | | |
245 | | /// HashMachineInstr - Compute a hash value for MI and its operands. |
246 | 10.2M | static unsigned HashMachineInstr(const MachineInstr &MI) { |
247 | 10.2M | unsigned Hash = MI.getOpcode(); |
248 | 42.7M | for (unsigned i = 0, e = MI.getNumOperands(); i != e42.7M ; ++i32.5M ) { |
249 | 32.5M | const MachineOperand &Op = MI.getOperand(i); |
250 | 32.5M | |
251 | 32.5M | // Merge in bits from the operand if easy. We can't use MachineOperand's |
252 | 32.5M | // hash_code here because it's not deterministic and we sort by hash value |
253 | 32.5M | // later. |
254 | 32.5M | unsigned OperandHash = 0; |
255 | 32.5M | switch (Op.getType()) { |
256 | 18.3M | case MachineOperand::MO_Register: |
257 | 18.3M | OperandHash = Op.getReg(); |
258 | 18.3M | break; |
259 | 6.42M | case MachineOperand::MO_Immediate: |
260 | 6.42M | OperandHash = Op.getImm(); |
261 | 6.42M | break; |
262 | 4.86M | case MachineOperand::MO_MachineBasicBlock: |
263 | 4.86M | OperandHash = Op.getMBB()->getNumber(); |
264 | 4.86M | break; |
265 | 15.4k | case MachineOperand::MO_FrameIndex: |
266 | 15.4k | case MachineOperand::MO_ConstantPoolIndex: |
267 | 15.4k | case MachineOperand::MO_JumpTableIndex: |
268 | 15.4k | OperandHash = Op.getIndex(); |
269 | 15.4k | break; |
270 | 1.64M | case MachineOperand::MO_GlobalAddress: |
271 | 1.64M | case MachineOperand::MO_ExternalSymbol: |
272 | 1.64M | // Global address / external symbol are too hard, don't bother, but do |
273 | 1.64M | // pull in the offset. |
274 | 1.64M | OperandHash = Op.getOffset(); |
275 | 1.64M | break; |
276 | 1.22M | default: |
277 | 1.22M | break; |
278 | 32.5M | } |
279 | 32.5M | |
280 | 32.5M | Hash += ((OperandHash << 3) | Op.getType()) << (i & 31); |
281 | 32.5M | } |
282 | 10.2M | return Hash; |
283 | 10.2M | } |
284 | | |
285 | | /// HashEndOfMBB - Hash the last instruction in the MBB. |
286 | 10.5M | static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) { |
287 | 10.5M | MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr(); |
288 | 10.5M | if (I == MBB.end()) |
289 | 275k | return 0; |
290 | 10.2M | |
291 | 10.2M | return HashMachineInstr(*I); |
292 | 10.2M | } |
293 | | |
294 | | /// ComputeCommonTailLength - Given two machine basic blocks, compute the number |
295 | | /// of instructions they actually have in common together at their end. Return |
296 | | /// iterators for the first shared instruction in each block. |
297 | | static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, |
298 | | MachineBasicBlock *MBB2, |
299 | | MachineBasicBlock::iterator &I1, |
300 | 7.35M | MachineBasicBlock::iterator &I2) { |
301 | 7.35M | I1 = MBB1->end(); |
302 | 7.35M | I2 = MBB2->end(); |
303 | 7.35M | |
304 | 7.35M | unsigned TailLen = 0; |
305 | 26.4M | while (I1 != MBB1->begin() && 26.4M I2 != MBB2->begin()25.4M ) { |
306 | 25.3M | --I1; --I2; |
307 | 25.3M | // Skip debugging pseudos; necessary to avoid changing the code. |
308 | 25.3M | while (I1->isDebugValue()25.3M ) { |
309 | 4 | if (I1==MBB1->begin()4 ) { |
310 | 0 | while (I2->isDebugValue()0 ) { |
311 | 0 | if (I2==MBB2->begin()) |
312 | 0 | // I1==DBG at begin; I2==DBG at begin |
313 | 0 | return TailLen; |
314 | 0 | --I2; |
315 | 0 | } |
316 | 0 | ++I2; |
317 | 0 | // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin |
318 | 0 | return TailLen; |
319 | 4 | } |
320 | 4 | --I1; |
321 | 4 | } |
322 | 25.3M | // I1==first (untested) non-DBG preceding known match |
323 | 25.3M | while (25.3M I2->isDebugValue()25.3M ) { |
324 | 5 | if (I2==MBB2->begin()5 ) { |
325 | 0 | ++I1; |
326 | 0 | // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin |
327 | 0 | return TailLen; |
328 | 0 | } |
329 | 5 | --I2; |
330 | 5 | } |
331 | 25.3M | // I1, I2==first (untested) non-DBGs preceding known match |
332 | 25.3M | if (25.3M !I1->isIdenticalTo(*I2) || |
333 | 25.3M | // FIXME: This check is dubious. It's used to get around a problem where |
334 | 25.3M | // people incorrectly expect inline asm directives to remain in the same |
335 | 25.3M | // relative order. This is untenable because normal compiler |
336 | 25.3M | // optimizations (like this one) may reorder and/or merge these |
337 | 25.3M | // directives. |
338 | 25.3M | I1->isInlineAsm()19.0M ) { |
339 | 6.22M | ++I1; ++I2; |
340 | 6.22M | break; |
341 | 6.22M | } |
342 | 19.0M | ++TailLen; |
343 | 19.0M | } |
344 | 7.35M | // Back past possible debugging pseudos at beginning of block. This matters |
345 | 7.35M | // when one block differs from the other only by whether debugging pseudos |
346 | 7.35M | // are present at the beginning. (This way, the various checks later for |
347 | 7.35M | // I1==MBB1->begin() work as expected.) |
348 | 7.35M | if (7.35M I1 == MBB1->begin() && 7.35M I2 != MBB2->begin()1.02M ) { |
349 | 217k | --I2; |
350 | 217k | while (I2->isDebugValue()217k ) { |
351 | 0 | if (I2 == MBB2->begin()) |
352 | 0 | return TailLen; |
353 | 0 | --I2; |
354 | 0 | } |
355 | 217k | ++I2; |
356 | 217k | } |
357 | 7.35M | if (7.35M I2 == MBB2->begin() && 7.35M I1 != MBB1->begin()909k ) { |
358 | 106k | --I1; |
359 | 106k | while (I1->isDebugValue()106k ) { |
360 | 0 | if (I1 == MBB1->begin()) |
361 | 0 | return TailLen; |
362 | 0 | --I1; |
363 | 0 | } |
364 | 106k | ++I1; |
365 | 106k | } |
366 | 7.35M | return TailLen; |
367 | 7.35M | } |
368 | | |
369 | | void BranchFolder::replaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, |
370 | 222k | MachineBasicBlock &NewDest) { |
371 | 222k | if (UpdateLiveIns222k ) { |
372 | 222k | // OldInst should always point to an instruction. |
373 | 222k | MachineBasicBlock &OldMBB = *OldInst->getParent(); |
374 | 222k | LiveRegs.clear(); |
375 | 222k | LiveRegs.addLiveOuts(OldMBB); |
376 | 222k | // Move backward to the place where will insert the jump. |
377 | 222k | MachineBasicBlock::iterator I = OldMBB.end(); |
378 | 700k | do { |
379 | 700k | --I; |
380 | 700k | LiveRegs.stepBackward(*I); |
381 | 700k | } while (I != OldInst); |
382 | 222k | |
383 | 222k | // Merging the tails may have switched some undef operand to non-undef ones. |
384 | 222k | // Add IMPLICIT_DEFS into OldMBB as necessary to have a definition of the |
385 | 222k | // register. |
386 | 957k | for (MachineBasicBlock::RegisterMaskPair P : NewDest.liveins()) { |
387 | 957k | // We computed the liveins with computeLiveIn earlier and should only see |
388 | 957k | // full registers: |
389 | 957k | assert(P.LaneMask == LaneBitmask::getAll() && |
390 | 957k | "Can only handle full register."); |
391 | 957k | MCPhysReg Reg = P.PhysReg; |
392 | 957k | if (!LiveRegs.available(*MRI, Reg)) |
393 | 957k | continue; |
394 | 1 | DebugLoc DL; |
395 | 1 | BuildMI(OldMBB, OldInst, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Reg); |
396 | 1 | } |
397 | 222k | } |
398 | 222k | |
399 | 222k | TII->ReplaceTailWithBranchTo(OldInst, &NewDest); |
400 | 222k | ++NumTailMerge; |
401 | 222k | } |
402 | | |
403 | | MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, |
404 | | MachineBasicBlock::iterator BBI1, |
405 | 81.9k | const BasicBlock *BB) { |
406 | 81.9k | if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1)) |
407 | 0 | return nullptr; |
408 | 81.9k | |
409 | 81.9k | MachineFunction &MF = *CurMBB.getParent(); |
410 | 81.9k | |
411 | 81.9k | // Create the fall-through block. |
412 | 81.9k | MachineFunction::iterator MBBI = CurMBB.getIterator(); |
413 | 81.9k | MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(BB); |
414 | 81.9k | CurMBB.getParent()->insert(++MBBI, NewMBB); |
415 | 81.9k | |
416 | 81.9k | // Move all the successors of this block to the specified block. |
417 | 81.9k | NewMBB->transferSuccessors(&CurMBB); |
418 | 81.9k | |
419 | 81.9k | // Add an edge from CurMBB to NewMBB for the fall-through. |
420 | 81.9k | CurMBB.addSuccessor(NewMBB); |
421 | 81.9k | |
422 | 81.9k | // Splice the code over. |
423 | 81.9k | NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end()); |
424 | 81.9k | |
425 | 81.9k | // NewMBB belongs to the same loop as CurMBB. |
426 | 81.9k | if (MLI) |
427 | 4.23k | if (MachineLoop *4.23k ML4.23k = MLI->getLoopFor(&CurMBB)) |
428 | 350 | ML->addBasicBlockToLoop(NewMBB, MLI->getBase()); |
429 | 81.9k | |
430 | 81.9k | // NewMBB inherits CurMBB's block frequency. |
431 | 81.9k | MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB)); |
432 | 81.9k | |
433 | 81.9k | if (UpdateLiveIns) |
434 | 81.9k | computeAndAddLiveIns(LiveRegs, *NewMBB); |
435 | 81.9k | |
436 | 81.9k | // Add the new block to the funclet. |
437 | 81.9k | const auto &FuncletI = FuncletMembership.find(&CurMBB); |
438 | 81.9k | if (FuncletI != FuncletMembership.end()81.9k ) { |
439 | 0 | auto n = FuncletI->second; |
440 | 0 | FuncletMembership[NewMBB] = n; |
441 | 0 | } |
442 | 81.9k | |
443 | 81.9k | return NewMBB; |
444 | 81.9k | } |
445 | | |
446 | | /// EstimateRuntime - Make a rough estimate for how long it will take to run |
447 | | /// the specified code. |
448 | | static unsigned EstimateRuntime(MachineBasicBlock::iterator I, |
449 | 158k | MachineBasicBlock::iterator E) { |
450 | 158k | unsigned Time = 0; |
451 | 460k | for (; I != E460k ; ++I302k ) { |
452 | 302k | if (I->isDebugValue()) |
453 | 6 | continue; |
454 | 302k | if (302k I->isCall()302k ) |
455 | 19.9k | Time += 10; |
456 | 282k | else if (282k I->mayLoad() || 282k I->mayStore()222k ) |
457 | 90.6k | Time += 2; |
458 | 282k | else |
459 | 191k | ++Time; |
460 | 302k | } |
461 | 158k | return Time; |
462 | 158k | } |
463 | | |
464 | | // CurMBB needs to add an unconditional branch to SuccMBB (we removed these |
465 | | // branches temporarily for tail merging). In the case where CurMBB ends |
466 | | // with a conditional branch to the next block, optimize by reversing the |
467 | | // test and conditionally branching to SuccMBB instead. |
468 | | static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, |
469 | 5.83M | const TargetInstrInfo *TII) { |
470 | 5.83M | MachineFunction *MF = CurMBB->getParent(); |
471 | 5.83M | MachineFunction::iterator I = std::next(MachineFunction::iterator(CurMBB)); |
472 | 5.83M | MachineBasicBlock *TBB = nullptr, *FBB = nullptr; |
473 | 5.83M | SmallVector<MachineOperand, 4> Cond; |
474 | 5.83M | DebugLoc dl = CurMBB->findBranchDebugLoc(); |
475 | 5.83M | if (I != MF->end() && 5.83M !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond, true)5.73M ) { |
476 | 5.73M | MachineBasicBlock *NextBB = &*I; |
477 | 5.73M | if (TBB == NextBB && 5.73M !Cond.empty()3.34M && !FBB3.34M ) { |
478 | 3.34M | if (!TII->reverseBranchCondition(Cond)3.34M ) { |
479 | 3.34M | TII->removeBranch(*CurMBB); |
480 | 3.34M | TII->insertBranch(*CurMBB, SuccBB, nullptr, Cond, dl); |
481 | 3.34M | return; |
482 | 3.34M | } |
483 | 2.48M | } |
484 | 5.73M | } |
485 | 2.48M | TII->insertBranch(*CurMBB, SuccBB, nullptr, |
486 | 2.48M | SmallVector<MachineOperand, 0>(), dl); |
487 | 2.48M | } |
488 | | |
489 | | bool |
490 | 17.2M | BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const { |
491 | 17.2M | if (getHash() < o.getHash()) |
492 | 8.67M | return true; |
493 | 8.57M | if (8.57M getHash() > o.getHash()8.57M ) |
494 | 4.21M | return false; |
495 | 4.36M | if (4.36M getBlock()->getNumber() < o.getBlock()->getNumber()4.36M ) |
496 | 3.07M | return true; |
497 | 1.28M | if (1.28M getBlock()->getNumber() > o.getBlock()->getNumber()1.28M ) |
498 | 1.28M | return false; |
499 | 0 | // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing |
500 | 0 | // an object with itself. |
501 | 0 | #ifndef _GLIBCXX_DEBUG |
502 | 0 | llvm_unreachable0 ("Predecessor appears twice"); |
503 | | #else |
504 | | return false; |
505 | | #endif |
506 | | } |
507 | | |
508 | | BlockFrequency |
509 | 11.6M | BranchFolder::MBFIWrapper::getBlockFreq(const MachineBasicBlock *MBB) const { |
510 | 11.6M | auto I = MergedBBFreq.find(MBB); |
511 | 11.6M | |
512 | 11.6M | if (I != MergedBBFreq.end()) |
513 | 136k | return I->second; |
514 | 11.4M | |
515 | 11.4M | return MBFI.getBlockFreq(MBB); |
516 | 11.4M | } |
517 | | |
518 | | void BranchFolder::MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB, |
519 | 213k | BlockFrequency F) { |
520 | 213k | MergedBBFreq[MBB] = F; |
521 | 213k | } |
522 | | |
523 | | raw_ostream & |
524 | | BranchFolder::MBFIWrapper::printBlockFreq(raw_ostream &OS, |
525 | 0 | const MachineBasicBlock *MBB) const { |
526 | 0 | return MBFI.printBlockFreq(OS, getBlockFreq(MBB)); |
527 | 0 | } |
528 | | |
529 | | raw_ostream & |
530 | | BranchFolder::MBFIWrapper::printBlockFreq(raw_ostream &OS, |
531 | 0 | const BlockFrequency Freq) const { |
532 | 0 | return MBFI.printBlockFreq(OS, Freq); |
533 | 0 | } |
534 | | |
535 | 0 | void BranchFolder::MBFIWrapper::view(const Twine &Name, bool isSimple) { |
536 | 0 | MBFI.view(Name, isSimple); |
537 | 0 | } |
538 | | |
539 | | uint64_t |
540 | 184k | BranchFolder::MBFIWrapper::getEntryFreq() const { |
541 | 184k | return MBFI.getEntryFreq(); |
542 | 184k | } |
543 | | |
544 | | /// CountTerminators - Count the number of terminators in the given |
545 | | /// block and set I to the position of the first non-terminator, if there |
546 | | /// is one, or MBB->end() otherwise. |
547 | | static unsigned CountTerminators(MachineBasicBlock *MBB, |
548 | 227k | MachineBasicBlock::iterator &I) { |
549 | 227k | I = MBB->end(); |
550 | 227k | unsigned NumTerms = 0; |
551 | 266k | while (true266k ) { |
552 | 266k | if (I == MBB->begin()266k ) { |
553 | 793 | I = MBB->end(); |
554 | 793 | break; |
555 | 793 | } |
556 | 266k | --I; |
557 | 266k | if (!I->isTerminator()266k ) break226k ; |
558 | 39.8k | ++NumTerms; |
559 | 39.8k | } |
560 | 227k | return NumTerms; |
561 | 227k | } |
562 | | |
563 | | /// A no successor, non-return block probably ends in unreachable and is cold. |
564 | | /// Also consider a block that ends in an indirect branch to be a return block, |
565 | | /// since many targets use plain indirect branches to return. |
566 | 650k | static bool blockEndsInUnreachable(const MachineBasicBlock *MBB) { |
567 | 650k | if (!MBB->succ_empty()) |
568 | 521k | return false; |
569 | 129k | if (129k MBB->empty()129k ) |
570 | 0 | return true; |
571 | 129k | return !(MBB->back().isReturn() || 129k MBB->back().isIndirectBranch()19.3k ); |
572 | 650k | } |
573 | | |
574 | | /// ProfitableToMerge - Check if two machine basic blocks have a common tail |
575 | | /// and decide if it would be profitable to merge those tails. Return the |
576 | | /// length of the common tail and iterators to the first common instruction |
577 | | /// in each block. |
578 | | /// MBB1, MBB2 The blocks to check |
579 | | /// MinCommonTailLength Minimum size of tail block to be merged. |
580 | | /// CommonTailLen Out parameter to record the size of the shared tail between |
581 | | /// MBB1 and MBB2 |
582 | | /// I1, I2 Iterator references that will be changed to point to the first |
583 | | /// instruction in the common tail shared by MBB1,MBB2 |
584 | | /// SuccBB A common successor of MBB1, MBB2 which are in a canonical form |
585 | | /// relative to SuccBB |
586 | | /// PredBB The layout predecessor of SuccBB, if any. |
587 | | /// FuncletMembership map from block to funclet #. |
588 | | /// AfterPlacement True if we are merging blocks after layout. Stricter |
589 | | /// thresholds apply to prevent undoing tail-duplication. |
590 | | static bool |
591 | | ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, |
592 | | unsigned MinCommonTailLength, unsigned &CommonTailLen, |
593 | | MachineBasicBlock::iterator &I1, |
594 | | MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB, |
595 | | MachineBasicBlock *PredBB, |
596 | | DenseMap<const MachineBasicBlock *, int> &FuncletMembership, |
597 | 7.35M | bool AfterPlacement) { |
598 | 7.35M | // It is never profitable to tail-merge blocks from two different funclets. |
599 | 7.35M | if (!FuncletMembership.empty()7.35M ) { |
600 | 92 | auto Funclet1 = FuncletMembership.find(MBB1); |
601 | 92 | assert(Funclet1 != FuncletMembership.end()); |
602 | 92 | auto Funclet2 = FuncletMembership.find(MBB2); |
603 | 92 | assert(Funclet2 != FuncletMembership.end()); |
604 | 92 | if (Funclet1->second != Funclet2->second) |
605 | 64 | return false; |
606 | 7.35M | } |
607 | 7.35M | |
608 | 7.35M | CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2); |
609 | 7.35M | if (CommonTailLen == 0) |
610 | 926k | return false; |
611 | 6.42M | DEBUG6.42M (dbgs() << "Common tail length of BB#" << MBB1->getNumber() |
612 | 6.42M | << " and BB#" << MBB2->getNumber() << " is " << CommonTailLen |
613 | 6.42M | << '\n'); |
614 | 6.42M | |
615 | 6.42M | // It's almost always profitable to merge any number of non-terminator |
616 | 6.42M | // instructions with the block that falls through into the common successor. |
617 | 6.42M | // This is true only for a single successor. For multiple successors, we are |
618 | 6.42M | // trading a conditional branch for an unconditional one. |
619 | 6.42M | // TODO: Re-visit successor size for non-layout tail merging. |
620 | 6.42M | if ((MBB1 == PredBB || 6.42M MBB2 == PredBB6.07M ) && |
621 | 6.42M | (!AfterPlacement || 543k MBB1->succ_size() == 1332k )) { |
622 | 227k | MachineBasicBlock::iterator I; |
623 | 227k | unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2178k : MBB148.4k , I); |
624 | 227k | if (CommonTailLen > NumTerms) |
625 | 202k | return true; |
626 | 6.22M | } |
627 | 6.22M | |
628 | 6.22M | // If these are identical non-return blocks with no successors, merge them. |
629 | 6.22M | // Such blocks are typically cold calls to noreturn functions like abort, and |
630 | 6.22M | // are unlikely to become a fallthrough target after machine block placement. |
631 | 6.22M | // Tail merging these blocks is unlikely to create additional unconditional |
632 | 6.22M | // branches, and will reduce the size of this cold code. |
633 | 6.22M | if (6.22M I1 == MBB1->begin() && 6.22M I2 == MBB2->begin()846k && |
634 | 6.22M | blockEndsInUnreachable(MBB1)640k && blockEndsInUnreachable(MBB2)9.69k ) |
635 | 9.69k | return true; |
636 | 6.21M | |
637 | 6.21M | // If one of the blocks can be completely merged and happens to be in |
638 | 6.21M | // a position where the other could fall through into it, merge any number |
639 | 6.21M | // of instructions, because it can be done without a branch. |
640 | 6.21M | // TODO: If the blocks are not adjacent, move one of them so that they are? |
641 | 6.21M | if (6.21M MBB1->isLayoutSuccessor(MBB2) && 6.21M I2 == MBB2->begin()17.3k ) |
642 | 1.31k | return true; |
643 | 6.21M | if (6.21M MBB2->isLayoutSuccessor(MBB1) && 6.21M I1 == MBB1->begin()352k ) |
644 | 12.0k | return true; |
645 | 6.20M | |
646 | 6.20M | // If both blocks are identical and end in a branch, merge them unless they |
647 | 6.20M | // both have a fallthrough predecessor and successor. |
648 | 6.20M | // We can only do this after block placement because it depends on whether |
649 | 6.20M | // there are fallthroughs, and we don't know until after layout. |
650 | 6.20M | if (6.20M AfterPlacement && 6.20M I1 == MBB1->begin()3.34M && I2 == MBB2->begin()159k ) { |
651 | 65.9k | auto BothFallThrough = [](MachineBasicBlock *MBB) { |
652 | 65.9k | if (MBB->succ_size() != 0 && 65.9k !MBB->canFallThrough()65.9k ) |
653 | 65.0k | return false; |
654 | 886 | MachineFunction::iterator I(MBB); |
655 | 886 | MachineFunction *MF = MBB->getParent(); |
656 | 886 | return (MBB != &*MF->begin()) && std::prev(I)->canFallThrough(); |
657 | 65.9k | }; |
658 | 65.4k | if (!BothFallThrough(MBB1) || 65.4k !BothFallThrough(MBB2)528 ) |
659 | 65.2k | return true; |
660 | 6.13M | } |
661 | 6.13M | |
662 | 6.13M | // If both blocks have an unconditional branch temporarily stripped out, |
663 | 6.13M | // count that as an additional common instruction for the following |
664 | 6.13M | // heuristics. This heuristic is only accurate for single-succ blocks, so to |
665 | 6.13M | // make sure that during layout merging and duplicating don't crash, we check |
666 | 6.13M | // for that when merging during layout. |
667 | 6.13M | unsigned EffectiveTailLen = CommonTailLen; |
668 | 6.13M | if (SuccBB && 6.13M MBB1 != PredBB4.61M && MBB2 != PredBB4.42M && |
669 | 4.27M | (MBB1->succ_size() == 1 || 4.27M !AfterPlacement2.91M ) && |
670 | 1.36M | !MBB1->back().isBarrier() && |
671 | 1.36M | !MBB2->back().isBarrier()) |
672 | 1.36M | ++EffectiveTailLen; |
673 | 6.13M | |
674 | 6.13M | // Check if the common tail is long enough to be worthwhile. |
675 | 6.13M | if (EffectiveTailLen >= MinCommonTailLength) |
676 | 829k | return true; |
677 | 5.30M | |
678 | 5.30M | // If we are optimizing for code size, 2 instructions in common is enough if |
679 | 5.30M | // we don't have to split a block. At worst we will be introducing 1 new |
680 | 5.30M | // branch instruction, which is likely to be smaller than the 2 |
681 | 5.30M | // instructions that would be deleted in the merge. |
682 | 5.30M | MachineFunction *MF = MBB1->getParent(); |
683 | 4.76M | return EffectiveTailLen >= 2 && MF->getFunction()->optForSize() && |
684 | 2.07k | (I1 == MBB1->begin() || 2.07k I2 == MBB2->begin()1.61k ); |
685 | 7.35M | } |
686 | | |
687 | | unsigned BranchFolder::ComputeSameTails(unsigned CurHash, |
688 | | unsigned MinCommonTailLength, |
689 | | MachineBasicBlock *SuccBB, |
690 | 5.09M | MachineBasicBlock *PredBB) { |
691 | 5.09M | unsigned maxCommonTailLength = 0U; |
692 | 5.09M | SameTails.clear(); |
693 | 5.09M | MachineBasicBlock::iterator TrialBBI1, TrialBBI2; |
694 | 5.09M | MPIterator HighestMPIter = std::prev(MergePotentials.end()); |
695 | 5.09M | for (MPIterator CurMPIter = std::prev(MergePotentials.end()), |
696 | 5.09M | B = MergePotentials.begin(); |
697 | 11.1M | CurMPIter != B && 11.1M CurMPIter->getHash() == CurHash7.78M ; --CurMPIter6.06M ) { |
698 | 12.6M | for (MPIterator I = std::prev(CurMPIter); I->getHash() == CurHash12.6M ; --I6.57M ) { |
699 | 7.35M | unsigned CommonTailLen; |
700 | 7.35M | if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(), |
701 | 7.35M | MinCommonTailLength, |
702 | 7.35M | CommonTailLen, TrialBBI1, TrialBBI2, |
703 | 7.35M | SuccBB, PredBB, |
704 | 7.35M | FuncletMembership, |
705 | 7.35M | AfterBlockPlacement)) { |
706 | 1.12M | if (CommonTailLen > maxCommonTailLength1.12M ) { |
707 | 159k | SameTails.clear(); |
708 | 159k | maxCommonTailLength = CommonTailLen; |
709 | 159k | HighestMPIter = CurMPIter; |
710 | 159k | SameTails.push_back(SameTailElt(CurMPIter, TrialBBI1)); |
711 | 159k | } |
712 | 1.12M | if (HighestMPIter == CurMPIter && |
713 | 413k | CommonTailLen == maxCommonTailLength) |
714 | 387k | SameTails.push_back(SameTailElt(I, TrialBBI2)); |
715 | 1.12M | } |
716 | 7.35M | if (I == B) |
717 | 783k | break; |
718 | 7.35M | } |
719 | 6.06M | } |
720 | 5.09M | return maxCommonTailLength; |
721 | 5.09M | } |
722 | | |
723 | | void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, |
724 | | MachineBasicBlock *SuccBB, |
725 | 4.96M | MachineBasicBlock *PredBB) { |
726 | 4.96M | MPIterator CurMPIter, B; |
727 | 4.96M | for (CurMPIter = std::prev(MergePotentials.end()), |
728 | 4.96M | B = MergePotentials.begin(); |
729 | 10.5M | CurMPIter->getHash() == CurHash10.5M ; --CurMPIter5.62M ) { |
730 | 5.92M | // Put the unconditional branch back, if we need one. |
731 | 5.92M | MachineBasicBlock *CurMBB = CurMPIter->getBlock(); |
732 | 5.92M | if (SuccBB && 5.92M CurMBB != PredBB5.54M ) |
733 | 3.41M | FixTail(CurMBB, SuccBB, TII); |
734 | 5.92M | if (CurMPIter == B) |
735 | 299k | break; |
736 | 5.92M | } |
737 | 4.96M | if (CurMPIter->getHash() != CurHash) |
738 | 4.66M | CurMPIter++; |
739 | 4.96M | MergePotentials.erase(CurMPIter, MergePotentials.end()); |
740 | 4.96M | } |
741 | | |
742 | | bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, |
743 | | MachineBasicBlock *SuccBB, |
744 | | unsigned maxCommonTailLength, |
745 | 81.9k | unsigned &commonTailIndex) { |
746 | 81.9k | commonTailIndex = 0; |
747 | 81.9k | unsigned TimeEstimate = ~0U; |
748 | 240k | for (unsigned i = 0, e = SameTails.size(); i != e240k ; ++i158k ) { |
749 | 200k | // Use PredBB if possible; that doesn't require a new branch. |
750 | 200k | if (SameTails[i].getBlock() == PredBB200k ) { |
751 | 42.2k | commonTailIndex = i; |
752 | 42.2k | break; |
753 | 42.2k | } |
754 | 158k | // Otherwise, make a (fairly bogus) choice based on estimate of |
755 | 158k | // how long it will take the various blocks to execute. |
756 | 158k | unsigned t = EstimateRuntime(SameTails[i].getBlock()->begin(), |
757 | 158k | SameTails[i].getTailStartPos()); |
758 | 158k | if (t <= TimeEstimate158k ) { |
759 | 140k | TimeEstimate = t; |
760 | 140k | commonTailIndex = i; |
761 | 140k | } |
762 | 200k | } |
763 | 81.9k | |
764 | 81.9k | MachineBasicBlock::iterator BBI = |
765 | 81.9k | SameTails[commonTailIndex].getTailStartPos(); |
766 | 81.9k | MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); |
767 | 81.9k | |
768 | 81.9k | DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size " |
769 | 81.9k | << maxCommonTailLength); |
770 | 81.9k | |
771 | 81.9k | // If the split block unconditionally falls-thru to SuccBB, it will be |
772 | 81.9k | // merged. In control flow terms it should then take SuccBB's name. e.g. If |
773 | 81.9k | // SuccBB is an inner loop, the common tail is still part of the inner loop. |
774 | 63.0k | const BasicBlock *BB = (SuccBB && MBB->succ_size() == 1) ? |
775 | 81.9k | SuccBB->getBasicBlock()49.8k : MBB->getBasicBlock()32.1k ; |
776 | 81.9k | MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI, BB); |
777 | 81.9k | if (!newMBB81.9k ) { |
778 | 0 | DEBUG(dbgs() << "... failed!"); |
779 | 0 | return false; |
780 | 0 | } |
781 | 81.9k | |
782 | 81.9k | SameTails[commonTailIndex].setBlock(newMBB); |
783 | 81.9k | SameTails[commonTailIndex].setTailStartPos(newMBB->begin()); |
784 | 81.9k | |
785 | 81.9k | // If we split PredBB, newMBB is the new predecessor. |
786 | 81.9k | if (PredBB == MBB) |
787 | 42.2k | PredBB = newMBB; |
788 | 81.9k | |
789 | 81.9k | return true; |
790 | 81.9k | } |
791 | | |
792 | | static void |
793 | | mergeOperations(MachineBasicBlock::iterator MBBIStartPos, |
794 | 222k | MachineBasicBlock &MBBCommon) { |
795 | 222k | MachineBasicBlock *MBB = MBBIStartPos->getParent(); |
796 | 222k | // Note CommonTailLen does not necessarily matches the size of |
797 | 222k | // the common BB nor all its instructions because of debug |
798 | 222k | // instructions differences. |
799 | 222k | unsigned CommonTailLen = 0; |
800 | 923k | for (auto E = MBB->end(); MBBIStartPos != E923k ; ++MBBIStartPos700k ) |
801 | 700k | ++CommonTailLen; |
802 | 222k | |
803 | 222k | MachineBasicBlock::reverse_iterator MBBI = MBB->rbegin(); |
804 | 222k | MachineBasicBlock::reverse_iterator MBBIE = MBB->rend(); |
805 | 222k | MachineBasicBlock::reverse_iterator MBBICommon = MBBCommon.rbegin(); |
806 | 222k | MachineBasicBlock::reverse_iterator MBBIECommon = MBBCommon.rend(); |
807 | 222k | |
808 | 923k | while (CommonTailLen--923k ) { |
809 | 700k | assert(MBBI != MBBIE && "Reached BB end within common tail length!"); |
810 | 700k | (void)MBBIE; |
811 | 700k | |
812 | 700k | if (MBBI->isDebugValue()700k ) { |
813 | 1 | ++MBBI; |
814 | 1 | continue; |
815 | 1 | } |
816 | 700k | |
817 | 700k | while (700k (MBBICommon != MBBIECommon) && 700k MBBICommon->isDebugValue()700k ) |
818 | 0 | ++MBBICommon; |
819 | 700k | |
820 | 700k | assert(MBBICommon != MBBIECommon && |
821 | 700k | "Reached BB end within common tail length!"); |
822 | 700k | assert(MBBICommon->isIdenticalTo(*MBBI) && "Expected matching MIIs!"); |
823 | 700k | |
824 | 700k | // Merge MMOs from memory operations in the common block. |
825 | 700k | if (MBBICommon->mayLoad() || 700k MBBICommon->mayStore()560k ) |
826 | 159k | MBBICommon->setMemRefs(MBBICommon->mergeMemRefsWith(*MBBI)); |
827 | 700k | // Drop undef flags if they aren't present in all merged instructions. |
828 | 3.33M | for (unsigned I = 0, E = MBBICommon->getNumOperands(); I != E3.33M ; ++I2.63M ) { |
829 | 2.63M | MachineOperand &MO = MBBICommon->getOperand(I); |
830 | 2.63M | if (MO.isReg() && 2.63M MO.isUndef()1.92M ) { |
831 | 1.36k | const MachineOperand &OtherMO = MBBI->getOperand(I); |
832 | 1.36k | if (!OtherMO.isUndef()) |
833 | 42 | MO.setIsUndef(false); |
834 | 1.36k | } |
835 | 2.63M | } |
836 | 700k | |
837 | 700k | ++MBBI; |
838 | 700k | ++MBBICommon; |
839 | 700k | } |
840 | 222k | } |
841 | | |
842 | 131k | void BranchFolder::mergeCommonTails(unsigned commonTailIndex) { |
843 | 131k | MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); |
844 | 131k | |
845 | 131k | std::vector<MachineBasicBlock::iterator> NextCommonInsts(SameTails.size()); |
846 | 485k | for (unsigned int i = 0 ; i != SameTails.size()485k ; ++i354k ) { |
847 | 354k | if (i != commonTailIndex354k ) { |
848 | 222k | NextCommonInsts[i] = SameTails[i].getTailStartPos(); |
849 | 222k | mergeOperations(SameTails[i].getTailStartPos(), *MBB); |
850 | 354k | } else { |
851 | 131k | assert(SameTails[i].getTailStartPos() == MBB->begin() && |
852 | 131k | "MBB is not a common tail only block"); |
853 | 131k | } |
854 | 354k | } |
855 | 131k | |
856 | 357k | for (auto &MI : *MBB) { |
857 | 357k | if (MI.isDebugValue()) |
858 | 0 | continue; |
859 | 357k | DebugLoc DL = MI.getDebugLoc(); |
860 | 1.41M | for (unsigned int i = 0 ; i < NextCommonInsts.size()1.41M ; i++1.05M ) { |
861 | 1.05M | if (i == commonTailIndex) |
862 | 357k | continue; |
863 | 700k | |
864 | 700k | auto &Pos = NextCommonInsts[i]; |
865 | 700k | assert(Pos != SameTails[i].getBlock()->end() && |
866 | 700k | "Reached BB end within common tail"); |
867 | 700k | while (Pos->isDebugValue()700k ) { |
868 | 0 | ++Pos; |
869 | 0 | assert(Pos != SameTails[i].getBlock()->end() && |
870 | 0 | "Reached BB end within common tail"); |
871 | 0 | } |
872 | 1.05M | assert(MI.isIdenticalTo(*Pos) && "Expected matching MIIs!"); |
873 | 1.05M | DL = DILocation::getMergedLocation(DL, Pos->getDebugLoc()); |
874 | 1.05M | NextCommonInsts[i] = ++Pos; |
875 | 1.05M | } |
876 | 357k | MI.setDebugLoc(DL); |
877 | 357k | } |
878 | 131k | |
879 | 131k | if (UpdateLiveIns131k ) { |
880 | 131k | LivePhysRegs NewLiveIns(*TRI); |
881 | 131k | computeLiveIns(NewLiveIns, *MBB); |
882 | 131k | |
883 | 131k | // The flag merging may lead to some register uses no longer using the |
884 | 131k | // <undef> flag, add IMPLICIT_DEFs in the predecessors as necessary. |
885 | 159k | for (MachineBasicBlock *Pred : MBB->predecessors()) { |
886 | 159k | LiveRegs.init(*TRI); |
887 | 159k | LiveRegs.addLiveOuts(*Pred); |
888 | 159k | MachineBasicBlock::iterator InsertBefore = Pred->getFirstTerminator(); |
889 | 1.48M | for (unsigned Reg : NewLiveIns) { |
890 | 1.48M | if (!LiveRegs.available(*MRI, Reg)) |
891 | 1.48M | continue; |
892 | 39 | DebugLoc DL; |
893 | 39 | BuildMI(*Pred, InsertBefore, DL, TII->get(TargetOpcode::IMPLICIT_DEF), |
894 | 39 | Reg); |
895 | 39 | } |
896 | 159k | } |
897 | 131k | |
898 | 131k | MBB->clearLiveIns(); |
899 | 131k | addLiveIns(*MBB, NewLiveIns); |
900 | 131k | } |
901 | 131k | } |
902 | | |
903 | | // See if any of the blocks in MergePotentials (which all have SuccBB as a |
904 | | // successor, or all have no successor if it is null) can be tail-merged. |
905 | | // If there is a successor, any blocks in MergePotentials that are not |
906 | | // tail-merged and are not immediately before Succ must have an unconditional |
907 | | // branch to Succ added (but the predecessor/successor lists need no |
908 | | // adjustment). The lone predecessor of Succ that falls through into Succ, |
909 | | // if any, is given in PredBB. |
910 | | // MinCommonTailLength - Except for the special cases below, tail-merge if |
911 | | // there are at least this many instructions in common. |
912 | | bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, |
913 | | MachineBasicBlock *PredBB, |
914 | 3.32M | unsigned MinCommonTailLength) { |
915 | 3.32M | bool MadeChange = false; |
916 | 3.32M | |
917 | 3.32M | DEBUG(dbgs() << "\nTryTailMergeBlocks: "; |
918 | 3.32M | for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) |
919 | 3.32M | dbgs() << "BB#" << MergePotentials[i].getBlock()->getNumber() |
920 | 3.32M | << (i == e-1 ? "" : ", "); |
921 | 3.32M | dbgs() << "\n"; |
922 | 3.32M | if (SuccBB) { |
923 | 3.32M | dbgs() << " with successor BB#" << SuccBB->getNumber() << '\n'; |
924 | 3.32M | if (PredBB) |
925 | 3.32M | dbgs() << " which has fall-through from BB#" |
926 | 3.32M | << PredBB->getNumber() << "\n"; |
927 | 3.32M | } |
928 | 3.32M | dbgs() << "Looking for common tails of at least " |
929 | 3.32M | << MinCommonTailLength << " instruction" |
930 | 3.32M | << (MinCommonTailLength == 1 ? "" : "s") << '\n'; |
931 | 3.32M | ); |
932 | 3.32M | |
933 | 3.32M | // Sort by hash value so that blocks with identical end sequences sort |
934 | 3.32M | // together. |
935 | 3.32M | array_pod_sort(MergePotentials.begin(), MergePotentials.end()); |
936 | 3.32M | |
937 | 3.32M | // Walk through equivalence sets looking for actual exact matches. |
938 | 8.41M | while (MergePotentials.size() > 18.41M ) { |
939 | 5.09M | unsigned CurHash = MergePotentials.back().getHash(); |
940 | 5.09M | |
941 | 5.09M | // Build SameTails, identifying the set of blocks with this hash code |
942 | 5.09M | // and with the maximum number of instructions in common. |
943 | 5.09M | unsigned maxCommonTailLength = ComputeSameTails(CurHash, |
944 | 5.09M | MinCommonTailLength, |
945 | 5.09M | SuccBB, PredBB); |
946 | 5.09M | |
947 | 5.09M | // If we didn't find any pair that has at least MinCommonTailLength |
948 | 5.09M | // instructions in common, remove all blocks with this hash code and retry. |
949 | 5.09M | if (SameTails.empty()5.09M ) { |
950 | 4.96M | RemoveBlocksWithHash(CurHash, SuccBB, PredBB); |
951 | 4.96M | continue; |
952 | 4.96M | } |
953 | 131k | |
954 | 131k | // If one of the blocks is the entire common tail (and not the entry |
955 | 131k | // block, which we can't jump to), we can treat all blocks with this same |
956 | 131k | // tail at once. Use PredBB if that is one of the possibilities, as that |
957 | 131k | // will not introduce any extra branches. |
958 | 131k | MachineBasicBlock *EntryBB = |
959 | 131k | &MergePotentials.front().getBlock()->getParent()->front(); |
960 | 131k | unsigned commonTailIndex = SameTails.size(); |
961 | 131k | // If there are two blocks, check to see if one can be made to fall through |
962 | 131k | // into the other. |
963 | 131k | if (SameTails.size() == 2 && |
964 | 105k | SameTails[0].getBlock()->isLayoutSuccessor(SameTails[1].getBlock()) && |
965 | 3.72k | SameTails[1].tailIsWholeBlock()) |
966 | 3.13k | commonTailIndex = 1; |
967 | 127k | else if (127k SameTails.size() == 2 && |
968 | 102k | SameTails[1].getBlock()->isLayoutSuccessor( |
969 | 102k | SameTails[0].getBlock()) && |
970 | 28.9k | SameTails[0].tailIsWholeBlock()) |
971 | 6.74k | commonTailIndex = 0; |
972 | 121k | else { |
973 | 121k | // Otherwise just pick one, favoring the fall-through predecessor if |
974 | 121k | // there is one. |
975 | 329k | for (unsigned i = 0, e = SameTails.size(); i != e329k ; ++i207k ) { |
976 | 274k | MachineBasicBlock *MBB = SameTails[i].getBlock(); |
977 | 274k | if (MBB == EntryBB && 274k SameTails[i].tailIsWholeBlock()2.98k ) |
978 | 3 | continue; |
979 | 274k | if (274k MBB == PredBB274k ) { |
980 | 66.2k | commonTailIndex = i; |
981 | 66.2k | break; |
982 | 66.2k | } |
983 | 207k | if (207k SameTails[i].tailIsWholeBlock()207k ) |
984 | 53.4k | commonTailIndex = i; |
985 | 274k | } |
986 | 127k | } |
987 | 131k | |
988 | 131k | if (commonTailIndex == SameTails.size() || |
989 | 91.3k | (SameTails[commonTailIndex].getBlock() == PredBB && |
990 | 131k | !SameTails[commonTailIndex].tailIsWholeBlock()69.9k )) { |
991 | 81.9k | // None of the blocks consist entirely of the common tail. |
992 | 81.9k | // Split a block so that one does. |
993 | 81.9k | if (!CreateCommonTailOnlyBlock(PredBB, SuccBB, |
994 | 81.9k | maxCommonTailLength, commonTailIndex)) { |
995 | 0 | RemoveBlocksWithHash(CurHash, SuccBB, PredBB); |
996 | 0 | continue; |
997 | 0 | } |
998 | 131k | } |
999 | 131k | |
1000 | 131k | MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); |
1001 | 131k | |
1002 | 131k | // Recompute common tail MBB's edge weights and block frequency. |
1003 | 131k | setCommonTailEdgeWeights(*MBB); |
1004 | 131k | |
1005 | 131k | // Merge debug locations, MMOs and undef flags across identical instructions |
1006 | 131k | // for common tail. |
1007 | 131k | mergeCommonTails(commonTailIndex); |
1008 | 131k | |
1009 | 131k | // MBB is common tail. Adjust all other BB's to jump to this one. |
1010 | 131k | // Traversal must be forwards so erases work. |
1011 | 131k | DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber() |
1012 | 131k | << " for "); |
1013 | 485k | for (unsigned int i=0, e = SameTails.size(); i != e485k ; ++i354k ) { |
1014 | 354k | if (commonTailIndex == i) |
1015 | 131k | continue; |
1016 | 222k | DEBUG222k (dbgs() << "BB#" << SameTails[i].getBlock()->getNumber() |
1017 | 222k | << (i == e-1 ? "" : ", ")); |
1018 | 222k | // Hack the end off BB i, making it jump to BB commonTailIndex instead. |
1019 | 222k | replaceTailWithBranchTo(SameTails[i].getTailStartPos(), *MBB); |
1020 | 222k | // BB i is no longer a predecessor of SuccBB; remove it from the worklist. |
1021 | 222k | MergePotentials.erase(SameTails[i].getMPIter()); |
1022 | 222k | } |
1023 | 131k | DEBUG(dbgs() << "\n"); |
1024 | 5.09M | // We leave commonTailIndex in the worklist in case there are other blocks |
1025 | 5.09M | // that match it with a smaller number of instructions. |
1026 | 5.09M | MadeChange = true; |
1027 | 5.09M | } |
1028 | 3.32M | return MadeChange; |
1029 | 3.32M | } |
1030 | | |
1031 | 1.02M | bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { |
1032 | 1.02M | bool MadeChange = false; |
1033 | 1.02M | if (!EnableTailMerge1.02M ) return MadeChange20.2k ; |
1034 | 1.00M | |
1035 | 1.00M | // First find blocks with no successors. |
1036 | 1.00M | // Block placement does not create new tail merging opportunities for these |
1037 | 1.00M | // blocks. |
1038 | 1.00M | if (1.00M !AfterBlockPlacement1.00M ) { |
1039 | 815k | MergePotentials.clear(); |
1040 | 9.66M | for (MachineBasicBlock &MBB : MF) { |
1041 | 9.66M | if (MergePotentials.size() == TailMergeThreshold) |
1042 | 1 | break; |
1043 | 9.66M | if (9.66M !TriedMerging.count(&MBB) && 9.66M MBB.succ_empty()9.65M ) |
1044 | 1.24M | MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB)); |
1045 | 9.66M | } |
1046 | 815k | |
1047 | 815k | // If this is a large problem, avoid visiting the same basic blocks |
1048 | 815k | // multiple times. |
1049 | 815k | if (MergePotentials.size() == TailMergeThreshold) |
1050 | 151 | for (unsigned i = 0, e = MergePotentials.size(); 1 i != e151 ; ++i150 ) |
1051 | 150 | TriedMerging.insert(MergePotentials[i].getBlock()); |
1052 | 815k | |
1053 | 815k | // See if we can do any tail merging on those. |
1054 | 815k | if (MergePotentials.size() >= 2) |
1055 | 92.4k | MadeChange |= TryTailMergeBlocks(nullptr, nullptr, MinCommonTailLength); |
1056 | 815k | } |
1057 | 1.00M | |
1058 | 1.00M | // Look at blocks (IBB) with multiple predecessors (PBB). |
1059 | 1.00M | // We change each predecessor to a canonical form, by |
1060 | 1.00M | // (1) temporarily removing any unconditional branch from the predecessor |
1061 | 1.00M | // to IBB, and |
1062 | 1.00M | // (2) alter conditional branches so they branch to the other block |
1063 | 1.00M | // not IBB; this may require adding back an unconditional branch to IBB |
1064 | 1.00M | // later, where there wasn't one coming in. E.g. |
1065 | 1.00M | // Bcc IBB |
1066 | 1.00M | // fallthrough to QBB |
1067 | 1.00M | // here becomes |
1068 | 1.00M | // Bncc QBB |
1069 | 1.00M | // with a conceptual B to IBB after that, which never actually exists. |
1070 | 1.00M | // With those changes, we see whether the predecessors' tails match, |
1071 | 1.00M | // and merge them if so. We change things out of canonical form and |
1072 | 1.00M | // back to the way they were later in the process. (OptimizeBranches |
1073 | 1.00M | // would undo some of this, but we can't use it, because we'd get into |
1074 | 1.00M | // a compile-time infinite loop repeatedly doing and undoing the same |
1075 | 1.00M | // transformations.) |
1076 | 1.00M | |
1077 | 1.00M | for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end(); |
1078 | 13.7M | I != E13.7M ; ++I12.7M ) { |
1079 | 12.7M | if (I->pred_size() < 212.7M ) continue8.43M ; |
1080 | 4.31M | SmallPtrSet<MachineBasicBlock *, 8> UniquePreds; |
1081 | 4.31M | MachineBasicBlock *IBB = &*I; |
1082 | 4.31M | MachineBasicBlock *PredBB = &*std::prev(I); |
1083 | 4.31M | MergePotentials.clear(); |
1084 | 4.31M | MachineLoop *ML; |
1085 | 4.31M | |
1086 | 4.31M | // Bail if merging after placement and IBB is the loop header because |
1087 | 4.31M | // -- If merging predecessors that belong to the same loop as IBB, the |
1088 | 4.31M | // common tail of merged predecessors may become the loop top if block |
1089 | 4.31M | // placement is called again and the predecessors may branch to this common |
1090 | 4.31M | // tail and require more branches. This can be relaxed if |
1091 | 4.31M | // MachineBlockPlacement::findBestLoopTop is more flexible. |
1092 | 4.31M | // --If merging predecessors that do not belong to the same loop as IBB, the |
1093 | 4.31M | // loop info of IBB's loop and the other loops may be affected. Calling the |
1094 | 4.31M | // block placement again may make big change to the layout and eliminate the |
1095 | 4.31M | // reason to do tail merging here. |
1096 | 4.31M | if (AfterBlockPlacement && 4.31M MLI1.37M ) { |
1097 | 1.37M | ML = MLI->getLoopFor(IBB); |
1098 | 1.37M | if (ML && 1.37M IBB == ML->getHeader()720k ) |
1099 | 413k | continue; |
1100 | 3.90M | } |
1101 | 3.90M | |
1102 | 3.90M | for (MachineBasicBlock *PBB : I->predecessors()) 3.90M { |
1103 | 10.2M | if (MergePotentials.size() == TailMergeThreshold) |
1104 | 18 | break; |
1105 | 10.2M | |
1106 | 10.2M | if (10.2M TriedMerging.count(PBB)10.2M ) |
1107 | 6.88k | continue; |
1108 | 10.1M | |
1109 | 10.1M | // Skip blocks that loop to themselves, can't tail merge these. |
1110 | 10.1M | if (10.1M PBB == IBB10.1M ) |
1111 | 492k | continue; |
1112 | 9.70M | |
1113 | 9.70M | // Visit each predecessor only once. |
1114 | 9.70M | if (9.70M !UniquePreds.insert(PBB).second9.70M ) |
1115 | 0 | continue; |
1116 | 9.70M | |
1117 | 9.70M | // Skip blocks which may jump to a landing pad. Can't tail merge these. |
1118 | 9.70M | if (9.70M PBB->hasEHPadSuccessor()9.70M ) |
1119 | 59.5k | continue; |
1120 | 9.64M | |
1121 | 9.64M | // After block placement, only consider predecessors that belong to the |
1122 | 9.64M | // same loop as IBB. The reason is the same as above when skipping loop |
1123 | 9.64M | // header. |
1124 | 9.64M | if (9.64M AfterBlockPlacement && 9.64M MLI2.69M ) |
1125 | 2.69M | if (2.69M ML != MLI->getLoopFor(PBB)2.69M ) |
1126 | 351k | continue; |
1127 | 9.29M | |
1128 | 9.29M | MachineBasicBlock *TBB = nullptr, *FBB = nullptr; |
1129 | 9.29M | SmallVector<MachineOperand, 4> Cond; |
1130 | 9.29M | if (!TII->analyzeBranch(*PBB, TBB, FBB, Cond, true)9.29M ) { |
1131 | 9.27M | // Failing case: IBB is the target of a cbr, and we cannot reverse the |
1132 | 9.27M | // branch. |
1133 | 9.27M | SmallVector<MachineOperand, 4> NewCond(Cond); |
1134 | 9.27M | if (!Cond.empty() && 9.27M TBB == IBB4.86M ) { |
1135 | 3.79M | if (TII->reverseBranchCondition(NewCond)) |
1136 | 63 | continue; |
1137 | 3.79M | // This is the QBB case described above |
1138 | 3.79M | if (3.79M !FBB3.79M ) { |
1139 | 2.72M | auto Next = ++PBB->getIterator(); |
1140 | 2.72M | if (Next != MF.end()) |
1141 | 2.72M | FBB = &*Next; |
1142 | 2.72M | } |
1143 | 3.79M | } |
1144 | 9.27M | |
1145 | 9.27M | // Failing case: the only way IBB can be reached from PBB is via |
1146 | 9.27M | // exception handling. Happens for landing pads. Would be nice to have |
1147 | 9.27M | // a bit in the edge so we didn't have to do all this. |
1148 | 9.27M | if (9.27M IBB->isEHPad()9.27M ) { |
1149 | 0 | MachineFunction::iterator IP = ++PBB->getIterator(); |
1150 | 0 | MachineBasicBlock *PredNextBB = nullptr; |
1151 | 0 | if (IP != MF.end()) |
1152 | 0 | PredNextBB = &*IP; |
1153 | 0 | if (!TBB0 ) { |
1154 | 0 | if (IBB != PredNextBB) // fallthrough |
1155 | 0 | continue; |
1156 | 0 | } else if (0 FBB0 ) { |
1157 | 0 | if (TBB != IBB && 0 FBB != IBB0 ) // cbr then ubr |
1158 | 0 | continue; |
1159 | 0 | } else if (0 Cond.empty()0 ) { |
1160 | 0 | if (TBB != IBB) // ubr |
1161 | 0 | continue; |
1162 | 0 | } else { |
1163 | 0 | if (TBB != IBB && 0 IBB != PredNextBB0 ) // cbr |
1164 | 0 | continue; |
1165 | 9.27M | } |
1166 | 0 | } |
1167 | 9.27M | |
1168 | 9.27M | // Remove the unconditional branch at the end, if any. |
1169 | 9.27M | if (9.27M TBB && 9.27M (Cond.empty() || 6.85M FBB4.86M )) { |
1170 | 6.10M | DebugLoc dl = PBB->findBranchDebugLoc(); |
1171 | 6.10M | TII->removeBranch(*PBB); |
1172 | 6.10M | if (!Cond.empty()) |
1173 | 6.10M | // reinsert conditional branch only, for now |
1174 | 4.11M | TII->insertBranch(*PBB, (TBB == IBB) ? 4.11M FBB3.79M : TBB322k , nullptr, |
1175 | 4.11M | NewCond, dl); |
1176 | 6.10M | } |
1177 | 9.27M | |
1178 | 9.27M | MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(*PBB), PBB)); |
1179 | 9.27M | } |
1180 | 10.2M | } |
1181 | 3.90M | |
1182 | 3.90M | // If this is a large problem, avoid visiting the same basic blocks multiple |
1183 | 3.90M | // times. |
1184 | 3.90M | if (MergePotentials.size() == TailMergeThreshold) |
1185 | 2.42k | for (unsigned i = 0, e = MergePotentials.size(); 18 i != e2.42k ; ++i2.40k ) |
1186 | 2.40k | TriedMerging.insert(MergePotentials[i].getBlock()); |
1187 | 3.90M | |
1188 | 3.90M | if (MergePotentials.size() >= 2) |
1189 | 3.23M | MadeChange |= TryTailMergeBlocks(IBB, PredBB, MinCommonTailLength); |
1190 | 3.90M | |
1191 | 3.90M | // Reinsert an unconditional branch if needed. The 1 below can occur as a |
1192 | 3.90M | // result of removing blocks in TryTailMergeBlocks. |
1193 | 3.90M | PredBB = &*std::prev(I); // this may have been changed in TryTailMergeBlocks |
1194 | 3.90M | if (MergePotentials.size() == 1 && |
1195 | 3.57M | MergePotentials.begin()->getBlock() != PredBB) |
1196 | 2.41M | FixTail(MergePotentials.begin()->getBlock(), IBB, TII); |
1197 | 12.7M | } |
1198 | 1.02M | |
1199 | 1.02M | return MadeChange; |
1200 | 1.02M | } |
1201 | | |
1202 | 131k | void BranchFolder::setCommonTailEdgeWeights(MachineBasicBlock &TailMBB) { |
1203 | 131k | SmallVector<BlockFrequency, 2> EdgeFreqLs(TailMBB.succ_size()); |
1204 | 131k | BlockFrequency AccumulatedMBBFreq; |
1205 | 131k | |
1206 | 131k | // Aggregate edge frequency of successor edge j: |
1207 | 131k | // edgeFreq(j) = sum (freq(bb) * edgeProb(bb, j)), |
1208 | 131k | // where bb is a basic block that is in SameTails. |
1209 | 354k | for (const auto &Src : SameTails) { |
1210 | 354k | const MachineBasicBlock *SrcMBB = Src.getBlock(); |
1211 | 354k | BlockFrequency BlockFreq = MBBFreqInfo.getBlockFreq(SrcMBB); |
1212 | 354k | AccumulatedMBBFreq += BlockFreq; |
1213 | 354k | |
1214 | 354k | // It is not necessary to recompute edge weights if TailBB has less than two |
1215 | 354k | // successors. |
1216 | 354k | if (TailMBB.succ_size() <= 1) |
1217 | 317k | continue; |
1218 | 36.7k | |
1219 | 36.7k | auto EdgeFreq = EdgeFreqLs.begin(); |
1220 | 36.7k | |
1221 | 36.7k | for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end(); |
1222 | 110k | SuccI != SuccE110k ; ++SuccI, ++EdgeFreq73.4k ) |
1223 | 73.4k | *EdgeFreq += BlockFreq * MBPI.getEdgeProbability(SrcMBB, *SuccI); |
1224 | 354k | } |
1225 | 131k | |
1226 | 131k | MBBFreqInfo.setBlockFreq(&TailMBB, AccumulatedMBBFreq); |
1227 | 131k | |
1228 | 131k | if (TailMBB.succ_size() <= 1) |
1229 | 113k | return; |
1230 | 17.3k | |
1231 | 17.3k | auto SumEdgeFreq = |
1232 | 17.3k | std::accumulate(EdgeFreqLs.begin(), EdgeFreqLs.end(), BlockFrequency(0)) |
1233 | 17.3k | .getFrequency(); |
1234 | 17.3k | auto EdgeFreq = EdgeFreqLs.begin(); |
1235 | 17.3k | |
1236 | 17.3k | if (SumEdgeFreq > 017.3k ) { |
1237 | 17.3k | for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end(); |
1238 | 52.1k | SuccI != SuccE52.1k ; ++SuccI, ++EdgeFreq34.7k ) { |
1239 | 34.7k | auto Prob = BranchProbability::getBranchProbability( |
1240 | 34.7k | EdgeFreq->getFrequency(), SumEdgeFreq); |
1241 | 34.7k | TailMBB.setSuccProbability(SuccI, Prob); |
1242 | 34.7k | } |
1243 | 17.3k | } |
1244 | 131k | } |
1245 | | |
1246 | | //===----------------------------------------------------------------------===// |
1247 | | // Branch Optimization |
1248 | | //===----------------------------------------------------------------------===// |
1249 | | |
1250 | 847k | bool BranchFolder::OptimizeBranches(MachineFunction &MF) { |
1251 | 847k | bool MadeChange = false; |
1252 | 847k | |
1253 | 847k | // Make sure blocks are numbered in order |
1254 | 847k | MF.RenumberBlocks(); |
1255 | 847k | // Renumbering blocks alters funclet membership, recalculate it. |
1256 | 847k | FuncletMembership = getFuncletMembership(MF); |
1257 | 847k | |
1258 | 847k | for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end(); |
1259 | 10.7M | I != E10.7M ; ) { |
1260 | 9.89M | MachineBasicBlock *MBB = &*I++; |
1261 | 9.89M | MadeChange |= OptimizeBlock(MBB); |
1262 | 9.89M | |
1263 | 9.89M | // If it is dead, remove it. |
1264 | 9.89M | if (MBB->pred_empty()9.89M ) { |
1265 | 352k | RemoveDeadBlock(MBB); |
1266 | 352k | MadeChange = true; |
1267 | 352k | ++NumDeadBlocks; |
1268 | 352k | } |
1269 | 9.89M | } |
1270 | 847k | |
1271 | 847k | return MadeChange; |
1272 | 847k | } |
1273 | | |
1274 | | // Blocks should be considered empty if they contain only debug info; |
1275 | | // else the debug info would affect codegen. |
1276 | 21.4M | static bool IsEmptyBlock(MachineBasicBlock *MBB) { |
1277 | 21.4M | return MBB->getFirstNonDebugInstr() == MBB->end(); |
1278 | 21.4M | } |
1279 | | |
1280 | | // Blocks with only debug info and branches should be considered the same |
1281 | | // as blocks with only branches. |
1282 | 1.96M | static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) { |
1283 | 1.96M | MachineBasicBlock::iterator I = MBB->getFirstNonDebugInstr(); |
1284 | 1.96M | assert(I != MBB->end() && "empty block!"); |
1285 | 1.96M | return I->isBranch(); |
1286 | 1.96M | } |
1287 | | |
1288 | | /// IsBetterFallthrough - Return true if it would be clearly better to |
1289 | | /// fall-through to MBB1 than to fall through into MBB2. This has to return |
1290 | | /// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will |
1291 | | /// result in infinite loops. |
1292 | | static bool IsBetterFallthrough(MachineBasicBlock *MBB1, |
1293 | 35.0k | MachineBasicBlock *MBB2) { |
1294 | 35.0k | // Right now, we use a simple heuristic. If MBB2 ends with a call, and |
1295 | 35.0k | // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to |
1296 | 35.0k | // optimize branches that branch to either a return block or an assert block |
1297 | 35.0k | // into a fallthrough to the return. |
1298 | 35.0k | MachineBasicBlock::iterator MBB1I = MBB1->getLastNonDebugInstr(); |
1299 | 35.0k | MachineBasicBlock::iterator MBB2I = MBB2->getLastNonDebugInstr(); |
1300 | 35.0k | if (MBB1I == MBB1->end() || 35.0k MBB2I == MBB2->end()34.9k ) |
1301 | 6 | return false; |
1302 | 34.9k | |
1303 | 34.9k | // If there is a clear successor ordering we make sure that one block |
1304 | 34.9k | // will fall through to the next |
1305 | 34.9k | if (34.9k MBB1->isSuccessor(MBB2)34.9k ) return true20 ; |
1306 | 34.9k | if (34.9k MBB2->isSuccessor(MBB1)34.9k ) return false0 ; |
1307 | 34.9k | |
1308 | 34.9k | return MBB2I->isCall() && 34.9k !MBB1I->isCall()9.31k ; |
1309 | 35.0k | } |
1310 | | |
1311 | | /// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch |
1312 | | /// instructions on the block. |
1313 | 1.46M | static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) { |
1314 | 1.46M | MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); |
1315 | 1.46M | if (I != MBB.end() && 1.46M I->isBranch()1.46M ) |
1316 | 1.46M | return I->getDebugLoc(); |
1317 | 5.46k | return DebugLoc(); |
1318 | 5.46k | } |
1319 | | |
1320 | 9.89M | bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { |
1321 | 9.89M | bool MadeChange = false; |
1322 | 9.89M | MachineFunction &MF = *MBB->getParent(); |
1323 | 11.2M | ReoptimizeBlock: |
1324 | 11.2M | |
1325 | 11.2M | MachineFunction::iterator FallThrough = MBB->getIterator(); |
1326 | 11.2M | ++FallThrough; |
1327 | 11.2M | |
1328 | 11.2M | // Make sure MBB and FallThrough belong to the same funclet. |
1329 | 11.2M | bool SameFunclet = true; |
1330 | 11.2M | if (!FuncletMembership.empty() && 11.2M FallThrough != MF.end()603 ) { |
1331 | 493 | auto MBBFunclet = FuncletMembership.find(MBB); |
1332 | 493 | assert(MBBFunclet != FuncletMembership.end()); |
1333 | 493 | auto FallThroughFunclet = FuncletMembership.find(&*FallThrough); |
1334 | 493 | assert(FallThroughFunclet != FuncletMembership.end()); |
1335 | 493 | SameFunclet = MBBFunclet->second == FallThroughFunclet->second; |
1336 | 493 | } |
1337 | 11.2M | |
1338 | 11.2M | // If this block is empty, make everyone use its fall-through, not the block |
1339 | 11.2M | // explicitly. Landing pads should not do this since the landing-pad table |
1340 | 11.2M | // points to this block. Blocks with their addresses taken shouldn't be |
1341 | 11.2M | // optimized away. |
1342 | 11.2M | if (IsEmptyBlock(MBB) && 11.2M !MBB->isEHPad()99.8k && !MBB->hasAddressTaken()99.8k && |
1343 | 11.2M | SameFunclet99.8k ) { |
1344 | 99.8k | // Dead block? Leave for cleanup later. |
1345 | 99.8k | if (MBB->pred_empty()99.8k ) return MadeChange2.59k ; |
1346 | 97.2k | |
1347 | 97.2k | if (97.2k FallThrough == MF.end()97.2k ) { |
1348 | 365 | // TODO: Simplify preds to not branch here if possible! |
1349 | 97.2k | } else if (96.8k FallThrough->isEHPad()96.8k ) { |
1350 | 630 | // Don't rewrite to a landing pad fallthough. That could lead to the case |
1351 | 630 | // where a BB jumps to more than one landing pad. |
1352 | 630 | // TODO: Is it ever worth rewriting predecessors which don't already |
1353 | 630 | // jump to a landing pad, and so can safely jump to the fallthrough? |
1354 | 96.8k | } else if (96.2k MBB->isSuccessor(&*FallThrough)96.2k ) { |
1355 | 93.8k | // Rewrite all predecessors of the old block to go to the fallthrough |
1356 | 93.8k | // instead. |
1357 | 207k | while (!MBB->pred_empty()207k ) { |
1358 | 113k | MachineBasicBlock *Pred = *(MBB->pred_end()-1); |
1359 | 113k | Pred->ReplaceUsesOfBlockWith(MBB, &*FallThrough); |
1360 | 113k | } |
1361 | 93.8k | // If MBB was the target of a jump table, update jump tables to go to the |
1362 | 93.8k | // fallthrough instead. |
1363 | 93.8k | if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo()) |
1364 | 9.73k | MJTI->ReplaceMBBInJumpTables(MBB, &*FallThrough); |
1365 | 96.8k | MadeChange = true; |
1366 | 96.8k | } |
1367 | 99.8k | return MadeChange; |
1368 | 99.8k | } |
1369 | 11.1M | |
1370 | 11.1M | // Check to see if we can simplify the terminator of the block before this |
1371 | 11.1M | // one. |
1372 | 11.1M | MachineBasicBlock &PrevBB = *std::prev(MachineFunction::iterator(MBB)); |
1373 | 11.1M | |
1374 | 11.1M | MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; |
1375 | 11.1M | SmallVector<MachineOperand, 4> PriorCond; |
1376 | 11.1M | bool PriorUnAnalyzable = |
1377 | 11.1M | TII->analyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true); |
1378 | 11.1M | if (!PriorUnAnalyzable11.1M ) { |
1379 | 10.7M | // If the CFG for the prior block has extra edges, remove them. |
1380 | 10.7M | MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB, |
1381 | 10.7M | !PriorCond.empty()); |
1382 | 10.7M | |
1383 | 10.7M | // If the previous branch is conditional and both conditions go to the same |
1384 | 10.7M | // destination, remove the branch, replacing it with an unconditional one or |
1385 | 10.7M | // a fall-through. |
1386 | 10.7M | if (PriorTBB && 10.7M PriorTBB == PriorFBB8.41M ) { |
1387 | 15 | DebugLoc dl = getBranchDebugLoc(PrevBB); |
1388 | 15 | TII->removeBranch(PrevBB); |
1389 | 15 | PriorCond.clear(); |
1390 | 15 | if (PriorTBB != MBB) |
1391 | 3 | TII->insertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl); |
1392 | 15 | MadeChange = true; |
1393 | 15 | ++NumBranchOpts; |
1394 | 15 | goto ReoptimizeBlock; |
1395 | 15 | } |
1396 | 10.7M | |
1397 | 10.7M | // If the previous block unconditionally falls through to this block and |
1398 | 10.7M | // this block has no other predecessors, move the contents of this block |
1399 | 10.7M | // into the prior block. This doesn't usually happen when SimplifyCFG |
1400 | 10.7M | // has been used, but it can happen if tail merging splits a fall-through |
1401 | 10.7M | // predecessor of a block. |
1402 | 10.7M | // This has to check PrevBB->succ_size() because EH edges are ignored by |
1403 | 10.7M | // AnalyzeBranch. |
1404 | 10.7M | if (10.7M PriorCond.empty() && 10.7M !PriorTBB4.10M && MBB->pred_size() == 12.31M && |
1405 | 88.9k | PrevBB.succ_size() == 1 && |
1406 | 10.7M | !MBB->hasAddressTaken()14.7k && !MBB->isEHPad()14.6k ) { |
1407 | 14.6k | DEBUG(dbgs() << "\nMerging into block: " << PrevBB |
1408 | 14.6k | << "From MBB: " << *MBB); |
1409 | 14.6k | // Remove redundant DBG_VALUEs first. |
1410 | 14.6k | if (PrevBB.begin() != PrevBB.end()14.6k ) { |
1411 | 14.5k | MachineBasicBlock::iterator PrevBBIter = PrevBB.end(); |
1412 | 14.5k | --PrevBBIter; |
1413 | 14.5k | MachineBasicBlock::iterator MBBIter = MBB->begin(); |
1414 | 14.5k | // Check if DBG_VALUE at the end of PrevBB is identical to the |
1415 | 14.5k | // DBG_VALUE at the beginning of MBB. |
1416 | 14.5k | while (PrevBBIter != PrevBB.begin() && 14.5k MBBIter != MBB->end()7.38k |
1417 | 14.5k | && PrevBBIter->isDebugValue()7.38k && MBBIter->isDebugValue()0 ) { |
1418 | 0 | if (!MBBIter->isIdenticalTo(*PrevBBIter)) |
1419 | 0 | break; |
1420 | 0 | MachineInstr &DuplicateDbg = *MBBIter; |
1421 | 0 | ++MBBIter; -- PrevBBIter; |
1422 | 0 | DuplicateDbg.eraseFromParent(); |
1423 | 0 | } |
1424 | 14.5k | } |
1425 | 14.6k | PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end()); |
1426 | 14.6k | PrevBB.removeSuccessor(PrevBB.succ_begin()); |
1427 | 14.6k | assert(PrevBB.succ_empty()); |
1428 | 14.6k | PrevBB.transferSuccessors(MBB); |
1429 | 14.6k | MadeChange = true; |
1430 | 14.6k | return MadeChange; |
1431 | 14.6k | } |
1432 | 10.7M | |
1433 | 10.7M | // If the previous branch *only* branches to *this* block (conditional or |
1434 | 10.7M | // not) remove the branch. |
1435 | 10.7M | if (10.7M PriorTBB == MBB && 10.7M !PriorFBB498k ) { |
1436 | 195k | TII->removeBranch(PrevBB); |
1437 | 195k | MadeChange = true; |
1438 | 195k | ++NumBranchOpts; |
1439 | 195k | goto ReoptimizeBlock; |
1440 | 195k | } |
1441 | 10.5M | |
1442 | 10.5M | // If the prior block branches somewhere else on the condition and here if |
1443 | 10.5M | // the condition is false, remove the uncond second branch. |
1444 | 10.5M | if (10.5M PriorFBB == MBB10.5M ) { |
1445 | 553k | DebugLoc dl = getBranchDebugLoc(PrevBB); |
1446 | 553k | TII->removeBranch(PrevBB); |
1447 | 553k | TII->insertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl); |
1448 | 553k | MadeChange = true; |
1449 | 553k | ++NumBranchOpts; |
1450 | 553k | goto ReoptimizeBlock; |
1451 | 553k | } |
1452 | 9.96M | |
1453 | 9.96M | // If the prior block branches here on true and somewhere else on false, and |
1454 | 9.96M | // if the branch condition is reversible, reverse the branch to create a |
1455 | 9.96M | // fall-through. |
1456 | 9.96M | if (9.96M PriorTBB == MBB9.96M ) { |
1457 | 303k | SmallVector<MachineOperand, 4> NewPriorCond(PriorCond); |
1458 | 303k | if (!TII->reverseBranchCondition(NewPriorCond)303k ) { |
1459 | 303k | DebugLoc dl = getBranchDebugLoc(PrevBB); |
1460 | 303k | TII->removeBranch(PrevBB); |
1461 | 303k | TII->insertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl); |
1462 | 303k | MadeChange = true; |
1463 | 303k | ++NumBranchOpts; |
1464 | 303k | goto ReoptimizeBlock; |
1465 | 303k | } |
1466 | 9.66M | } |
1467 | 9.66M | |
1468 | 9.66M | // If this block has no successors (e.g. it is a return block or ends with |
1469 | 9.66M | // a call to a no-return function like abort or __cxa_throw) and if the pred |
1470 | 9.66M | // falls through into this block, and if it would otherwise fall through |
1471 | 9.66M | // into the block after this, move this block to the end of the function. |
1472 | 9.66M | // |
1473 | 9.66M | // We consider it more likely that execution will stay in the function (e.g. |
1474 | 9.66M | // due to loops) than it is to exit it. This asserts in loops etc, moving |
1475 | 9.66M | // the assert condition out of the loop body. |
1476 | 9.66M | if (9.66M MBB->succ_empty() && 9.66M !PriorCond.empty()697k && !PriorFBB364k && |
1477 | 352k | MachineFunction::iterator(PriorTBB) == FallThrough && |
1478 | 9.66M | !MBB->canFallThrough()182k ) { |
1479 | 182k | bool DoTransform = true; |
1480 | 182k | |
1481 | 182k | // We have to be careful that the succs of PredBB aren't both no-successor |
1482 | 182k | // blocks. If neither have successors and if PredBB is the second from |
1483 | 182k | // last block in the function, we'd just keep swapping the two blocks for |
1484 | 182k | // last. Only do the swap if one is clearly better to fall through than |
1485 | 182k | // the other. |
1486 | 182k | if (FallThrough == --MF.end() && |
1487 | 35.0k | !IsBetterFallthrough(PriorTBB, MBB)) |
1488 | 28.8k | DoTransform = false; |
1489 | 182k | |
1490 | 182k | if (DoTransform182k ) { |
1491 | 153k | // Reverse the branch so we will fall through on the previous true cond. |
1492 | 153k | SmallVector<MachineOperand, 4> NewPriorCond(PriorCond); |
1493 | 153k | if (!TII->reverseBranchCondition(NewPriorCond)153k ) { |
1494 | 153k | DEBUG(dbgs() << "\nMoving MBB: " << *MBB |
1495 | 153k | << "To make fallthrough to: " << *PriorTBB << "\n"); |
1496 | 153k | |
1497 | 153k | DebugLoc dl = getBranchDebugLoc(PrevBB); |
1498 | 153k | TII->removeBranch(PrevBB); |
1499 | 153k | TII->insertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl); |
1500 | 153k | |
1501 | 153k | // Move this block to the end of the function. |
1502 | 153k | MBB->moveAfter(&MF.back()); |
1503 | 153k | MadeChange = true; |
1504 | 153k | ++NumBranchOpts; |
1505 | 153k | return MadeChange; |
1506 | 153k | } |
1507 | 9.93M | } |
1508 | 182k | } |
1509 | 10.7M | } |
1510 | 9.93M | |
1511 | 9.93M | if (9.93M !IsEmptyBlock(MBB) && 9.93M MBB->pred_size() == 19.93M && |
1512 | 9.93M | MF.getFunction()->optForSize()6.47M ) { |
1513 | 30.7k | // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch |
1514 | 30.7k | // direction, thereby defeating careful block placement and regressing |
1515 | 30.7k | // performance. Therefore, only consider this for optsize functions. |
1516 | 30.7k | MachineInstr &TailCall = *MBB->getFirstNonDebugInstr(); |
1517 | 30.7k | if (TII->isUnconditionalTailCall(TailCall)30.7k ) { |
1518 | 39 | MachineBasicBlock *Pred = *MBB->pred_begin(); |
1519 | 39 | MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; |
1520 | 39 | SmallVector<MachineOperand, 4> PredCond; |
1521 | 39 | bool PredAnalyzable = |
1522 | 39 | !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true); |
1523 | 39 | |
1524 | 39 | if (PredAnalyzable && 39 !PredCond.empty()35 && PredTBB == MBB35 && |
1525 | 39 | PredTBB != PredFBB29 ) { |
1526 | 28 | // The predecessor has a conditional branch to this block which consists |
1527 | 28 | // of only a tail call. Try to fold the tail call into the conditional |
1528 | 28 | // branch. |
1529 | 28 | if (TII->canMakeTailCallConditional(PredCond, TailCall)28 ) { |
1530 | 28 | // TODO: It would be nice if analyzeBranch() could provide a pointer |
1531 | 28 | // to the branch instruction so replaceBranchWithTailCall() doesn't |
1532 | 28 | // have to search for it. |
1533 | 28 | TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall); |
1534 | 28 | ++NumTailCalls; |
1535 | 28 | Pred->removeSuccessor(MBB); |
1536 | 28 | MadeChange = true; |
1537 | 28 | return MadeChange; |
1538 | 28 | } |
1539 | 9.93M | } |
1540 | 39 | // If the predecessor is falling through to this block, we could reverse |
1541 | 39 | // the branch condition and fold the tail call into that. However, after |
1542 | 39 | // that we might have to re-arrange the CFG to fall through to the other |
1543 | 39 | // block and there is a high risk of regressing code size rather than |
1544 | 39 | // improving it. |
1545 | 39 | } |
1546 | 30.7k | } |
1547 | 9.93M | |
1548 | 9.93M | // Analyze the branch in the current block. |
1549 | 9.93M | MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr; |
1550 | 9.93M | SmallVector<MachineOperand, 4> CurCond; |
1551 | 9.93M | bool CurUnAnalyzable = |
1552 | 9.93M | TII->analyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true); |
1553 | 9.93M | if (!CurUnAnalyzable9.93M ) { |
1554 | 9.09M | // If the CFG for the prior block has extra edges, remove them. |
1555 | 9.09M | MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty()); |
1556 | 9.09M | |
1557 | 9.09M | // If this is a two-way branch, and the FBB branches to this block, reverse |
1558 | 9.09M | // the condition so the single-basic-block loop is faster. Instead of: |
1559 | 9.09M | // Loop: xxx; jcc Out; jmp Loop |
1560 | 9.09M | // we want: |
1561 | 9.09M | // Loop: xxx; jncc Loop; jmp Out |
1562 | 9.09M | if (CurTBB && 9.09M CurFBB6.94M && CurFBB == MBB831k && CurTBB != MBB3.59k ) { |
1563 | 3.59k | SmallVector<MachineOperand, 4> NewCond(CurCond); |
1564 | 3.59k | if (!TII->reverseBranchCondition(NewCond)3.59k ) { |
1565 | 3.59k | DebugLoc dl = getBranchDebugLoc(*MBB); |
1566 | 3.59k | TII->removeBranch(*MBB); |
1567 | 3.59k | TII->insertBranch(*MBB, CurFBB, CurTBB, NewCond, dl); |
1568 | 3.59k | MadeChange = true; |
1569 | 3.59k | ++NumBranchOpts; |
1570 | 3.59k | goto ReoptimizeBlock; |
1571 | 3.59k | } |
1572 | 9.08M | } |
1573 | 9.08M | |
1574 | 9.08M | // If this branch is the only thing in its block, see if we can forward |
1575 | 9.08M | // other blocks across it. |
1576 | 9.08M | if (9.08M CurTBB && 9.08M CurCond.empty()6.94M && !CurFBB1.96M && |
1577 | 9.08M | IsBranchOnlyBlock(MBB)1.96M && CurTBB != MBB241k && |
1578 | 9.08M | !MBB->hasAddressTaken()241k && !MBB->isEHPad()241k ) { |
1579 | 241k | DebugLoc dl = getBranchDebugLoc(*MBB); |
1580 | 241k | // This block may contain just an unconditional branch. Because there can |
1581 | 241k | // be 'non-branch terminators' in the block, try removing the branch and |
1582 | 241k | // then seeing if the block is empty. |
1583 | 241k | TII->removeBranch(*MBB); |
1584 | 241k | // If the only things remaining in the block are debug info, remove these |
1585 | 241k | // as well, so this will behave the same as an empty block in non-debug |
1586 | 241k | // mode. |
1587 | 241k | if (IsEmptyBlock(MBB)241k ) { |
1588 | 241k | // Make the block empty, losing the debug info (we could probably |
1589 | 241k | // improve this in some cases.) |
1590 | 241k | MBB->erase(MBB->begin(), MBB->end()); |
1591 | 241k | } |
1592 | 241k | // If this block is just an unconditional branch to CurTBB, we can |
1593 | 241k | // usually completely eliminate the block. The only case we cannot |
1594 | 241k | // completely eliminate the block is when the block before this one |
1595 | 241k | // falls through into MBB and we can't understand the prior block's branch |
1596 | 241k | // condition. |
1597 | 241k | if (MBB->empty()241k ) { |
1598 | 241k | bool PredHasNoFallThrough = !PrevBB.canFallThrough(); |
1599 | 241k | if (PredHasNoFallThrough || 241k !PriorUnAnalyzable210k || |
1600 | 241k | !PrevBB.isSuccessor(MBB)2 ) { |
1601 | 241k | // If the prior block falls through into us, turn it into an |
1602 | 241k | // explicit branch to us to make updates simpler. |
1603 | 241k | if (!PredHasNoFallThrough && 241k PrevBB.isSuccessor(MBB)210k && |
1604 | 241k | PriorTBB != MBB210k && PriorFBB != MBB210k ) { |
1605 | 210k | if (!PriorTBB210k ) { |
1606 | 5.46k | assert(PriorCond.empty() && !PriorFBB && |
1607 | 5.46k | "Bad branch analysis"); |
1608 | 5.46k | PriorTBB = MBB; |
1609 | 210k | } else { |
1610 | 204k | assert(!PriorFBB && "Machine CFG out of date!"); |
1611 | 204k | PriorFBB = MBB; |
1612 | 204k | } |
1613 | 210k | DebugLoc pdl = getBranchDebugLoc(PrevBB); |
1614 | 210k | TII->removeBranch(PrevBB); |
1615 | 210k | TII->insertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl); |
1616 | 210k | } |
1617 | 241k | |
1618 | 241k | // Iterate through all the predecessors, revectoring each in-turn. |
1619 | 241k | size_t PI = 0; |
1620 | 241k | bool DidChange = false; |
1621 | 241k | bool HasBranchToSelf = false; |
1622 | 492k | while(PI != MBB->pred_size()492k ) { |
1623 | 251k | MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI); |
1624 | 251k | if (PMBB == MBB251k ) { |
1625 | 0 | // If this block has an uncond branch to itself, leave it. |
1626 | 0 | ++PI; |
1627 | 0 | HasBranchToSelf = true; |
1628 | 251k | } else { |
1629 | 251k | DidChange = true; |
1630 | 251k | PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB); |
1631 | 251k | // If this change resulted in PMBB ending in a conditional |
1632 | 251k | // branch where both conditions go to the same destination, |
1633 | 251k | // change this to an unconditional branch (and fix the CFG). |
1634 | 251k | MachineBasicBlock *NewCurTBB = nullptr, *NewCurFBB = nullptr; |
1635 | 251k | SmallVector<MachineOperand, 4> NewCurCond; |
1636 | 251k | bool NewCurUnAnalyzable = TII->analyzeBranch( |
1637 | 251k | *PMBB, NewCurTBB, NewCurFBB, NewCurCond, true); |
1638 | 251k | if (!NewCurUnAnalyzable && 251k NewCurTBB250k && NewCurTBB == NewCurFBB250k ) { |
1639 | 215 | DebugLoc pdl = getBranchDebugLoc(*PMBB); |
1640 | 215 | TII->removeBranch(*PMBB); |
1641 | 215 | NewCurCond.clear(); |
1642 | 215 | TII->insertBranch(*PMBB, NewCurTBB, nullptr, NewCurCond, pdl); |
1643 | 215 | MadeChange = true; |
1644 | 215 | ++NumBranchOpts; |
1645 | 215 | PMBB->CorrectExtraCFGEdges(NewCurTBB, nullptr, false); |
1646 | 215 | } |
1647 | 251k | } |
1648 | 251k | } |
1649 | 241k | |
1650 | 241k | // Change any jumptables to go to the new MBB. |
1651 | 241k | if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo()) |
1652 | 52.5k | MJTI->ReplaceMBBInJumpTables(MBB, CurTBB); |
1653 | 241k | if (DidChange241k ) { |
1654 | 241k | ++NumBranchOpts; |
1655 | 241k | MadeChange = true; |
1656 | 241k | if (!HasBranchToSelf241k ) return MadeChange241k ; |
1657 | 2 | } |
1658 | 241k | } |
1659 | 241k | } |
1660 | 2 | |
1661 | 2 | // Add the branch back if the block is more than just an uncond branch. |
1662 | 2 | TII->insertBranch(*MBB, CurTBB, nullptr, CurCond, dl); |
1663 | 2 | } |
1664 | 9.09M | } |
1665 | 9.93M | |
1666 | 9.93M | // If the prior block doesn't fall through into this block, and if this |
1667 | 9.93M | // block doesn't fall through into some other block, see if we can find a |
1668 | 9.93M | // place to move this block where a fall-through will happen. |
1669 | 9.68M | if (9.68M !PrevBB.canFallThrough()9.68M ) { |
1670 | 2.37M | // Now we know that there was no fall-through into this block, check to |
1671 | 2.37M | // see if it has a fall-through into its successor. |
1672 | 2.37M | bool CurFallsThru = MBB->canFallThrough(); |
1673 | 2.37M | |
1674 | 2.37M | if (!MBB->isEHPad()2.37M ) { |
1675 | 2.32M | // Check all the predecessors of this block. If one of them has no fall |
1676 | 2.32M | // throughs, move this block right after it. |
1677 | 2.81M | for (MachineBasicBlock *PredBB : MBB->predecessors()) { |
1678 | 2.81M | // Analyze the branch at the end of the pred. |
1679 | 2.81M | MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; |
1680 | 2.81M | SmallVector<MachineOperand, 4> PredCond; |
1681 | 2.81M | if (PredBB != MBB && 2.81M !PredBB->canFallThrough()2.80M && |
1682 | 458k | !TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true) && |
1683 | 353k | (!CurFallsThru || 353k !CurTBB235k || !CurFBB171k ) && |
1684 | 2.81M | (!CurFallsThru || 333k MBB->getNumber() >= PredBB->getNumber()215k )) { |
1685 | 276k | // If the current block doesn't fall through, just move it. |
1686 | 276k | // If the current block can fall through and does not end with a |
1687 | 276k | // conditional branch, we need to append an unconditional jump to |
1688 | 276k | // the (current) next block. To avoid a possible compile-time |
1689 | 276k | // infinite loop, move blocks only backward in this case. |
1690 | 276k | // Also, if there are already 2 branches here, we cannot add a third; |
1691 | 276k | // this means we have the case |
1692 | 276k | // Bcc next |
1693 | 276k | // B elsewhere |
1694 | 276k | // next: |
1695 | 276k | if (CurFallsThru276k ) { |
1696 | 158k | MachineBasicBlock *NextBB = &*std::next(MBB->getIterator()); |
1697 | 158k | CurCond.clear(); |
1698 | 158k | TII->insertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc()); |
1699 | 158k | } |
1700 | 276k | MBB->moveAfter(PredBB); |
1701 | 276k | MadeChange = true; |
1702 | 276k | goto ReoptimizeBlock; |
1703 | 276k | } |
1704 | 2.09M | } |
1705 | 2.32M | } |
1706 | 2.09M | |
1707 | 2.09M | if (2.09M !CurFallsThru2.09M ) { |
1708 | 595k | // Check all successors to see if we can move this block before it. |
1709 | 263k | for (MachineBasicBlock *SuccBB : MBB->successors()) { |
1710 | 263k | // Analyze the branch at the end of the block before the succ. |
1711 | 263k | MachineFunction::iterator SuccPrev = --SuccBB->getIterator(); |
1712 | 263k | |
1713 | 263k | // If this block doesn't already fall-through to that successor, and if |
1714 | 263k | // the succ doesn't already have a block that can fall through into it, |
1715 | 263k | // and if the successor isn't an EH destination, we can arrange for the |
1716 | 263k | // fallthrough to happen. |
1717 | 263k | if (SuccBB != MBB && 263k &*SuccPrev != MBB262k && |
1718 | 263k | !SuccPrev->canFallThrough()260k && !CurUnAnalyzable41.5k && |
1719 | 263k | !SuccBB->isEHPad()28.7k ) { |
1720 | 27.6k | MBB->moveBefore(SuccBB); |
1721 | 27.6k | MadeChange = true; |
1722 | 27.6k | goto ReoptimizeBlock; |
1723 | 27.6k | } |
1724 | 568k | } |
1725 | 568k | |
1726 | 568k | // Okay, there is no really great place to put this block. If, however, |
1727 | 568k | // the block before this one would be a fall-through if this block were |
1728 | 568k | // removed, move this block to the end of the function. There is no real |
1729 | 568k | // advantage in "falling through" to an EH block, so we don't want to |
1730 | 568k | // perform this transformation for that case. |
1731 | 568k | // |
1732 | 568k | // Also, Windows EH introduced the possibility of an arbitrary number of |
1733 | 568k | // successors to a given block. The analyzeBranch call does not consider |
1734 | 568k | // exception handling and so we can get in a state where a block |
1735 | 568k | // containing a call is followed by multiple EH blocks that would be |
1736 | 568k | // rotated infinitely at the end of the function if the transformation |
1737 | 568k | // below were performed for EH "FallThrough" blocks. Therefore, even if |
1738 | 568k | // that appears not to be happening anymore, we should assume that it is |
1739 | 568k | // possible and not remove the "!FallThrough()->isEHPad" condition below. |
1740 | 568k | MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr; |
1741 | 568k | SmallVector<MachineOperand, 4> PrevCond; |
1742 | 568k | if (FallThrough != MF.end() && |
1743 | 461k | !FallThrough->isEHPad() && |
1744 | 443k | !TII->analyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) && |
1745 | 568k | PrevBB.isSuccessor(&*FallThrough)170k ) { |
1746 | 7.58k | MBB->moveAfter(&MF.back()); |
1747 | 7.58k | MadeChange = true; |
1748 | 7.58k | return MadeChange; |
1749 | 7.58k | } |
1750 | 9.37M | } |
1751 | 2.37M | } |
1752 | 9.37M | |
1753 | 9.37M | return MadeChange; |
1754 | 9.37M | } |
1755 | | |
1756 | | //===----------------------------------------------------------------------===// |
1757 | | // Hoist Common Code |
1758 | | //===----------------------------------------------------------------------===// |
1759 | | |
1760 | 798k | bool BranchFolder::HoistCommonCode(MachineFunction &MF) { |
1761 | 798k | bool MadeChange = false; |
1762 | 10.1M | for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E10.1M ; ) { |
1763 | 9.33M | MachineBasicBlock *MBB = &*I++; |
1764 | 9.33M | MadeChange |= HoistCommonCodeInSuccs(MBB); |
1765 | 9.33M | } |
1766 | 798k | |
1767 | 798k | return MadeChange; |
1768 | 798k | } |
1769 | | |
1770 | | /// findFalseBlock - BB has a fallthrough. Find its 'false' successor given |
1771 | | /// its 'true' successor. |
1772 | | static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, |
1773 | 4.57M | MachineBasicBlock *TrueBB) { |
1774 | 4.57M | for (MachineBasicBlock *SuccBB : BB->successors()) |
1775 | 6.51M | if (6.51M SuccBB != TrueBB6.51M ) |
1776 | 4.57M | return SuccBB; |
1777 | 126 | return nullptr; |
1778 | 126 | } |
1779 | | |
1780 | | template <class Container> |
1781 | | static void addRegAndItsAliases(unsigned Reg, const TargetRegisterInfo *TRI, |
1782 | 3.28M | Container &Set) { |
1783 | 3.28M | if (TargetRegisterInfo::isPhysicalRegister(Reg)3.28M ) { |
1784 | 17.9M | for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid()17.9M ; ++AI14.6M ) |
1785 | 14.6M | Set.insert(*AI); |
1786 | 3.28M | } else { |
1787 | 85 | Set.insert(Reg); |
1788 | 85 | } |
1789 | 3.28M | } |
1790 | | |
1791 | | /// findHoistingInsertPosAndDeps - Find the location to move common instructions |
1792 | | /// in successors to. The location is usually just before the terminator, |
1793 | | /// however if the terminator is a conditional branch and its previous |
1794 | | /// instruction is the flag setting instruction, the previous instruction is |
1795 | | /// the preferred location. This function also gathers uses and defs of the |
1796 | | /// instructions from the insertion point to the end of the block. The data is |
1797 | | /// used by HoistCommonCodeInSuccs to ensure safety. |
1798 | | static |
1799 | | MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, |
1800 | | const TargetInstrInfo *TII, |
1801 | | const TargetRegisterInfo *TRI, |
1802 | | SmallSet<unsigned,4> &Uses, |
1803 | 1.49M | SmallSet<unsigned,4> &Defs) { |
1804 | 1.49M | MachineBasicBlock::iterator Loc = MBB->getFirstTerminator(); |
1805 | 1.49M | if (!TII->isUnpredicatedTerminator(*Loc)) |
1806 | 0 | return MBB->end(); |
1807 | 1.49M | |
1808 | 1.49M | for (const MachineOperand &MO : Loc->operands()) 1.49M { |
1809 | 3.52M | if (!MO.isReg()) |
1810 | 2.02M | continue; |
1811 | 1.49M | unsigned Reg = MO.getReg(); |
1812 | 1.49M | if (!Reg) |
1813 | 0 | continue; |
1814 | 1.49M | if (1.49M MO.isUse()1.49M ) { |
1815 | 1.49M | addRegAndItsAliases(Reg, TRI, Uses); |
1816 | 1.49M | } else { |
1817 | 424 | if (!MO.isDead()) |
1818 | 424 | // Don't try to hoist code in the rare case the terminator defines a |
1819 | 424 | // register that is later used. |
1820 | 312 | return MBB->end(); |
1821 | 112 | |
1822 | 112 | // If the terminator defines a register, make sure we don't hoist |
1823 | 112 | // the instruction whose def might be clobbered by the terminator. |
1824 | 112 | addRegAndItsAliases(Reg, TRI, Defs); |
1825 | 112 | } |
1826 | 3.52M | } |
1827 | 1.49M | |
1828 | 1.49M | if (1.49M Uses.empty()1.49M ) |
1829 | 0 | return Loc; |
1830 | 1.49M | if (1.49M Loc == MBB->begin()1.49M ) |
1831 | 280k | return MBB->end(); |
1832 | 1.21M | |
1833 | 1.21M | // The terminator is probably a conditional branch, try not to separate the |
1834 | 1.21M | // branch from condition setting instruction. |
1835 | 1.21M | MachineBasicBlock::iterator PI = |
1836 | 1.21M | skipDebugInstructionsBackward(std::prev(Loc), MBB->begin()); |
1837 | 1.21M | |
1838 | 1.21M | bool IsDef = false; |
1839 | 3.84M | for (const MachineOperand &MO : PI->operands()) { |
1840 | 3.84M | // If PI has a regmask operand, it is probably a call. Separate away. |
1841 | 3.84M | if (MO.isRegMask()) |
1842 | 107k | return Loc; |
1843 | 3.74M | if (3.74M !MO.isReg() || 3.74M MO.isUse()2.64M ) |
1844 | 2.49M | continue; |
1845 | 1.24M | unsigned Reg = MO.getReg(); |
1846 | 1.24M | if (!Reg) |
1847 | 0 | continue; |
1848 | 1.24M | if (1.24M Uses.count(Reg)1.24M ) { |
1849 | 750k | IsDef = true; |
1850 | 750k | break; |
1851 | 750k | } |
1852 | 1.10M | } |
1853 | 1.10M | if (1.10M !IsDef1.10M ) |
1854 | 1.10M | // The condition setting instruction is not just before the conditional |
1855 | 1.10M | // branch. |
1856 | 357k | return Loc; |
1857 | 750k | |
1858 | 750k | // Be conservative, don't insert instruction above something that may have |
1859 | 750k | // side-effects. And since it's potentially bad to separate flag setting |
1860 | 750k | // instruction from the conditional branch, just abort the optimization |
1861 | 750k | // completely. |
1862 | 750k | // Also avoid moving code above predicated instruction since it's hard to |
1863 | 750k | // reason about register liveness with predicated instruction. |
1864 | 750k | bool DontMoveAcrossStore = true; |
1865 | 750k | if (!PI->isSafeToMove(nullptr, DontMoveAcrossStore) || 750k TII->isPredicated(*PI)549k ) |
1866 | 200k | return MBB->end(); |
1867 | 549k | |
1868 | 549k | |
1869 | 549k | // Find out what registers are live. Note this routine is ignoring other live |
1870 | 549k | // registers which are only used by instructions in successor blocks. |
1871 | 549k | for (const MachineOperand &MO : PI->operands()) 549k { |
1872 | 2.41M | if (!MO.isReg()) |
1873 | 660k | continue; |
1874 | 1.75M | unsigned Reg = MO.getReg(); |
1875 | 1.75M | if (!Reg) |
1876 | 17.8k | continue; |
1877 | 1.73M | if (1.73M MO.isUse()1.73M ) { |
1878 | 754k | addRegAndItsAliases(Reg, TRI, Uses); |
1879 | 1.73M | } else { |
1880 | 982k | if (Uses.erase(Reg)982k ) { |
1881 | 549k | if (TargetRegisterInfo::isPhysicalRegister(Reg)549k ) { |
1882 | 601k | for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid()601k ; ++SubRegs51.8k ) |
1883 | 51.8k | Uses.erase(*SubRegs); // Use sub-registers to be conservative |
1884 | 549k | } |
1885 | 549k | } |
1886 | 982k | addRegAndItsAliases(Reg, TRI, Defs); |
1887 | 982k | } |
1888 | 2.41M | } |
1889 | 1.49M | |
1890 | 1.49M | return PI; |
1891 | 1.49M | } |
1892 | | |
1893 | 9.33M | bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { |
1894 | 9.33M | MachineBasicBlock *TBB = nullptr, *FBB = nullptr; |
1895 | 9.33M | SmallVector<MachineOperand, 4> Cond; |
1896 | 9.33M | if (TII->analyzeBranch(*MBB, TBB, FBB, Cond, true) || 9.33M !TBB8.21M || Cond.empty()6.09M ) |
1897 | 4.60M | return false; |
1898 | 4.73M | |
1899 | 4.73M | if (4.73M !FBB4.73M ) FBB = findFalseBlock(MBB, TBB)4.57M ; |
1900 | 4.73M | if (!FBB) |
1901 | 4.73M | // Malformed bcc? True and false blocks are the same? |
1902 | 126 | return false; |
1903 | 4.73M | |
1904 | 4.73M | // Restrict the optimization to cases where MBB is the only predecessor, |
1905 | 4.73M | // it is an obvious win. |
1906 | 4.73M | if (4.73M TBB->pred_size() > 1 || 4.73M FBB->pred_size() > 11.60M ) |
1907 | 3.23M | return false; |
1908 | 1.49M | |
1909 | 1.49M | // Find a suitable position to hoist the common instructions to. Also figure |
1910 | 1.49M | // out which registers are used or defined by instructions from the insertion |
1911 | 1.49M | // point to the end of the block. |
1912 | 1.49M | SmallSet<unsigned, 4> Uses, Defs; |
1913 | 1.49M | MachineBasicBlock::iterator Loc = |
1914 | 1.49M | findHoistingInsertPosAndDeps(MBB, TII, TRI, Uses, Defs); |
1915 | 1.49M | if (Loc == MBB->end()) |
1916 | 481k | return false; |
1917 | 1.01M | |
1918 | 1.01M | bool HasDups = false; |
1919 | 1.01M | SmallVector<unsigned, 4> LocalDefs, LocalKills; |
1920 | 1.01M | SmallSet<unsigned, 4> ActiveDefsSet, AllDefsSet; |
1921 | 1.01M | MachineBasicBlock::iterator TIB = TBB->begin(); |
1922 | 1.01M | MachineBasicBlock::iterator FIB = FBB->begin(); |
1923 | 1.01M | MachineBasicBlock::iterator TIE = TBB->end(); |
1924 | 1.01M | MachineBasicBlock::iterator FIE = FBB->end(); |
1925 | 1.03M | while (TIB != TIE && 1.03M FIB != FIE1.02M ) { |
1926 | 1.02M | // Skip dbg_value instructions. These do not count. |
1927 | 1.02M | TIB = skipDebugInstructionsForward(TIB, TIE); |
1928 | 1.02M | FIB = skipDebugInstructionsForward(FIB, FIE); |
1929 | 1.02M | if (TIB == TIE || 1.02M FIB == FIE1.02M ) |
1930 | 0 | break; |
1931 | 1.02M | |
1932 | 1.02M | if (1.02M !TIB->isIdenticalTo(*FIB, MachineInstr::CheckKillDead)1.02M ) |
1933 | 958k | break; |
1934 | 69.1k | |
1935 | 69.1k | if (69.1k TII->isPredicated(*TIB)69.1k ) |
1936 | 69.1k | // Hard to reason about register liveness with predicated instruction. |
1937 | 0 | break; |
1938 | 69.1k | |
1939 | 69.1k | bool IsSafe = true; |
1940 | 184k | for (MachineOperand &MO : TIB->operands()) { |
1941 | 184k | // Don't attempt to hoist instructions with register masks. |
1942 | 184k | if (MO.isRegMask()184k ) { |
1943 | 120 | IsSafe = false; |
1944 | 120 | break; |
1945 | 120 | } |
1946 | 183k | if (183k !MO.isReg()183k ) |
1947 | 50.2k | continue; |
1948 | 133k | unsigned Reg = MO.getReg(); |
1949 | 133k | if (!Reg) |
1950 | 1.24k | continue; |
1951 | 132k | if (132k MO.isDef()132k ) { |
1952 | 74.9k | if (Uses.count(Reg)74.9k ) { |
1953 | 7.16k | // Avoid clobbering a register that's used by the instruction at |
1954 | 7.16k | // the point of insertion. |
1955 | 7.16k | IsSafe = false; |
1956 | 7.16k | break; |
1957 | 7.16k | } |
1958 | 67.7k | |
1959 | 67.7k | if (67.7k Defs.count(Reg) && 67.7k !MO.isDead()1.64k ) { |
1960 | 1.20k | // Don't hoist the instruction if the def would be clobber by the |
1961 | 1.20k | // instruction at the point insertion. FIXME: This is overly |
1962 | 1.20k | // conservative. It should be possible to hoist the instructions |
1963 | 1.20k | // in BB2 in the following example: |
1964 | 1.20k | // BB1: |
1965 | 1.20k | // r1, eflag = op1 r2, r3 |
1966 | 1.20k | // brcc eflag |
1967 | 1.20k | // |
1968 | 1.20k | // BB2: |
1969 | 1.20k | // r1 = op2, ... |
1970 | 1.20k | // = op3, r1<kill> |
1971 | 1.20k | IsSafe = false; |
1972 | 1.20k | break; |
1973 | 1.20k | } |
1974 | 57.4k | } else if (57.4k !ActiveDefsSet.count(Reg)57.4k ) { |
1975 | 57.2k | if (Defs.count(Reg)57.2k ) { |
1976 | 2.75k | // Use is defined by the instruction at the point of insertion. |
1977 | 2.75k | IsSafe = false; |
1978 | 2.75k | break; |
1979 | 2.75k | } |
1980 | 54.5k | |
1981 | 54.5k | if (54.5k MO.isKill() && 54.5k Uses.count(Reg)2.01k ) |
1982 | 54.5k | // Kills a register that's read by the instruction at the point of |
1983 | 54.5k | // insertion. Remove the kill marker. |
1984 | 190 | MO.setIsKill(false); |
1985 | 57.4k | } |
1986 | 184k | } |
1987 | 69.1k | if (!IsSafe) |
1988 | 11.2k | break; |
1989 | 57.8k | |
1990 | 57.8k | bool DontMoveAcrossStore = true; |
1991 | 57.8k | if (!TIB->isSafeToMove(nullptr, DontMoveAcrossStore)) |
1992 | 38.5k | break; |
1993 | 19.3k | |
1994 | 19.3k | // Remove kills from ActiveDefsSet, these registers had short live ranges. |
1995 | 19.3k | for (const MachineOperand &MO : TIB->operands()) 19.3k { |
1996 | 48.9k | if (!MO.isReg() || 48.9k !MO.isUse()39.0k || !MO.isKill()13.6k ) |
1997 | 48.3k | continue; |
1998 | 637 | unsigned Reg = MO.getReg(); |
1999 | 637 | if (!Reg) |
2000 | 0 | continue; |
2001 | 637 | if (637 !AllDefsSet.count(Reg)637 ) { |
2002 | 606 | LocalKills.push_back(Reg); |
2003 | 606 | continue; |
2004 | 606 | } |
2005 | 31 | if (31 TargetRegisterInfo::isPhysicalRegister(Reg)31 ) { |
2006 | 207 | for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid()207 ; ++AI176 ) |
2007 | 176 | ActiveDefsSet.erase(*AI); |
2008 | 0 | } else { |
2009 | 0 | ActiveDefsSet.erase(Reg); |
2010 | 0 | } |
2011 | 48.9k | } |
2012 | 19.3k | |
2013 | 19.3k | // Track local defs so we can update liveins. |
2014 | 48.9k | for (const MachineOperand &MO : TIB->operands()) { |
2015 | 48.9k | if (!MO.isReg() || 48.9k !MO.isDef()39.0k || MO.isDead()25.4k ) |
2016 | 23.8k | continue; |
2017 | 25.1k | unsigned Reg = MO.getReg(); |
2018 | 25.1k | if (!Reg || 25.1k TargetRegisterInfo::isVirtualRegister(Reg)25.1k ) |
2019 | 0 | continue; |
2020 | 25.1k | LocalDefs.push_back(Reg); |
2021 | 25.1k | addRegAndItsAliases(Reg, TRI, ActiveDefsSet); |
2022 | 25.1k | addRegAndItsAliases(Reg, TRI, AllDefsSet); |
2023 | 25.1k | } |
2024 | 1.02M | |
2025 | 1.02M | HasDups = true; |
2026 | 1.02M | ++TIB; |
2027 | 1.02M | ++FIB; |
2028 | 1.02M | } |
2029 | 1.01M | |
2030 | 1.01M | if (!HasDups) |
2031 | 995k | return false; |
2032 | 18.8k | |
2033 | 18.8k | MBB->splice(Loc, TBB, TBB->begin(), TIB); |
2034 | 18.8k | FBB->erase(FBB->begin(), FIB); |
2035 | 18.8k | |
2036 | 18.8k | // Update livein's. |
2037 | 18.8k | bool ChangedLiveIns = false; |
2038 | 44.0k | for (unsigned i = 0, e = LocalDefs.size(); i != e44.0k ; ++i25.1k ) { |
2039 | 25.1k | unsigned Def = LocalDefs[i]; |
2040 | 25.1k | if (ActiveDefsSet.count(Def)25.1k ) { |
2041 | 25.0k | TBB->addLiveIn(Def); |
2042 | 25.0k | FBB->addLiveIn(Def); |
2043 | 25.0k | ChangedLiveIns = true; |
2044 | 25.0k | } |
2045 | 25.1k | } |
2046 | 606 | for (unsigned K : LocalKills) { |
2047 | 606 | TBB->removeLiveIn(K); |
2048 | 606 | FBB->removeLiveIn(K); |
2049 | 606 | ChangedLiveIns = true; |
2050 | 606 | } |
2051 | 18.8k | |
2052 | 18.8k | if (ChangedLiveIns18.8k ) { |
2053 | 18.8k | TBB->sortUniqueLiveIns(); |
2054 | 18.8k | FBB->sortUniqueLiveIns(); |
2055 | 18.8k | } |
2056 | 9.33M | |
2057 | 9.33M | ++NumHoist; |
2058 | 9.33M | return true; |
2059 | 9.33M | } |