/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Utils/CodeExtractor.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- CodeExtractor.cpp - Pull code region into a new function -----------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This file implements the interface to tear out a code region, such as an |
11 | | // individual loop or a parallel section, into a new function, replacing it with |
12 | | // a call to the new function. |
13 | | // |
14 | | //===----------------------------------------------------------------------===// |
15 | | |
16 | | #include "llvm/Transforms/Utils/CodeExtractor.h" |
17 | | #include "llvm/ADT/STLExtras.h" |
18 | | #include "llvm/ADT/SetVector.h" |
19 | | #include "llvm/ADT/StringExtras.h" |
20 | | #include "llvm/Analysis/BlockFrequencyInfo.h" |
21 | | #include "llvm/Analysis/BlockFrequencyInfoImpl.h" |
22 | | #include "llvm/Analysis/BranchProbabilityInfo.h" |
23 | | #include "llvm/Analysis/LoopInfo.h" |
24 | | #include "llvm/Analysis/RegionInfo.h" |
25 | | #include "llvm/Analysis/RegionIterator.h" |
26 | | #include "llvm/IR/Constants.h" |
27 | | #include "llvm/IR/DerivedTypes.h" |
28 | | #include "llvm/IR/Dominators.h" |
29 | | #include "llvm/IR/Instructions.h" |
30 | | #include "llvm/IR/IntrinsicInst.h" |
31 | | #include "llvm/IR/Intrinsics.h" |
32 | | #include "llvm/IR/LLVMContext.h" |
33 | | #include "llvm/IR/MDBuilder.h" |
34 | | #include "llvm/IR/Module.h" |
35 | | #include "llvm/IR/Verifier.h" |
36 | | #include "llvm/Pass.h" |
37 | | #include "llvm/Support/BlockFrequency.h" |
38 | | #include "llvm/Support/CommandLine.h" |
39 | | #include "llvm/Support/Debug.h" |
40 | | #include "llvm/Support/ErrorHandling.h" |
41 | | #include "llvm/Support/raw_ostream.h" |
42 | | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
43 | | #include <algorithm> |
44 | | #include <set> |
45 | | using namespace llvm; |
46 | | |
47 | | #define DEBUG_TYPE "code-extractor" |
48 | | |
49 | | // Provide a command-line option to aggregate function arguments into a struct |
50 | | // for functions produced by the code extractor. This is useful when converting |
51 | | // extracted functions to pthread-based code, as only one argument (void*) can |
52 | | // be passed in to pthread_create(). |
53 | | static cl::opt<bool> |
54 | | AggregateArgsOpt("aggregate-extracted-args", cl::Hidden, |
55 | | cl::desc("Aggregate arguments to code-extracted functions")); |
56 | | |
57 | | /// \brief Test whether a block is valid for extraction. |
58 | 154 | bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) { |
59 | 154 | // Landing pads must be in the function where they were inserted for cleanup. |
60 | 154 | if (BB.isEHPad()) |
61 | 1 | return false; |
62 | 153 | // taking the address of a basic block moved to another function is illegal |
63 | 153 | if (153 BB.hasAddressTaken()153 ) |
64 | 2 | return false; |
65 | 151 | |
66 | 151 | // don't hoist code that uses another basicblock address, as it's likely to |
67 | 151 | // lead to unexpected behavior, like cross-function jumps |
68 | 151 | SmallPtrSet<User const *, 16> Visited; |
69 | 151 | SmallVector<User const *, 16> ToVisit; |
70 | 151 | |
71 | 151 | for (Instruction const &Inst : BB) |
72 | 619 | ToVisit.push_back(&Inst); |
73 | 151 | |
74 | 1.42k | while (!ToVisit.empty()1.42k ) { |
75 | 1.26k | User const *Curr = ToVisit.pop_back_val(); |
76 | 1.26k | if (!Visited.insert(Curr).second) |
77 | 381 | continue; |
78 | 888 | if (888 isa<BlockAddress const>(Curr)888 ) |
79 | 0 | return false; // even a reference to self is likely to be not compatible |
80 | 888 | |
81 | 888 | if (888 isa<Instruction>(Curr) && 888 cast<Instruction>(Curr)->getParent() != &BB666 ) |
82 | 47 | continue; |
83 | 841 | |
84 | 841 | for (auto const &U : Curr->operands()) 841 { |
85 | 901 | if (auto *UU = dyn_cast<User>(U)) |
86 | 650 | ToVisit.push_back(UU); |
87 | 901 | } |
88 | 1.26k | } |
89 | 151 | |
90 | 151 | // Don't hoist code containing allocas, invokes, or vastarts. |
91 | 769 | for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); 151 I != E769 ; ++I618 ) { |
92 | 619 | if (isa<AllocaInst>(I) || 619 isa<InvokeInst>(I)619 ) |
93 | 1 | return false; |
94 | 618 | if (const CallInst *618 CI618 = dyn_cast<CallInst>(I)) |
95 | 377 | if (const Function *377 F377 = CI->getCalledFunction()) |
96 | 377 | if (377 F->getIntrinsicID() == Intrinsic::vastart377 ) |
97 | 0 | return false; |
98 | 619 | } |
99 | 151 | |
100 | 150 | return true; |
101 | 154 | } |
102 | | |
103 | | /// \brief Build a set of blocks to extract if the input blocks are viable. |
104 | | static SetVector<BasicBlock *> |
105 | 88 | buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT) { |
106 | 88 | assert(!BBs.empty() && "The set of blocks to extract must be non-empty"); |
107 | 88 | SetVector<BasicBlock *> Result; |
108 | 88 | |
109 | 88 | // Loop over the blocks, adding them to our set-vector, and aborting with an |
110 | 88 | // empty set if we encounter invalid blocks. |
111 | 155 | for (BasicBlock *BB : BBs) { |
112 | 155 | |
113 | 155 | // If this block is dead, don't process it. |
114 | 155 | if (DT && 155 !DT->isReachableFromEntry(BB)146 ) |
115 | 1 | continue; |
116 | 154 | |
117 | 154 | if (154 !Result.insert(BB)154 ) |
118 | 0 | llvm_unreachable("Repeated basic blocks in extraction input"); |
119 | 154 | if (154 !CodeExtractor::isBlockValidForExtraction(*BB)154 ) { |
120 | 4 | Result.clear(); |
121 | 4 | return Result; |
122 | 4 | } |
123 | 84 | } |
124 | 84 | |
125 | | #ifndef NDEBUG |
126 | | for (SetVector<BasicBlock *>::iterator I = std::next(Result.begin()), |
127 | | E = Result.end(); |
128 | | I != E; ++I) |
129 | | for (pred_iterator PI = pred_begin(*I), PE = pred_end(*I); |
130 | | PI != PE; ++PI) |
131 | | assert(Result.count(*PI) && |
132 | | "No blocks in this region may have entries from outside the region" |
133 | | " except for the first block!"); |
134 | | #endif |
135 | | |
136 | 84 | return Result; |
137 | 84 | } |
138 | | |
139 | | CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, |
140 | | bool AggregateArgs, BlockFrequencyInfo *BFI, |
141 | | BranchProbabilityInfo *BPI) |
142 | | : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), |
143 | 79 | BPI(BPI), Blocks(buildExtractionBlockSet(BBs, DT)), NumExitBlocks(~0U) {} |
144 | | |
145 | | CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, |
146 | | BlockFrequencyInfo *BFI, |
147 | | BranchProbabilityInfo *BPI) |
148 | | : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), |
149 | | BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT)), |
150 | 9 | NumExitBlocks(~0U) {} |
151 | | |
152 | | /// definedInRegion - Return true if the specified value is defined in the |
153 | | /// extracted region. |
154 | 233 | static bool definedInRegion(const SetVector<BasicBlock *> &Blocks, Value *V) { |
155 | 233 | if (Instruction *I = dyn_cast<Instruction>(V)) |
156 | 233 | if (233 Blocks.count(I->getParent())233 ) |
157 | 140 | return true; |
158 | 93 | return false; |
159 | 93 | } |
160 | | |
161 | | /// definedInCaller - Return true if the specified value is defined in the |
162 | | /// function being code extracted, but not in the region being extracted. |
163 | | /// These values must be passed in as live-ins to the function. |
164 | 861 | static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) { |
165 | 861 | if (isa<Argument>(V)861 ) return true63 ; |
166 | 798 | if (Instruction *798 I798 = dyn_cast<Instruction>(V)) |
167 | 111 | if (111 !Blocks.count(I->getParent())111 ) |
168 | 31 | return true; |
169 | 767 | return false; |
170 | 767 | } |
171 | | |
172 | 84 | static BasicBlock *getCommonExitBlock(const SetVector<BasicBlock *> &Blocks) { |
173 | 84 | BasicBlock *CommonExitBlock = nullptr; |
174 | 140 | auto hasNonCommonExitSucc = [&](BasicBlock *Block) { |
175 | 177 | for (auto *Succ : successors(Block)) { |
176 | 177 | // Internal edges, ok. |
177 | 177 | if (Blocks.count(Succ)) |
178 | 81 | continue; |
179 | 96 | if (96 !CommonExitBlock96 ) { |
180 | 84 | CommonExitBlock = Succ; |
181 | 84 | continue; |
182 | 84 | } |
183 | 12 | if (12 CommonExitBlock == Succ12 ) |
184 | 4 | continue; |
185 | 8 | |
186 | 8 | return true; |
187 | 8 | } |
188 | 132 | return false; |
189 | 132 | }; |
190 | 84 | |
191 | 84 | if (any_of(Blocks, hasNonCommonExitSucc)) |
192 | 8 | return nullptr; |
193 | 76 | |
194 | 76 | return CommonExitBlock; |
195 | 76 | } |
196 | | |
197 | | bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers( |
198 | 16 | Instruction *Addr) const { |
199 | 16 | AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets()); |
200 | 16 | Function *Func = (*Blocks.begin())->getParent(); |
201 | 48 | for (BasicBlock &BB : *Func) { |
202 | 48 | if (Blocks.count(&BB)) |
203 | 12 | continue; |
204 | 36 | for (Instruction &II : BB) 36 { |
205 | 136 | |
206 | 136 | if (isa<DbgInfoIntrinsic>(II)) |
207 | 0 | continue; |
208 | 136 | |
209 | 136 | unsigned Opcode = II.getOpcode(); |
210 | 136 | Value *MemAddr = nullptr; |
211 | 136 | switch (Opcode) { |
212 | 18 | case Instruction::Store: |
213 | 18 | case Instruction::Load: { |
214 | 18 | if (Opcode == Instruction::Store18 ) { |
215 | 0 | StoreInst *SI = cast<StoreInst>(&II); |
216 | 0 | MemAddr = SI->getPointerOperand(); |
217 | 18 | } else { |
218 | 18 | LoadInst *LI = cast<LoadInst>(&II); |
219 | 18 | MemAddr = LI->getPointerOperand(); |
220 | 18 | } |
221 | 18 | // Global variable can not be aliased with locals. |
222 | 18 | if (dyn_cast<Constant>(MemAddr)) |
223 | 14 | break; |
224 | 4 | Value *Base = MemAddr->stripInBoundsConstantOffsets(); |
225 | 4 | if (!dyn_cast<AllocaInst>(Base) || 4 Base == AI0 ) |
226 | 4 | return false; |
227 | 0 | break; |
228 | 0 | } |
229 | 118 | default: { |
230 | 118 | IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II); |
231 | 118 | if (IntrInst118 ) { |
232 | 38 | if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start || |
233 | 14 | IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) |
234 | 38 | break; |
235 | 0 | return false; |
236 | 0 | } |
237 | 80 | // Treat all the other cases conservatively if it has side effects. |
238 | 80 | if (80 II.mayHaveSideEffects()80 ) |
239 | 2 | return false; |
240 | 10 | } |
241 | 136 | } |
242 | 136 | } |
243 | 48 | } |
244 | 10 | |
245 | 10 | return true; |
246 | 10 | } |
247 | | |
248 | | BasicBlock * |
249 | 8 | CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) { |
250 | 8 | BasicBlock *SinglePredFromOutlineRegion = nullptr; |
251 | 8 | assert(!Blocks.count(CommonExitBlock) && |
252 | 8 | "Expect a block outside the region!"); |
253 | 16 | for (auto *Pred : predecessors(CommonExitBlock)) { |
254 | 16 | if (!Blocks.count(Pred)) |
255 | 6 | continue; |
256 | 10 | if (10 !SinglePredFromOutlineRegion10 ) { |
257 | 8 | SinglePredFromOutlineRegion = Pred; |
258 | 10 | } else if (2 SinglePredFromOutlineRegion != Pred2 ) { |
259 | 2 | SinglePredFromOutlineRegion = nullptr; |
260 | 2 | break; |
261 | 2 | } |
262 | 8 | } |
263 | 8 | |
264 | 8 | if (SinglePredFromOutlineRegion) |
265 | 6 | return SinglePredFromOutlineRegion; |
266 | 2 | |
267 | | #ifndef NDEBUG |
268 | | auto getFirstPHI = [](BasicBlock *BB) { |
269 | | BasicBlock::iterator I = BB->begin(); |
270 | | PHINode *FirstPhi = nullptr; |
271 | | while (I != BB->end()) { |
272 | | PHINode *Phi = dyn_cast<PHINode>(I); |
273 | | if (!Phi) |
274 | | break; |
275 | | if (!FirstPhi) { |
276 | | FirstPhi = Phi; |
277 | | break; |
278 | | } |
279 | | } |
280 | | return FirstPhi; |
281 | | }; |
282 | | // If there are any phi nodes, the single pred either exists or has already |
283 | | // be created before code extraction. |
284 | | assert(!getFirstPHI(CommonExitBlock) && "Phi not expected"); |
285 | | #endif |
286 | | |
287 | 2 | BasicBlock *NewExitBlock = CommonExitBlock->splitBasicBlock( |
288 | 2 | CommonExitBlock->getFirstNonPHI()->getIterator()); |
289 | 2 | |
290 | 8 | for (auto *Pred : predecessors(CommonExitBlock)) { |
291 | 8 | if (Blocks.count(Pred)) |
292 | 4 | continue; |
293 | 4 | Pred->getTerminator()->replaceUsesOfWith(CommonExitBlock, NewExitBlock); |
294 | 4 | } |
295 | 8 | // Now add the old exit block to the outline region. |
296 | 8 | Blocks.insert(CommonExitBlock); |
297 | 8 | return CommonExitBlock; |
298 | 8 | } |
299 | | |
300 | | void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands, |
301 | 84 | BasicBlock *&ExitBlock) const { |
302 | 84 | Function *Func = (*Blocks.begin())->getParent(); |
303 | 84 | ExitBlock = getCommonExitBlock(Blocks); |
304 | 84 | |
305 | 500 | for (BasicBlock &BB : *Func) { |
306 | 500 | if (Blocks.count(&BB)) |
307 | 148 | continue; |
308 | 352 | for (Instruction &II : BB) 352 { |
309 | 548 | auto *AI = dyn_cast<AllocaInst>(&II); |
310 | 548 | if (!AI) |
311 | 524 | continue; |
312 | 24 | |
313 | 24 | // Find the pair of life time markers for address 'Addr' that are either |
314 | 24 | // defined inside the outline region or can legally be shrinkwrapped into |
315 | 24 | // the outline region. If there are not other untracked uses of the |
316 | 24 | // address, return the pair of markers if found; otherwise return a pair |
317 | 24 | // of nullptr. |
318 | 24 | auto GetLifeTimeMarkers = |
319 | 24 | [&](Instruction *Addr, bool &SinkLifeStart, |
320 | 50 | bool &HoistLifeEnd) -> std::pair<Instruction *, Instruction *> { |
321 | 50 | Instruction *LifeStart = nullptr, *LifeEnd = nullptr; |
322 | 50 | |
323 | 94 | for (User *U : Addr->users()) { |
324 | 94 | IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U); |
325 | 94 | if (IntrInst94 ) { |
326 | 40 | if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start40 ) { |
327 | 20 | // Do not handle the case where AI has multiple start markers. |
328 | 20 | if (LifeStart) |
329 | 0 | return std::make_pair<Instruction *>(nullptr, nullptr); |
330 | 20 | LifeStart = IntrInst; |
331 | 20 | } |
332 | 40 | if (40 IntrInst->getIntrinsicID() == Intrinsic::lifetime_end40 ) { |
333 | 20 | if (LifeEnd) |
334 | 0 | return std::make_pair<Instruction *>(nullptr, nullptr); |
335 | 20 | LifeEnd = IntrInst; |
336 | 20 | } |
337 | 40 | continue; |
338 | 54 | } |
339 | 54 | // Find untracked uses of the address, bail. |
340 | 54 | if (54 !definedInRegion(Blocks, U)54 ) |
341 | 24 | return std::make_pair<Instruction *>(nullptr, nullptr); |
342 | 26 | } |
343 | 26 | |
344 | 26 | if (26 !LifeStart || 26 !LifeEnd20 ) |
345 | 6 | return std::make_pair<Instruction *>(nullptr, nullptr); |
346 | 20 | |
347 | 20 | SinkLifeStart = !definedInRegion(Blocks, LifeStart); |
348 | 20 | HoistLifeEnd = !definedInRegion(Blocks, LifeEnd); |
349 | 20 | // Do legality Check. |
350 | 20 | if ((SinkLifeStart || 20 HoistLifeEnd4 ) && |
351 | 16 | !isLegalToShrinkwrapLifetimeMarkers(Addr)) |
352 | 6 | return std::make_pair<Instruction *>(nullptr, nullptr); |
353 | 14 | |
354 | 14 | // Check to see if we have a place to do hoisting, if not, bail. |
355 | 14 | if (14 HoistLifeEnd && 14 !ExitBlock10 ) |
356 | 0 | return std::make_pair<Instruction *>(nullptr, nullptr); |
357 | 14 | |
358 | 14 | return std::make_pair(LifeStart, LifeEnd); |
359 | 14 | }; |
360 | 24 | |
361 | 24 | bool SinkLifeStart = false, HoistLifeEnd = false; |
362 | 24 | auto Markers = GetLifeTimeMarkers(AI, SinkLifeStart, HoistLifeEnd); |
363 | 24 | |
364 | 24 | if (Markers.first24 ) { |
365 | 2 | if (SinkLifeStart) |
366 | 0 | SinkCands.insert(Markers.first); |
367 | 2 | SinkCands.insert(AI); |
368 | 2 | if (HoistLifeEnd) |
369 | 0 | HoistCands.insert(Markers.second); |
370 | 2 | continue; |
371 | 2 | } |
372 | 22 | |
373 | 22 | // Follow the bitcast. |
374 | 22 | Instruction *MarkerAddr = nullptr; |
375 | 44 | for (User *U : AI->users()) { |
376 | 44 | |
377 | 44 | if (U->stripInBoundsConstantOffsets() == AI44 ) { |
378 | 26 | SinkLifeStart = false; |
379 | 26 | HoistLifeEnd = false; |
380 | 26 | Instruction *Bitcast = cast<Instruction>(U); |
381 | 26 | Markers = GetLifeTimeMarkers(Bitcast, SinkLifeStart, HoistLifeEnd); |
382 | 26 | if (Markers.first26 ) { |
383 | 12 | MarkerAddr = Bitcast; |
384 | 12 | continue; |
385 | 12 | } |
386 | 32 | } |
387 | 32 | |
388 | 32 | // Found unknown use of AI. |
389 | 32 | if (32 !definedInRegion(Blocks, U)32 ) { |
390 | 10 | MarkerAddr = nullptr; |
391 | 10 | break; |
392 | 10 | } |
393 | 22 | } |
394 | 22 | |
395 | 22 | if (MarkerAddr22 ) { |
396 | 12 | if (SinkLifeStart) |
397 | 10 | SinkCands.insert(Markers.first); |
398 | 12 | if (!definedInRegion(Blocks, MarkerAddr)) |
399 | 12 | SinkCands.insert(MarkerAddr); |
400 | 12 | SinkCands.insert(AI); |
401 | 12 | if (HoistLifeEnd) |
402 | 10 | HoistCands.insert(Markers.second); |
403 | 12 | } |
404 | 548 | } |
405 | 500 | } |
406 | 84 | } |
407 | | |
408 | | void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, |
409 | 84 | const ValueSet &SinkCands) const { |
410 | 84 | |
411 | 148 | for (BasicBlock *BB : Blocks) { |
412 | 148 | // If a used value is defined outside the region, it's an input. If an |
413 | 148 | // instruction is used outside the region, it's an output. |
414 | 612 | for (Instruction &II : *BB) { |
415 | 1.49k | for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE; |
416 | 885 | ++OI885 ) { |
417 | 885 | Value *V = *OI; |
418 | 885 | if (!SinkCands.count(V) && 885 definedInCaller(Blocks, V)861 ) |
419 | 94 | Inputs.insert(V); |
420 | 885 | } |
421 | 612 | |
422 | 612 | for (User *U : II.users()) |
423 | 95 | if (95 !definedInRegion(Blocks, U)95 ) { |
424 | 15 | Outputs.insert(&II); |
425 | 15 | break; |
426 | 15 | } |
427 | 84 | } |
428 | 148 | } |
429 | 84 | } |
430 | | |
431 | | /// severSplitPHINodes - If a PHI node has multiple inputs from outside of the |
432 | | /// region, we need to split the entry block of the region so that the PHI node |
433 | | /// is easier to deal with. |
434 | 84 | void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { |
435 | 84 | unsigned NumPredsFromRegion = 0; |
436 | 84 | unsigned NumPredsOutsideRegion = 0; |
437 | 84 | |
438 | 84 | if (Header != &Header->getParent()->getEntryBlock()84 ) { |
439 | 81 | PHINode *PN = dyn_cast<PHINode>(Header->begin()); |
440 | 81 | if (!PN81 ) return79 ; // No PHI nodes. |
441 | 2 | |
442 | 2 | // If the header node contains any PHI nodes, check to see if there is more |
443 | 2 | // than one entry from outside the region. If so, we need to sever the |
444 | 2 | // header block into two. |
445 | 4 | for (unsigned i = 0, e = PN->getNumIncomingValues(); 2 i != e4 ; ++i2 ) |
446 | 2 | if (2 Blocks.count(PN->getIncomingBlock(i))2 ) |
447 | 0 | ++NumPredsFromRegion; |
448 | 2 | else |
449 | 2 | ++NumPredsOutsideRegion; |
450 | 2 | |
451 | 2 | // If there is one (or fewer) predecessor from outside the region, we don't |
452 | 2 | // need to do anything special. |
453 | 2 | if (NumPredsOutsideRegion <= 12 ) return2 ; |
454 | 3 | } |
455 | 3 | |
456 | 3 | // Otherwise, we need to split the header block into two pieces: one |
457 | 3 | // containing PHI nodes merging values from outside of the region, and a |
458 | 3 | // second that contains all of the code for the block and merges back any |
459 | 3 | // incoming values from inside of the region. |
460 | 3 | BasicBlock *NewBB = llvm::SplitBlock(Header, Header->getFirstNonPHI(), DT); |
461 | 3 | |
462 | 3 | // We only want to code extract the second block now, and it becomes the new |
463 | 3 | // header of the region. |
464 | 3 | BasicBlock *OldPred = Header; |
465 | 3 | Blocks.remove(OldPred); |
466 | 3 | Blocks.insert(NewBB); |
467 | 3 | Header = NewBB; |
468 | 3 | |
469 | 3 | // Okay, now we need to adjust the PHI nodes and any branches from within the |
470 | 3 | // region to go to the new header block instead of the old header block. |
471 | 3 | if (NumPredsFromRegion3 ) { |
472 | 0 | PHINode *PN = cast<PHINode>(OldPred->begin()); |
473 | 0 | // Loop over all of the predecessors of OldPred that are in the region, |
474 | 0 | // changing them to branch to NewBB instead. |
475 | 0 | for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e0 ; ++i0 ) |
476 | 0 | if (0 Blocks.count(PN->getIncomingBlock(i))0 ) { |
477 | 0 | TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator(); |
478 | 0 | TI->replaceUsesOfWith(OldPred, NewBB); |
479 | 0 | } |
480 | 0 |
|
481 | 0 | // Okay, everything within the region is now branching to the right block, we |
482 | 0 | // just have to update the PHI nodes now, inserting PHI nodes into NewBB. |
483 | 0 | BasicBlock::iterator AfterPHIs; |
484 | 0 | for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs)0 ; ++AfterPHIs0 ) { |
485 | 0 | PHINode *PN = cast<PHINode>(AfterPHIs); |
486 | 0 | // Create a new PHI node in the new region, which has an incoming value |
487 | 0 | // from OldPred of PN. |
488 | 0 | PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, |
489 | 0 | PN->getName() + ".ce", &NewBB->front()); |
490 | 0 | PN->replaceAllUsesWith(NewPN); |
491 | 0 | NewPN->addIncoming(PN, OldPred); |
492 | 0 |
|
493 | 0 | // Loop over all of the incoming value in PN, moving them to NewPN if they |
494 | 0 | // are from the extracted region. |
495 | 0 | for (unsigned i = 0; i != PN->getNumIncomingValues()0 ; ++i0 ) { |
496 | 0 | if (Blocks.count(PN->getIncomingBlock(i))0 ) { |
497 | 0 | NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i)); |
498 | 0 | PN->removeIncomingValue(i); |
499 | 0 | --i; |
500 | 0 | } |
501 | 0 | } |
502 | 0 | } |
503 | 0 | } |
504 | 84 | } |
505 | | |
506 | 84 | void CodeExtractor::splitReturnBlocks() { |
507 | 84 | for (BasicBlock *Block : Blocks) |
508 | 148 | if (ReturnInst *148 RI148 = dyn_cast<ReturnInst>(Block->getTerminator())) { |
509 | 7 | BasicBlock *New = |
510 | 7 | Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret"); |
511 | 7 | if (DT7 ) { |
512 | 3 | // Old dominates New. New node dominates all other nodes dominated |
513 | 3 | // by Old. |
514 | 3 | DomTreeNode *OldNode = DT->getNode(Block); |
515 | 3 | SmallVector<DomTreeNode *, 8> Children(OldNode->begin(), |
516 | 3 | OldNode->end()); |
517 | 3 | |
518 | 3 | DomTreeNode *NewNode = DT->addNewBlock(New, Block); |
519 | 3 | |
520 | 3 | for (DomTreeNode *I : Children) |
521 | 0 | DT->changeImmediateDominator(I, NewNode); |
522 | 3 | } |
523 | 148 | } |
524 | 84 | } |
525 | | |
526 | | /// constructFunction - make a function based on inputs and outputs, as follows: |
527 | | /// f(in0, ..., inN, out0, ..., outN) |
528 | | /// |
529 | | Function *CodeExtractor::constructFunction(const ValueSet &inputs, |
530 | | const ValueSet &outputs, |
531 | | BasicBlock *header, |
532 | | BasicBlock *newRootNode, |
533 | | BasicBlock *newHeader, |
534 | | Function *oldFunction, |
535 | 84 | Module *M) { |
536 | 84 | DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); |
537 | 84 | DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); |
538 | 84 | |
539 | 84 | // This function returns unsigned, outputs will go back by reference. |
540 | 84 | switch (NumExitBlocks) { |
541 | 76 | case 0: |
542 | 76 | case 1: RetTy = Type::getVoidTy(header->getContext()); break; |
543 | 7 | case 2: RetTy = Type::getInt1Ty(header->getContext()); break; |
544 | 1 | default: RetTy = Type::getInt16Ty(header->getContext()); break; |
545 | 84 | } |
546 | 84 | |
547 | 84 | std::vector<Type*> paramTy; |
548 | 84 | |
549 | 84 | // Add the types of the input values to the function's argument list |
550 | 52 | for (Value *value : inputs) { |
551 | 52 | DEBUG(dbgs() << "value used in func: " << *value << "\n"); |
552 | 52 | paramTy.push_back(value->getType()); |
553 | 52 | } |
554 | 84 | |
555 | 84 | // Add the types of the output values to the function's argument list. |
556 | 15 | for (Value *output : outputs) { |
557 | 15 | DEBUG(dbgs() << "instr used in func: " << *output << "\n"); |
558 | 15 | if (AggregateArgs) |
559 | 0 | paramTy.push_back(output->getType()); |
560 | 15 | else |
561 | 15 | paramTy.push_back(PointerType::getUnqual(output->getType())); |
562 | 15 | } |
563 | 84 | |
564 | 84 | DEBUG({ |
565 | 84 | dbgs() << "Function type: " << *RetTy << " f("; |
566 | 84 | for (Type *i : paramTy) |
567 | 84 | dbgs() << *i << ", "; |
568 | 84 | dbgs() << ")\n"; |
569 | 84 | }); |
570 | 84 | |
571 | 84 | StructType *StructTy; |
572 | 84 | if (AggregateArgs && 84 (inputs.size() + outputs.size() > 0)0 ) { |
573 | 0 | StructTy = StructType::get(M->getContext(), paramTy); |
574 | 0 | paramTy.clear(); |
575 | 0 | paramTy.push_back(PointerType::getUnqual(StructTy)); |
576 | 0 | } |
577 | 84 | FunctionType *funcType = |
578 | 84 | FunctionType::get(RetTy, paramTy, false); |
579 | 84 | |
580 | 84 | // Create the new function |
581 | 84 | Function *newFunction = Function::Create(funcType, |
582 | 84 | GlobalValue::InternalLinkage, |
583 | 84 | oldFunction->getName() + "_" + |
584 | 84 | header->getName(), M); |
585 | 84 | // If the old function is no-throw, so is the new one. |
586 | 84 | if (oldFunction->doesNotThrow()) |
587 | 45 | newFunction->setDoesNotThrow(); |
588 | 84 | |
589 | 84 | // Inherit the uwtable attribute if we need to. |
590 | 84 | if (oldFunction->hasUWTable()) |
591 | 18 | newFunction->setHasUWTable(); |
592 | 84 | |
593 | 84 | // Inherit all of the target dependent attributes. |
594 | 84 | // (e.g. If the extracted region contains a call to an x86.sse |
595 | 84 | // instruction we need to make sure that the extracted region has the |
596 | 84 | // "target-features" attribute allowing it to be lowered. |
597 | 84 | // FIXME: This should be changed to check to see if a specific |
598 | 84 | // attribute can not be inherited. |
599 | 84 | AttrBuilder AB(oldFunction->getAttributes().getFnAttributes()); |
600 | 84 | for (const auto &Attr : AB.td_attrs()) |
601 | 4 | newFunction->addFnAttr(Attr.first, Attr.second); |
602 | 84 | |
603 | 84 | newFunction->getBasicBlockList().push_back(newRootNode); |
604 | 84 | |
605 | 84 | // Create an iterator to name all of the arguments we inserted. |
606 | 84 | Function::arg_iterator AI = newFunction->arg_begin(); |
607 | 84 | |
608 | 84 | // Rewrite all users of the inputs in the extracted region to use the |
609 | 84 | // arguments (or appropriate addressing into struct) instead. |
610 | 136 | for (unsigned i = 0, e = inputs.size(); i != e136 ; ++i52 ) { |
611 | 52 | Value *RewriteVal; |
612 | 52 | if (AggregateArgs52 ) { |
613 | 0 | Value *Idx[2]; |
614 | 0 | Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); |
615 | 0 | Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); |
616 | 0 | TerminatorInst *TI = newFunction->begin()->getTerminator(); |
617 | 0 | GetElementPtrInst *GEP = GetElementPtrInst::Create( |
618 | 0 | StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); |
619 | 0 | RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI); |
620 | 0 | } else |
621 | 52 | RewriteVal = &*AI++; |
622 | 52 | |
623 | 52 | std::vector<User*> Users(inputs[i]->user_begin(), inputs[i]->user_end()); |
624 | 52 | for (User *use : Users) |
625 | 132 | if (Instruction *132 inst132 = dyn_cast<Instruction>(use)) |
626 | 132 | if (132 Blocks.count(inst->getParent())132 ) |
627 | 94 | inst->replaceUsesOfWith(inputs[i], RewriteVal); |
628 | 52 | } |
629 | 84 | |
630 | 84 | // Set names for input and output arguments. |
631 | 84 | if (!AggregateArgs84 ) { |
632 | 84 | AI = newFunction->arg_begin(); |
633 | 136 | for (unsigned i = 0, e = inputs.size(); i != e136 ; ++i, ++AI52 ) |
634 | 52 | AI->setName(inputs[i]->getName()); |
635 | 99 | for (unsigned i = 0, e = outputs.size(); i != e99 ; ++i, ++AI15 ) |
636 | 15 | AI->setName(outputs[i]->getName()+".out"); |
637 | 84 | } |
638 | 84 | |
639 | 84 | // Rewrite branches to basic blocks outside of the loop to new dummy blocks |
640 | 84 | // within the new function. This must be done before we lose track of which |
641 | 84 | // blocks were originally in the code region. |
642 | 84 | std::vector<User*> Users(header->user_begin(), header->user_end()); |
643 | 266 | for (unsigned i = 0, e = Users.size(); i != e266 ; ++i182 ) |
644 | 84 | // The BasicBlock which contains the branch is not in the region |
645 | 84 | // modify the branch target to a new block |
646 | 182 | if (TerminatorInst *182 TI182 = dyn_cast<TerminatorInst>(Users[i])) |
647 | 182 | if (182 !Blocks.count(TI->getParent()) && |
648 | 174 | TI->getParent()->getParent() == oldFunction) |
649 | 90 | TI->replaceUsesOfWith(header, newHeader); |
650 | 84 | |
651 | 84 | return newFunction; |
652 | 84 | } |
653 | | |
654 | | /// FindPhiPredForUseInBlock - Given a value and a basic block, find a PHI |
655 | | /// that uses the value within the basic block, and return the predecessor |
656 | | /// block associated with that use, or return 0 if none is found. |
657 | 16 | static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) { |
658 | 16 | for (Use &U : Used->uses()) { |
659 | 16 | PHINode *P = dyn_cast<PHINode>(U.getUser()); |
660 | 16 | if (P && 16 P->getParent() == BB14 ) |
661 | 14 | return P->getIncomingBlock(U); |
662 | 2 | } |
663 | 2 | |
664 | 2 | return nullptr; |
665 | 2 | } |
666 | | |
667 | | /// emitCallAndSwitchStatement - This method sets up the caller side by adding |
668 | | /// the call instruction, splitting any PHI nodes in the header block as |
669 | | /// necessary. |
670 | | void CodeExtractor:: |
671 | | emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, |
672 | 84 | ValueSet &inputs, ValueSet &outputs) { |
673 | 84 | // Emit a call to the new function, passing in: *pointer to struct (if |
674 | 84 | // aggregating parameters), or plan inputs and allocated memory for outputs |
675 | 84 | std::vector<Value*> params, StructValues, ReloadOutputs, Reloads; |
676 | 84 | |
677 | 84 | Module *M = newFunction->getParent(); |
678 | 84 | LLVMContext &Context = M->getContext(); |
679 | 84 | const DataLayout &DL = M->getDataLayout(); |
680 | 84 | |
681 | 84 | // Add inputs as params, or to be filled into the struct |
682 | 84 | for (Value *input : inputs) |
683 | 52 | if (52 AggregateArgs52 ) |
684 | 0 | StructValues.push_back(input); |
685 | 52 | else |
686 | 52 | params.push_back(input); |
687 | 84 | |
688 | 84 | // Create allocas for the outputs |
689 | 15 | for (Value *output : outputs) { |
690 | 15 | if (AggregateArgs15 ) { |
691 | 0 | StructValues.push_back(output); |
692 | 15 | } else { |
693 | 15 | AllocaInst *alloca = |
694 | 15 | new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), |
695 | 15 | nullptr, output->getName() + ".loc", |
696 | 15 | &codeReplacer->getParent()->front().front()); |
697 | 15 | ReloadOutputs.push_back(alloca); |
698 | 15 | params.push_back(alloca); |
699 | 15 | } |
700 | 15 | } |
701 | 84 | |
702 | 84 | StructType *StructArgTy = nullptr; |
703 | 84 | AllocaInst *Struct = nullptr; |
704 | 84 | if (AggregateArgs && 84 (inputs.size() + outputs.size() > 0)0 ) { |
705 | 0 | std::vector<Type*> ArgTypes; |
706 | 0 | for (ValueSet::iterator v = StructValues.begin(), |
707 | 0 | ve = StructValues.end(); v != ve0 ; ++v0 ) |
708 | 0 | ArgTypes.push_back((*v)->getType()); |
709 | 0 |
|
710 | 0 | // Allocate a struct at the beginning of this function |
711 | 0 | StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); |
712 | 0 | Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, |
713 | 0 | "structArg", |
714 | 0 | &codeReplacer->getParent()->front().front()); |
715 | 0 | params.push_back(Struct); |
716 | 0 |
|
717 | 0 | for (unsigned i = 0, e = inputs.size(); i != e0 ; ++i0 ) { |
718 | 0 | Value *Idx[2]; |
719 | 0 | Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); |
720 | 0 | Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); |
721 | 0 | GetElementPtrInst *GEP = GetElementPtrInst::Create( |
722 | 0 | StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); |
723 | 0 | codeReplacer->getInstList().push_back(GEP); |
724 | 0 | StoreInst *SI = new StoreInst(StructValues[i], GEP); |
725 | 0 | codeReplacer->getInstList().push_back(SI); |
726 | 0 | } |
727 | 0 | } |
728 | 84 | |
729 | 84 | // Emit the call to the function |
730 | 84 | CallInst *call = CallInst::Create(newFunction, params, |
731 | 84 | NumExitBlocks > 1 ? "targetBlock"8 : ""76 ); |
732 | 84 | codeReplacer->getInstList().push_back(call); |
733 | 84 | |
734 | 84 | Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); |
735 | 84 | unsigned FirstOut = inputs.size(); |
736 | 84 | if (!AggregateArgs) |
737 | 84 | std::advance(OutputArgBegin, inputs.size()); |
738 | 84 | |
739 | 84 | // Reload the outputs passed in by reference |
740 | 99 | for (unsigned i = 0, e = outputs.size(); i != e99 ; ++i15 ) { |
741 | 15 | Value *Output = nullptr; |
742 | 15 | if (AggregateArgs15 ) { |
743 | 0 | Value *Idx[2]; |
744 | 0 | Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); |
745 | 0 | Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); |
746 | 0 | GetElementPtrInst *GEP = GetElementPtrInst::Create( |
747 | 0 | StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); |
748 | 0 | codeReplacer->getInstList().push_back(GEP); |
749 | 0 | Output = GEP; |
750 | 15 | } else { |
751 | 15 | Output = ReloadOutputs[i]; |
752 | 15 | } |
753 | 15 | LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload"); |
754 | 15 | Reloads.push_back(load); |
755 | 15 | codeReplacer->getInstList().push_back(load); |
756 | 15 | std::vector<User*> Users(outputs[i]->user_begin(), outputs[i]->user_end()); |
757 | 30 | for (unsigned u = 0, e = Users.size(); u != e30 ; ++u15 ) { |
758 | 15 | Instruction *inst = cast<Instruction>(Users[u]); |
759 | 15 | if (!Blocks.count(inst->getParent())) |
760 | 15 | inst->replaceUsesOfWith(outputs[i], load); |
761 | 15 | } |
762 | 15 | } |
763 | 84 | |
764 | 84 | // Now we can emit a switch statement using the call as a value. |
765 | 84 | SwitchInst *TheSwitch = |
766 | 84 | SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), |
767 | 84 | codeReplacer, 0, codeReplacer); |
768 | 84 | |
769 | 84 | // Since there may be multiple exits from the original region, make the new |
770 | 84 | // function return an unsigned, switch on that number. This loop iterates |
771 | 84 | // over all of the blocks in the extracted region, updating any terminator |
772 | 84 | // instructions in the to-be-extracted region that branch to blocks that are |
773 | 84 | // not in the region to be extracted. |
774 | 84 | std::map<BasicBlock*, BasicBlock*> ExitBlockMap; |
775 | 84 | |
776 | 84 | unsigned switchVal = 0; |
777 | 150 | for (BasicBlock *Block : Blocks) { |
778 | 150 | TerminatorInst *TI = Block->getTerminator(); |
779 | 341 | for (unsigned i = 0, e = TI->getNumSuccessors(); i != e341 ; ++i191 ) |
780 | 191 | if (191 !Blocks.count(TI->getSuccessor(i))191 ) { |
781 | 95 | BasicBlock *OldTarget = TI->getSuccessor(i); |
782 | 95 | // add a new basic block which returns the appropriate value |
783 | 95 | BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; |
784 | 95 | if (!NewTarget95 ) { |
785 | 93 | // If we don't already have an exit stub for this non-extracted |
786 | 93 | // destination, create one now! |
787 | 93 | NewTarget = BasicBlock::Create(Context, |
788 | 93 | OldTarget->getName() + ".exitStub", |
789 | 93 | newFunction); |
790 | 93 | unsigned SuccNum = switchVal++; |
791 | 93 | |
792 | 93 | Value *brVal = nullptr; |
793 | 93 | switch (NumExitBlocks) { |
794 | 76 | case 0: |
795 | 76 | case 1: break; // No value needed. |
796 | 14 | case 2: // Conditional branch, return a bool |
797 | 14 | brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); |
798 | 14 | break; |
799 | 3 | default: |
800 | 3 | brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); |
801 | 3 | break; |
802 | 93 | } |
803 | 93 | |
804 | 93 | ReturnInst *NTRet = ReturnInst::Create(Context, brVal, NewTarget); |
805 | 93 | |
806 | 93 | // Update the switch instruction. |
807 | 93 | TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), |
808 | 93 | SuccNum), |
809 | 93 | OldTarget); |
810 | 93 | |
811 | 93 | // Restore values just before we exit |
812 | 93 | Function::arg_iterator OAI = OutputArgBegin; |
813 | 109 | for (unsigned out = 0, e = outputs.size(); out != e109 ; ++out16 ) { |
814 | 16 | // For an invoke, the normal destination is the only one that is |
815 | 16 | // dominated by the result of the invocation |
816 | 16 | BasicBlock *DefBlock = cast<Instruction>(outputs[out])->getParent(); |
817 | 16 | |
818 | 16 | bool DominatesDef = true; |
819 | 16 | |
820 | 16 | BasicBlock *NormalDest = nullptr; |
821 | 16 | if (auto *Invoke = dyn_cast<InvokeInst>(outputs[out])) |
822 | 0 | NormalDest = Invoke->getNormalDest(); |
823 | 16 | |
824 | 16 | if (NormalDest16 ) { |
825 | 0 | DefBlock = NormalDest; |
826 | 0 |
|
827 | 0 | // Make sure we are looking at the original successor block, not |
828 | 0 | // at a newly inserted exit block, which won't be in the dominator |
829 | 0 | // info. |
830 | 0 | for (const auto &I : ExitBlockMap) |
831 | 0 | if (0 DefBlock == I.second0 ) { |
832 | 0 | DefBlock = I.first; |
833 | 0 | break; |
834 | 0 | } |
835 | 0 |
|
836 | 0 | // In the extract block case, if the block we are extracting ends |
837 | 0 | // with an invoke instruction, make sure that we don't emit a |
838 | 0 | // store of the invoke value for the unwind block. |
839 | 0 | if (!DT && 0 DefBlock != OldTarget0 ) |
840 | 0 | DominatesDef = false; |
841 | 0 | } |
842 | 16 | |
843 | 16 | if (DT16 ) { |
844 | 16 | DominatesDef = DT->dominates(DefBlock, OldTarget); |
845 | 16 | |
846 | 16 | // If the output value is used by a phi in the target block, |
847 | 16 | // then we need to test for dominance of the phi's predecessor |
848 | 16 | // instead. Unfortunately, this a little complicated since we |
849 | 16 | // have already rewritten uses of the value to uses of the reload. |
850 | 16 | BasicBlock* pred = FindPhiPredForUseInBlock(Reloads[out], |
851 | 16 | OldTarget); |
852 | 16 | if (pred && 16 DT14 && DT->dominates(DefBlock, pred)14 ) |
853 | 14 | DominatesDef = true; |
854 | 16 | } |
855 | 16 | |
856 | 16 | if (DominatesDef16 ) { |
857 | 15 | if (AggregateArgs15 ) { |
858 | 0 | Value *Idx[2]; |
859 | 0 | Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); |
860 | 0 | Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), |
861 | 0 | FirstOut+out); |
862 | 0 | GetElementPtrInst *GEP = GetElementPtrInst::Create( |
863 | 0 | StructArgTy, &*OAI, Idx, "gep_" + outputs[out]->getName(), |
864 | 0 | NTRet); |
865 | 0 | new StoreInst(outputs[out], GEP, NTRet); |
866 | 15 | } else { |
867 | 15 | new StoreInst(outputs[out], &*OAI, NTRet); |
868 | 15 | } |
869 | 15 | } |
870 | 16 | // Advance output iterator even if we don't emit a store |
871 | 16 | if (!AggregateArgs16 ) ++OAI16 ; |
872 | 16 | } |
873 | 93 | } |
874 | 95 | |
875 | 95 | // rewrite the original branch instruction with this new target |
876 | 95 | TI->setSuccessor(i, NewTarget); |
877 | 95 | } |
878 | 150 | } |
879 | 84 | |
880 | 84 | // Now that we've done the deed, simplify the switch instruction. |
881 | 84 | Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); |
882 | 84 | switch (NumExitBlocks) { |
883 | 0 | case 0: |
884 | 0 | // There are no successors (the block containing the switch itself), which |
885 | 0 | // means that previously this was the last part of the function, and hence |
886 | 0 | // this should be rewritten as a `ret' |
887 | 0 |
|
888 | 0 | // Check if the function should return a value |
889 | 0 | if (OldFnRetTy->isVoidTy()0 ) { |
890 | 0 | ReturnInst::Create(Context, nullptr, TheSwitch); // Return void |
891 | 0 | } else if (0 OldFnRetTy == TheSwitch->getCondition()->getType()0 ) { |
892 | 0 | // return what we have |
893 | 0 | ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); |
894 | 0 | } else { |
895 | 0 | // Otherwise we must have code extracted an unwind or something, just |
896 | 0 | // return whatever we want. |
897 | 0 | ReturnInst::Create(Context, |
898 | 0 | Constant::getNullValue(OldFnRetTy), TheSwitch); |
899 | 0 | } |
900 | 0 |
|
901 | 0 | TheSwitch->eraseFromParent(); |
902 | 0 | break; |
903 | 76 | case 1: |
904 | 76 | // Only a single destination, change the switch into an unconditional |
905 | 76 | // branch. |
906 | 76 | BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); |
907 | 76 | TheSwitch->eraseFromParent(); |
908 | 76 | break; |
909 | 7 | case 2: |
910 | 7 | BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), |
911 | 7 | call, TheSwitch); |
912 | 7 | TheSwitch->eraseFromParent(); |
913 | 7 | break; |
914 | 1 | default: |
915 | 1 | // Otherwise, make the default destination of the switch instruction be one |
916 | 1 | // of the other successors. |
917 | 1 | TheSwitch->setCondition(call); |
918 | 1 | TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); |
919 | 1 | // Remove redundant case |
920 | 1 | TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); |
921 | 1 | break; |
922 | 84 | } |
923 | 84 | } |
924 | | |
925 | 84 | void CodeExtractor::moveCodeToFunction(Function *newFunction) { |
926 | 84 | Function *oldFunc = (*Blocks.begin())->getParent(); |
927 | 84 | Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList(); |
928 | 84 | Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList(); |
929 | 84 | |
930 | 150 | for (BasicBlock *Block : Blocks) { |
931 | 150 | // Delete the basic block from the old function, and the list of blocks |
932 | 150 | oldBlocks.remove(Block); |
933 | 150 | |
934 | 150 | // Insert this basic block into the new function |
935 | 150 | newBlocks.push_back(Block); |
936 | 150 | } |
937 | 84 | } |
938 | | |
939 | | void CodeExtractor::calculateNewCallTerminatorWeights( |
940 | | BasicBlock *CodeReplacer, |
941 | | DenseMap<BasicBlock *, BlockFrequency> &ExitWeights, |
942 | 3 | BranchProbabilityInfo *BPI) { |
943 | 3 | typedef BlockFrequencyInfoImplBase::Distribution Distribution; |
944 | 3 | typedef BlockFrequencyInfoImplBase::BlockNode BlockNode; |
945 | 3 | |
946 | 3 | // Update the branch weights for the exit block. |
947 | 3 | TerminatorInst *TI = CodeReplacer->getTerminator(); |
948 | 3 | SmallVector<unsigned, 8> BranchWeights(TI->getNumSuccessors(), 0); |
949 | 3 | |
950 | 3 | // Block Frequency distribution with dummy node. |
951 | 3 | Distribution BranchDist; |
952 | 3 | |
953 | 3 | // Add each of the frequencies of the successors. |
954 | 9 | for (unsigned i = 0, e = TI->getNumSuccessors(); i < e9 ; ++i6 ) { |
955 | 6 | BlockNode ExitNode(i); |
956 | 6 | uint64_t ExitFreq = ExitWeights[TI->getSuccessor(i)].getFrequency(); |
957 | 6 | if (ExitFreq != 0) |
958 | 6 | BranchDist.addExit(ExitNode, ExitFreq); |
959 | 6 | else |
960 | 0 | BPI->setEdgeProbability(CodeReplacer, i, BranchProbability::getZero()); |
961 | 6 | } |
962 | 3 | |
963 | 3 | // Check for no total weight. |
964 | 3 | if (BranchDist.Total == 0) |
965 | 0 | return; |
966 | 3 | |
967 | 3 | // Normalize the distribution so that they can fit in unsigned. |
968 | 3 | BranchDist.normalize(); |
969 | 3 | |
970 | 3 | // Create normalized branch weights and set the metadata. |
971 | 9 | for (unsigned I = 0, E = BranchDist.Weights.size(); I < E9 ; ++I6 ) { |
972 | 6 | const auto &Weight = BranchDist.Weights[I]; |
973 | 6 | |
974 | 6 | // Get the weight and update the current BFI. |
975 | 6 | BranchWeights[Weight.TargetNode.Index] = Weight.Amount; |
976 | 6 | BranchProbability BP(Weight.Amount, BranchDist.Total); |
977 | 6 | BPI->setEdgeProbability(CodeReplacer, Weight.TargetNode.Index, BP); |
978 | 6 | } |
979 | 3 | TI->setMetadata( |
980 | 3 | LLVMContext::MD_prof, |
981 | 3 | MDBuilder(TI->getContext()).createBranchWeights(BranchWeights)); |
982 | 3 | } |
983 | | |
984 | 88 | Function *CodeExtractor::extractCodeRegion() { |
985 | 88 | if (!isEligible()) |
986 | 4 | return nullptr; |
987 | 84 | |
988 | 84 | ValueSet inputs, outputs, SinkingCands, HoistingCands; |
989 | 84 | BasicBlock *CommonExit = nullptr; |
990 | 84 | |
991 | 84 | // Assumption: this is a single-entry code region, and the header is the first |
992 | 84 | // block in the region. |
993 | 84 | BasicBlock *header = *Blocks.begin(); |
994 | 84 | |
995 | 84 | // Calculate the entry frequency of the new function before we change the root |
996 | 84 | // block. |
997 | 84 | BlockFrequency EntryFreq; |
998 | 84 | if (BFI84 ) { |
999 | 70 | assert(BPI && "Both BPI and BFI are required to preserve profile info"); |
1000 | 77 | for (BasicBlock *Pred : predecessors(header)) { |
1001 | 77 | if (Blocks.count(Pred)) |
1002 | 1 | continue; |
1003 | 76 | EntryFreq += |
1004 | 76 | BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header); |
1005 | 76 | } |
1006 | 70 | } |
1007 | 84 | |
1008 | 84 | // If we have to split PHI nodes or the entry block, do so now. |
1009 | 84 | severSplitPHINodes(header); |
1010 | 84 | |
1011 | 84 | // If we have any return instructions in the region, split those blocks so |
1012 | 84 | // that the return is not in the region. |
1013 | 84 | splitReturnBlocks(); |
1014 | 84 | |
1015 | 84 | Function *oldFunction = header->getParent(); |
1016 | 84 | |
1017 | 84 | // This takes place of the original loop |
1018 | 84 | BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), |
1019 | 84 | "codeRepl", oldFunction, |
1020 | 84 | header); |
1021 | 84 | |
1022 | 84 | // The new function needs a root node because other nodes can branch to the |
1023 | 84 | // head of the region, but the entry node of a function cannot have preds. |
1024 | 84 | BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), |
1025 | 84 | "newFuncRoot"); |
1026 | 84 | newFuncRoot->getInstList().push_back(BranchInst::Create(header)); |
1027 | 84 | |
1028 | 84 | findAllocas(SinkingCands, HoistingCands, CommonExit); |
1029 | 84 | assert(HoistingCands.empty() || CommonExit); |
1030 | 84 | |
1031 | 84 | // Find inputs to, outputs from the code region. |
1032 | 84 | findInputsOutputs(inputs, outputs, SinkingCands); |
1033 | 84 | |
1034 | 84 | // Now sink all instructions which only have non-phi uses inside the region |
1035 | 84 | for (auto *II : SinkingCands) |
1036 | 36 | cast<Instruction>(II)->moveBefore(*newFuncRoot, |
1037 | 36 | newFuncRoot->getFirstInsertionPt()); |
1038 | 84 | |
1039 | 84 | if (!HoistingCands.empty()84 ) { |
1040 | 8 | auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); |
1041 | 8 | Instruction *TI = HoistToBlock->getTerminator(); |
1042 | 8 | for (auto *II : HoistingCands) |
1043 | 10 | cast<Instruction>(II)->moveBefore(TI); |
1044 | 8 | } |
1045 | 84 | |
1046 | 84 | // Calculate the exit blocks for the extracted region and the total exit |
1047 | 84 | // weights for each of those blocks. |
1048 | 84 | DenseMap<BasicBlock *, BlockFrequency> ExitWeights; |
1049 | 84 | SmallPtrSet<BasicBlock *, 1> ExitBlocks; |
1050 | 150 | for (BasicBlock *Block : Blocks) { |
1051 | 341 | for (succ_iterator SI = succ_begin(Block), SE = succ_end(Block); SI != SE; |
1052 | 191 | ++SI191 ) { |
1053 | 191 | if (!Blocks.count(*SI)191 ) { |
1054 | 95 | // Update the branch weight for this successor. |
1055 | 95 | if (BFI95 ) { |
1056 | 73 | BlockFrequency &BF = ExitWeights[*SI]; |
1057 | 73 | BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, *SI); |
1058 | 73 | } |
1059 | 95 | ExitBlocks.insert(*SI); |
1060 | 95 | } |
1061 | 191 | } |
1062 | 150 | } |
1063 | 84 | NumExitBlocks = ExitBlocks.size(); |
1064 | 84 | |
1065 | 84 | // Construct new function based on inputs/outputs & add allocas for all defs. |
1066 | 84 | Function *newFunction = constructFunction(inputs, outputs, header, |
1067 | 84 | newFuncRoot, |
1068 | 84 | codeReplacer, oldFunction, |
1069 | 84 | oldFunction->getParent()); |
1070 | 84 | |
1071 | 84 | // Update the entry count of the function. |
1072 | 84 | if (BFI84 ) { |
1073 | 70 | Optional<uint64_t> EntryCount = |
1074 | 70 | BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); |
1075 | 70 | if (EntryCount.hasValue()) |
1076 | 4 | newFunction->setEntryCount(EntryCount.getValue()); |
1077 | 70 | BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); |
1078 | 70 | } |
1079 | 84 | |
1080 | 84 | emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs); |
1081 | 84 | |
1082 | 84 | moveCodeToFunction(newFunction); |
1083 | 84 | |
1084 | 84 | // Update the branch weights for the exit block. |
1085 | 84 | if (BFI && 84 NumExitBlocks > 170 ) |
1086 | 3 | calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); |
1087 | 84 | |
1088 | 84 | // Loop over all of the PHI nodes in the header block, and change any |
1089 | 84 | // references to the old incoming edge to be the new incoming edge. |
1090 | 86 | for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I)86 ; ++I2 ) { |
1091 | 2 | PHINode *PN = cast<PHINode>(I); |
1092 | 4 | for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e4 ; ++i2 ) |
1093 | 2 | if (2 !Blocks.count(PN->getIncomingBlock(i))2 ) |
1094 | 2 | PN->setIncomingBlock(i, newFuncRoot); |
1095 | 2 | } |
1096 | 84 | |
1097 | 84 | // Look at all successors of the codeReplacer block. If any of these blocks |
1098 | 84 | // had PHI nodes in them, we need to update the "from" block to be the code |
1099 | 84 | // replacer, not the original block in the extracted region. |
1100 | 84 | std::vector<BasicBlock*> Succs(succ_begin(codeReplacer), |
1101 | 84 | succ_end(codeReplacer)); |
1102 | 177 | for (unsigned i = 0, e = Succs.size(); i != e177 ; ++i93 ) |
1103 | 145 | for (BasicBlock::iterator I = Succs[i]->begin(); 93 isa<PHINode>(I)145 ; ++I52 ) { |
1104 | 52 | PHINode *PN = cast<PHINode>(I); |
1105 | 52 | std::set<BasicBlock*> ProcessedPreds; |
1106 | 162 | for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e162 ; ++i110 ) |
1107 | 110 | if (110 Blocks.count(PN->getIncomingBlock(i))110 ) { |
1108 | 54 | if (ProcessedPreds.insert(PN->getIncomingBlock(i)).second) |
1109 | 52 | PN->setIncomingBlock(i, codeReplacer); |
1110 | 2 | else { |
1111 | 2 | // There were multiple entries in the PHI for this block, now there |
1112 | 2 | // is only one, so remove the duplicated entries. |
1113 | 2 | PN->removeIncomingValue(i, false); |
1114 | 2 | --i; --e; |
1115 | 2 | } |
1116 | 110 | } |
1117 | 93 | } |
1118 | 84 | |
1119 | 84 | DEBUG(if (verifyFunction(*newFunction)) |
1120 | 88 | report_fatal_error("verifyFunction failed!")); |
1121 | 88 | return newFunction; |
1122 | 88 | } |