/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- IndirectCallPromotion.cpp - Optimizations based on value profiling ===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This file implements the transformation that promotes indirect calls to |
11 | | // conditional direct calls when the indirect-call value profile metadata is |
12 | | // available. |
13 | | // |
14 | | //===----------------------------------------------------------------------===// |
15 | | |
16 | | #include "llvm/ADT/ArrayRef.h" |
17 | | #include "llvm/ADT/Statistic.h" |
18 | | #include "llvm/ADT/StringRef.h" |
19 | | #include "llvm/ADT/Twine.h" |
20 | | #include "llvm/Analysis/BlockFrequencyInfo.h" |
21 | | #include "llvm/Analysis/GlobalsModRef.h" |
22 | | #include "llvm/Analysis/IndirectCallPromotionAnalysis.h" |
23 | | #include "llvm/Analysis/IndirectCallSiteVisitor.h" |
24 | | #include "llvm/Analysis/OptimizationDiagnosticInfo.h" |
25 | | #include "llvm/Analysis/ProfileSummaryInfo.h" |
26 | | #include "llvm/IR/BasicBlock.h" |
27 | | #include "llvm/IR/CallSite.h" |
28 | | #include "llvm/IR/DerivedTypes.h" |
29 | | #include "llvm/IR/DiagnosticInfo.h" |
30 | | #include "llvm/IR/Function.h" |
31 | | #include "llvm/IR/IRBuilder.h" |
32 | | #include "llvm/IR/InstrTypes.h" |
33 | | #include "llvm/IR/Instruction.h" |
34 | | #include "llvm/IR/Instructions.h" |
35 | | #include "llvm/IR/LLVMContext.h" |
36 | | #include "llvm/IR/MDBuilder.h" |
37 | | #include "llvm/IR/PassManager.h" |
38 | | #include "llvm/IR/Type.h" |
39 | | #include "llvm/Pass.h" |
40 | | #include "llvm/PassRegistry.h" |
41 | | #include "llvm/PassSupport.h" |
42 | | #include "llvm/ProfileData/InstrProf.h" |
43 | | #include "llvm/Support/Casting.h" |
44 | | #include "llvm/Support/CommandLine.h" |
45 | | #include "llvm/Support/Debug.h" |
46 | | #include "llvm/Support/ErrorHandling.h" |
47 | | #include "llvm/Support/MathExtras.h" |
48 | | #include "llvm/Transforms/Instrumentation.h" |
49 | | #include "llvm/Transforms/PGOInstrumentation.h" |
50 | | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
51 | | #include <cassert> |
52 | | #include <cstdint> |
53 | | #include <vector> |
54 | | |
55 | | using namespace llvm; |
56 | | |
57 | 48 | #define DEBUG_TYPE "pgo-icall-prom" |
58 | | |
59 | | STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions."); |
60 | | STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites."); |
61 | | |
62 | | // Command line option to disable indirect-call promotion with the default as |
63 | | // false. This is for debug purpose. |
64 | | static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden, |
65 | | cl::desc("Disable indirect call promotion")); |
66 | | |
67 | | // Set the cutoff value for the promotion. If the value is other than 0, we |
68 | | // stop the transformation once the total number of promotions equals the cutoff |
69 | | // value. |
70 | | // For debug use only. |
71 | | static cl::opt<unsigned> |
72 | | ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::ZeroOrMore, |
73 | | cl::desc("Max number of promotions for this compilation")); |
74 | | |
75 | | // If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped. |
76 | | // For debug use only. |
77 | | static cl::opt<unsigned> |
78 | | ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::ZeroOrMore, |
79 | | cl::desc("Skip Callsite up to this number for this compilation")); |
80 | | |
81 | | // Set if the pass is called in LTO optimization. The difference for LTO mode |
82 | | // is the pass won't prefix the source module name to the internal linkage |
83 | | // symbols. |
84 | | static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden, |
85 | | cl::desc("Run indirect-call promotion in LTO " |
86 | | "mode")); |
87 | | |
88 | | // Set if the pass is called in SamplePGO mode. The difference for SamplePGO |
89 | | // mode is it will add prof metadatato the created direct call. |
90 | | static cl::opt<bool> |
91 | | ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden, |
92 | | cl::desc("Run indirect-call promotion in SamplePGO mode")); |
93 | | |
94 | | // If the option is set to true, only call instructions will be considered for |
95 | | // transformation -- invoke instructions will be ignored. |
96 | | static cl::opt<bool> |
97 | | ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden, |
98 | | cl::desc("Run indirect-call promotion for call instructions " |
99 | | "only")); |
100 | | |
101 | | // If the option is set to true, only invoke instructions will be considered for |
102 | | // transformation -- call instructions will be ignored. |
103 | | static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false), |
104 | | cl::Hidden, |
105 | | cl::desc("Run indirect-call promotion for " |
106 | | "invoke instruction only")); |
107 | | |
108 | | // Dump the function level IR if the transformation happened in this |
109 | | // function. For debug use only. |
110 | | static cl::opt<bool> |
111 | | ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, |
112 | | cl::desc("Dump IR after transformation happens")); |
113 | | |
114 | | namespace { |
115 | | class PGOIndirectCallPromotionLegacyPass : public ModulePass { |
116 | | public: |
117 | | static char ID; |
118 | | |
119 | | PGOIndirectCallPromotionLegacyPass(bool InLTO = false, bool SamplePGO = false) |
120 | 386 | : ModulePass(ID), InLTO(InLTO), SamplePGO(SamplePGO) { |
121 | 386 | initializePGOIndirectCallPromotionLegacyPassPass( |
122 | 386 | *PassRegistry::getPassRegistry()); |
123 | 386 | } |
124 | | |
125 | 389 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
126 | 389 | AU.addRequired<ProfileSummaryInfoWrapperPass>(); |
127 | 389 | } |
128 | | |
129 | 3 | StringRef getPassName() const override { return "PGOIndirectCallPromotion"; } |
130 | | |
131 | | private: |
132 | | bool runOnModule(Module &M) override; |
133 | | |
134 | | // If this pass is called in LTO. We need to special handling the PGOFuncName |
135 | | // for the static variables due to LTO's internalization. |
136 | | bool InLTO; |
137 | | |
138 | | // If this pass is called in SamplePGO. We need to add the prof metadata to |
139 | | // the promoted direct call. |
140 | | bool SamplePGO; |
141 | | }; |
142 | | } // end anonymous namespace |
143 | | |
144 | | char PGOIndirectCallPromotionLegacyPass::ID = 0; |
145 | 8.21k | INITIALIZE_PASS_BEGIN8.21k (PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom",
|
146 | 8.21k | "Use PGO instrumentation profile to promote indirect " |
147 | 8.21k | "calls to direct calls.", |
148 | 8.21k | false, false) |
149 | 8.21k | INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) |
150 | 8.21k | INITIALIZE_PASS_END(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom", |
151 | | "Use PGO instrumentation profile to promote indirect " |
152 | | "calls to direct calls.", |
153 | | false, false) |
154 | | |
155 | | ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO, |
156 | 373 | bool SamplePGO) { |
157 | 373 | return new PGOIndirectCallPromotionLegacyPass(InLTO, SamplePGO); |
158 | 373 | } |
159 | | |
160 | | namespace { |
161 | | // The class for main data structure to promote indirect calls to conditional |
162 | | // direct calls. |
163 | | class ICallPromotionFunc { |
164 | | private: |
165 | | Function &F; |
166 | | Module *M; |
167 | | |
168 | | // Symtab that maps indirect call profile values to function names and |
169 | | // defines. |
170 | | InstrProfSymtab *Symtab; |
171 | | |
172 | | bool SamplePGO; |
173 | | |
174 | | OptimizationRemarkEmitter &ORE; |
175 | | |
176 | | // A struct that records the direct target and it's call count. |
177 | | struct PromotionCandidate { |
178 | | Function *TargetFunction; |
179 | | uint64_t Count; |
180 | 38 | PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {} |
181 | | }; |
182 | | |
183 | | // Check if the indirect-call call site should be promoted. Return the number |
184 | | // of promotions. Inst is the candidate indirect call, ValueDataRef |
185 | | // contains the array of value profile data for profiled targets, |
186 | | // TotalCount is the total profiled count of call executions, and |
187 | | // NumCandidates is the number of candidate entries in ValueDataRef. |
188 | | std::vector<PromotionCandidate> getPromotionCandidatesForCallSite( |
189 | | Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef, |
190 | | uint64_t TotalCount, uint32_t NumCandidates); |
191 | | |
192 | | // Promote a list of targets for one indirect-call callsite. Return |
193 | | // the number of promotions. |
194 | | uint32_t tryToPromote(Instruction *Inst, |
195 | | const std::vector<PromotionCandidate> &Candidates, |
196 | | uint64_t &TotalCount); |
197 | | |
198 | | // Noncopyable |
199 | | ICallPromotionFunc(const ICallPromotionFunc &other) = delete; |
200 | | ICallPromotionFunc &operator=(const ICallPromotionFunc &other) = delete; |
201 | | |
202 | | public: |
203 | | ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab, |
204 | | bool SamplePGO, OptimizationRemarkEmitter &ORE) |
205 | 683 | : F(Func), M(Modu), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {} |
206 | | |
207 | | bool processFunction(ProfileSummaryInfo *PSI); |
208 | | }; |
209 | | } // end anonymous namespace |
210 | | |
211 | | bool llvm::isLegalToPromote(Instruction *Inst, Function *F, |
212 | 47 | const char **Reason) { |
213 | 47 | // Check the return type. |
214 | 47 | Type *CallRetType = Inst->getType(); |
215 | 47 | if (!CallRetType->isVoidTy()47 ) { |
216 | 31 | Type *FuncRetType = F->getReturnType(); |
217 | 31 | if (FuncRetType != CallRetType && |
218 | 31 | !CastInst::isBitCastable(FuncRetType, CallRetType)10 ) { |
219 | 2 | if (Reason) |
220 | 2 | *Reason = "Return type mismatch"; |
221 | 2 | return false; |
222 | 2 | } |
223 | 45 | } |
224 | 45 | |
225 | 45 | // Check if the arguments are compatible with the parameters |
226 | 45 | FunctionType *DirectCalleeType = F->getFunctionType(); |
227 | 45 | unsigned ParamNum = DirectCalleeType->getFunctionNumParams(); |
228 | 45 | CallSite CS(Inst); |
229 | 45 | unsigned ArgNum = CS.arg_size(); |
230 | 45 | |
231 | 45 | if (ParamNum != ArgNum && 45 !DirectCalleeType->isVarArg()5 ) { |
232 | 3 | if (Reason) |
233 | 3 | *Reason = "The number of arguments mismatch"; |
234 | 3 | return false; |
235 | 3 | } |
236 | 42 | |
237 | 52 | for (unsigned I = 0; 42 I < ParamNum52 ; ++I10 ) { |
238 | 10 | Type *PTy = DirectCalleeType->getFunctionParamType(I); |
239 | 10 | Type *ATy = CS.getArgument(I)->getType(); |
240 | 10 | if (PTy == ATy) |
241 | 6 | continue; |
242 | 4 | if (4 !CastInst::castIsValid(Instruction::BitCast, CS.getArgument(I), PTy)4 ) { |
243 | 0 | if (Reason) |
244 | 0 | *Reason = "Argument type mismatch"; |
245 | 0 | return false; |
246 | 0 | } |
247 | 10 | } |
248 | 42 | |
249 | 42 | DEBUG42 (dbgs() << " #" << NumOfPGOICallPromotion << " Promote the icall to " |
250 | 42 | << F->getName() << "\n"); |
251 | 42 | return true; |
252 | 47 | } |
253 | | |
254 | | // Indirect-call promotion heuristic. The direct targets are sorted based on |
255 | | // the count. Stop at the first target that is not promoted. |
256 | | std::vector<ICallPromotionFunc::PromotionCandidate> |
257 | | ICallPromotionFunc::getPromotionCandidatesForCallSite( |
258 | | Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef, |
259 | 37 | uint64_t TotalCount, uint32_t NumCandidates) { |
260 | 37 | std::vector<PromotionCandidate> Ret; |
261 | 37 | |
262 | 37 | DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << *Inst |
263 | 37 | << " Num_targets: " << ValueDataRef.size() |
264 | 37 | << " Num_candidates: " << NumCandidates << "\n"); |
265 | 37 | NumOfPGOICallsites++; |
266 | 37 | if (ICPCSSkip != 0 && 37 NumOfPGOICallsites <= ICPCSSkip0 ) { |
267 | 0 | DEBUG(dbgs() << " Skip: User options.\n"); |
268 | 0 | return Ret; |
269 | 0 | } |
270 | 37 | |
271 | 75 | for (uint32_t I = 0; 37 I < NumCandidates75 ; I++38 ) { |
272 | 44 | uint64_t Count = ValueDataRef[I].Count; |
273 | 44 | assert(Count <= TotalCount); |
274 | 44 | uint64_t Target = ValueDataRef[I].Value; |
275 | 44 | DEBUG(dbgs() << " Candidate " << I << " Count=" << Count |
276 | 44 | << " Target_func: " << Target << "\n"); |
277 | 44 | |
278 | 44 | if (ICPInvokeOnly && 44 dyn_cast<CallInst>(Inst)0 ) { |
279 | 0 | DEBUG(dbgs() << " Not promote: User options.\n"); |
280 | 0 | ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst) |
281 | 0 | << " Not promote: User options"); |
282 | 0 | break; |
283 | 0 | } |
284 | 44 | if (44 ICPCallOnly && 44 dyn_cast<InvokeInst>(Inst)0 ) { |
285 | 0 | DEBUG(dbgs() << " Not promote: User option.\n"); |
286 | 0 | ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst) |
287 | 0 | << " Not promote: User options"); |
288 | 0 | break; |
289 | 0 | } |
290 | 44 | if (44 ICPCutOff != 0 && 44 NumOfPGOICallPromotion >= ICPCutOff0 ) { |
291 | 0 | DEBUG(dbgs() << " Not promote: Cutoff reached.\n"); |
292 | 0 | ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", Inst) |
293 | 0 | << " Not promote: Cutoff reached"); |
294 | 0 | break; |
295 | 0 | } |
296 | 44 | |
297 | 44 | Function *TargetFunction = Symtab->getFunction(Target); |
298 | 44 | if (TargetFunction == nullptr44 ) { |
299 | 2 | DEBUG(dbgs() << " Not promote: Cannot find the target\n"); |
300 | 2 | ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", Inst) |
301 | 2 | << "Cannot promote indirect call: target not found"); |
302 | 2 | break; |
303 | 2 | } |
304 | 42 | |
305 | 42 | const char *Reason = nullptr; |
306 | 42 | if (!isLegalToPromote(Inst, TargetFunction, &Reason)42 ) { |
307 | 4 | using namespace ore; |
308 | 4 | ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", Inst) |
309 | 4 | << "Cannot promote indirect call to " |
310 | 4 | << NV("TargetFunction", TargetFunction) << " with count of " |
311 | 4 | << NV("Count", Count) << ": " << Reason); |
312 | 4 | break; |
313 | 4 | } |
314 | 38 | |
315 | 38 | Ret.push_back(PromotionCandidate(TargetFunction, Count)); |
316 | 38 | TotalCount -= Count; |
317 | 38 | } |
318 | 37 | return Ret; |
319 | 37 | } |
320 | | |
321 | | // Create a diamond structure for If_Then_Else. Also update the profile |
322 | | // count. Do the fix-up for the invoke instruction. |
323 | | static void createIfThenElse(Instruction *Inst, Function *DirectCallee, |
324 | | uint64_t Count, uint64_t TotalCount, |
325 | | BasicBlock **DirectCallBB, |
326 | | BasicBlock **IndirectCallBB, |
327 | 42 | BasicBlock **MergeBB) { |
328 | 42 | CallSite CS(Inst); |
329 | 42 | Value *OrigCallee = CS.getCalledValue(); |
330 | 42 | |
331 | 42 | IRBuilder<> BBBuilder(Inst); |
332 | 42 | LLVMContext &Ctx = Inst->getContext(); |
333 | 42 | Value *BCI1 = |
334 | 42 | BBBuilder.CreateBitCast(OrigCallee, Type::getInt8PtrTy(Ctx), ""); |
335 | 42 | Value *BCI2 = |
336 | 42 | BBBuilder.CreateBitCast(DirectCallee, Type::getInt8PtrTy(Ctx), ""); |
337 | 42 | Value *PtrCmp = BBBuilder.CreateICmpEQ(BCI1, BCI2, ""); |
338 | 42 | |
339 | 42 | uint64_t ElseCount = TotalCount - Count; |
340 | 42 | uint64_t MaxCount = (Count >= ElseCount ? Count42 : ElseCount0 ); |
341 | 42 | uint64_t Scale = calculateCountScale(MaxCount); |
342 | 42 | MDBuilder MDB(Inst->getContext()); |
343 | 42 | MDNode *BranchWeights = MDB.createBranchWeights( |
344 | 42 | scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale)); |
345 | 42 | TerminatorInst *ThenTerm, *ElseTerm; |
346 | 42 | SplitBlockAndInsertIfThenElse(PtrCmp, Inst, &ThenTerm, &ElseTerm, |
347 | 42 | BranchWeights); |
348 | 42 | *DirectCallBB = ThenTerm->getParent(); |
349 | 42 | (*DirectCallBB)->setName("if.true.direct_targ"); |
350 | 42 | *IndirectCallBB = ElseTerm->getParent(); |
351 | 42 | (*IndirectCallBB)->setName("if.false.orig_indirect"); |
352 | 42 | *MergeBB = Inst->getParent(); |
353 | 42 | (*MergeBB)->setName("if.end.icp"); |
354 | 42 | |
355 | 42 | // Special handing of Invoke instructions. |
356 | 42 | InvokeInst *II = dyn_cast<InvokeInst>(Inst); |
357 | 42 | if (!II) |
358 | 35 | return; |
359 | 7 | |
360 | 7 | // We don't need branch instructions for invoke. |
361 | 7 | ThenTerm->eraseFromParent(); |
362 | 7 | ElseTerm->eraseFromParent(); |
363 | 7 | |
364 | 7 | // Add jump from Merge BB to the NormalDest. This is needed for the newly |
365 | 7 | // created direct invoke stmt -- as its NormalDst will be fixed up to MergeBB. |
366 | 7 | BranchInst::Create(II->getNormalDest(), *MergeBB); |
367 | 7 | } |
368 | | |
369 | | // Find the PHI in BB that have the CallResult as the operand. |
370 | 23 | static bool getCallRetPHINode(BasicBlock *BB, Instruction *Inst) { |
371 | 23 | BasicBlock *From = Inst->getParent(); |
372 | 25 | for (auto &I : *BB) { |
373 | 25 | PHINode *PHI = dyn_cast<PHINode>(&I); |
374 | 25 | if (!PHI) |
375 | 23 | continue; |
376 | 2 | int IX = PHI->getBasicBlockIndex(From); |
377 | 2 | if (IX == -1) |
378 | 0 | continue; |
379 | 2 | Value *V = PHI->getIncomingValue(IX); |
380 | 2 | if (dyn_cast<Instruction>(V) == Inst) |
381 | 2 | return true; |
382 | 21 | } |
383 | 21 | return false; |
384 | 21 | } |
385 | | |
386 | | // This method fixes up PHI nodes in BB where BB is the UnwindDest of an |
387 | | // invoke instruction. In BB, there may be PHIs with incoming block being |
388 | | // OrigBB (the MergeBB after if-then-else splitting). After moving the invoke |
389 | | // instructions to its own BB, OrigBB is no longer the predecessor block of BB. |
390 | | // Instead two new predecessors are added: IndirectCallBB and DirectCallBB, |
391 | | // so the PHI node's incoming BBs need to be fixed up accordingly. |
392 | | static void fixupPHINodeForUnwind(Instruction *Inst, BasicBlock *BB, |
393 | | BasicBlock *OrigBB, |
394 | | BasicBlock *IndirectCallBB, |
395 | 7 | BasicBlock *DirectCallBB) { |
396 | 37 | for (auto &I : *BB) { |
397 | 37 | PHINode *PHI = dyn_cast<PHINode>(&I); |
398 | 37 | if (!PHI) |
399 | 37 | continue; |
400 | 0 | int IX = PHI->getBasicBlockIndex(OrigBB); |
401 | 0 | if (IX == -1) |
402 | 0 | continue; |
403 | 0 | Value *V = PHI->getIncomingValue(IX); |
404 | 0 | PHI->addIncoming(V, IndirectCallBB); |
405 | 0 | PHI->setIncomingBlock(IX, DirectCallBB); |
406 | 0 | } |
407 | 7 | } |
408 | | |
409 | | // This method fixes up PHI nodes in BB where BB is the NormalDest of an |
410 | | // invoke instruction. In BB, there may be PHIs with incoming block being |
411 | | // OrigBB (the MergeBB after if-then-else splitting). After moving the invoke |
412 | | // instructions to its own BB, a new incoming edge will be added to the original |
413 | | // NormalDstBB from the IndirectCallBB. |
414 | | static void fixupPHINodeForNormalDest(Instruction *Inst, BasicBlock *BB, |
415 | | BasicBlock *OrigBB, |
416 | | BasicBlock *IndirectCallBB, |
417 | 7 | Instruction *NewInst) { |
418 | 13 | for (auto &I : *BB) { |
419 | 13 | PHINode *PHI = dyn_cast<PHINode>(&I); |
420 | 13 | if (!PHI) |
421 | 11 | continue; |
422 | 2 | int IX = PHI->getBasicBlockIndex(OrigBB); |
423 | 2 | if (IX == -1) |
424 | 0 | continue; |
425 | 2 | Value *V = PHI->getIncomingValue(IX); |
426 | 2 | if (dyn_cast<Instruction>(V) == Inst2 ) { |
427 | 2 | PHI->setIncomingBlock(IX, IndirectCallBB); |
428 | 2 | PHI->addIncoming(NewInst, OrigBB); |
429 | 2 | continue; |
430 | 2 | } |
431 | 0 | PHI->addIncoming(V, IndirectCallBB); |
432 | 0 | } |
433 | 7 | } |
434 | | |
435 | | // Add a bitcast instruction to the direct-call return value if needed. |
436 | | static Instruction *insertCallRetCast(const Instruction *Inst, |
437 | | Instruction *DirectCallInst, |
438 | 42 | Function *DirectCallee) { |
439 | 42 | if (Inst->getType()->isVoidTy()) |
440 | 15 | return DirectCallInst; |
441 | 27 | |
442 | 27 | Type *CallRetType = Inst->getType(); |
443 | 27 | Type *FuncRetType = DirectCallee->getReturnType(); |
444 | 27 | if (FuncRetType == CallRetType) |
445 | 19 | return DirectCallInst; |
446 | 8 | |
447 | 8 | BasicBlock *InsertionBB; |
448 | 8 | if (CallInst *CI = dyn_cast<CallInst>(DirectCallInst)) |
449 | 6 | InsertionBB = CI->getParent(); |
450 | 8 | else |
451 | 2 | InsertionBB = (dyn_cast<InvokeInst>(DirectCallInst))->getNormalDest(); |
452 | 42 | |
453 | 42 | return (new BitCastInst(DirectCallInst, CallRetType, "", |
454 | 42 | InsertionBB->getTerminator())); |
455 | 42 | } |
456 | | |
457 | | // Create a DirectCall instruction in the DirectCallBB. |
458 | | // Parameter Inst is the indirect-call (invoke) instruction. |
459 | | // DirectCallee is the decl of the direct-call (invoke) target. |
460 | | // DirecallBB is the BB that the direct-call (invoke) instruction is inserted. |
461 | | // MergeBB is the bottom BB of the if-then-else-diamond after the |
462 | | // transformation. For invoke instruction, the edges from DirectCallBB and |
463 | | // IndirectCallBB to MergeBB are removed before this call (during |
464 | | // createIfThenElse). |
465 | | static Instruction *createDirectCallInst(const Instruction *Inst, |
466 | | Function *DirectCallee, |
467 | | BasicBlock *DirectCallBB, |
468 | 42 | BasicBlock *MergeBB) { |
469 | 42 | Instruction *NewInst = Inst->clone(); |
470 | 42 | if (CallInst *CI42 = dyn_cast<CallInst>(NewInst)) { |
471 | 35 | CI->setCalledFunction(DirectCallee); |
472 | 35 | CI->mutateFunctionType(DirectCallee->getFunctionType()); |
473 | 42 | } else { |
474 | 7 | // Must be an invoke instruction. Direct invoke's normal destination is |
475 | 7 | // fixed up to MergeBB. MergeBB is the place where return cast is inserted. |
476 | 7 | // Also since IndirectCallBB does not have an edge to MergeBB, there is no |
477 | 7 | // need to insert new PHIs into MergeBB. |
478 | 7 | InvokeInst *II = dyn_cast<InvokeInst>(NewInst); |
479 | 7 | assert(II); |
480 | 7 | II->setCalledFunction(DirectCallee); |
481 | 7 | II->mutateFunctionType(DirectCallee->getFunctionType()); |
482 | 7 | II->setNormalDest(MergeBB); |
483 | 7 | } |
484 | 42 | |
485 | 42 | DirectCallBB->getInstList().insert(DirectCallBB->getFirstInsertionPt(), |
486 | 42 | NewInst); |
487 | 42 | |
488 | 42 | // Clear the value profile data. |
489 | 42 | NewInst->setMetadata(LLVMContext::MD_prof, nullptr); |
490 | 42 | CallSite NewCS(NewInst); |
491 | 42 | FunctionType *DirectCalleeType = DirectCallee->getFunctionType(); |
492 | 42 | unsigned ParamNum = DirectCalleeType->getFunctionNumParams(); |
493 | 52 | for (unsigned I = 0; I < ParamNum52 ; ++I10 ) { |
494 | 10 | Type *ATy = NewCS.getArgument(I)->getType(); |
495 | 10 | Type *PTy = DirectCalleeType->getParamType(I); |
496 | 10 | if (ATy != PTy10 ) { |
497 | 4 | BitCastInst *BI = new BitCastInst(NewCS.getArgument(I), PTy, "", NewInst); |
498 | 4 | NewCS.setArgument(I, BI); |
499 | 4 | } |
500 | 10 | } |
501 | 42 | |
502 | 42 | return insertCallRetCast(Inst, NewInst, DirectCallee); |
503 | 42 | } |
504 | | |
505 | | // Create a PHI to unify the return values of calls. |
506 | | static void insertCallRetPHI(Instruction *Inst, Instruction *CallResult, |
507 | 42 | Function *DirectCallee) { |
508 | 42 | if (Inst->getType()->isVoidTy()) |
509 | 15 | return; |
510 | 27 | |
511 | 27 | if (27 Inst->use_empty()27 ) |
512 | 4 | return; |
513 | 23 | |
514 | 23 | BasicBlock *RetValBB = CallResult->getParent(); |
515 | 23 | |
516 | 23 | BasicBlock *PHIBB; |
517 | 23 | if (InvokeInst *II = dyn_cast<InvokeInst>(CallResult)) |
518 | 2 | RetValBB = II->getNormalDest(); |
519 | 23 | |
520 | 23 | PHIBB = RetValBB->getSingleSuccessor(); |
521 | 23 | if (getCallRetPHINode(PHIBB, Inst)) |
522 | 2 | return; |
523 | 21 | |
524 | 21 | PHINode *CallRetPHI = PHINode::Create(Inst->getType(), 0); |
525 | 21 | PHIBB->getInstList().push_front(CallRetPHI); |
526 | 21 | Inst->replaceAllUsesWith(CallRetPHI); |
527 | 21 | CallRetPHI->addIncoming(Inst, Inst->getParent()); |
528 | 21 | CallRetPHI->addIncoming(CallResult, RetValBB); |
529 | 21 | } |
530 | | |
531 | | // This function does the actual indirect-call promotion transformation: |
532 | | // For an indirect-call like: |
533 | | // Ret = (*Foo)(Args); |
534 | | // It transforms to: |
535 | | // if (Foo == DirectCallee) |
536 | | // Ret1 = DirectCallee(Args); |
537 | | // else |
538 | | // Ret2 = (*Foo)(Args); |
539 | | // Ret = phi(Ret1, Ret2); |
540 | | // It adds type casts for the args do not match the parameters and the return |
541 | | // value. Branch weights metadata also updated. |
542 | | // If \p AttachProfToDirectCall is true, a prof metadata is attached to the |
543 | | // new direct call to contain \p Count. This is used by SamplePGO inliner to |
544 | | // check callsite hotness. |
545 | | // Returns the promoted direct call instruction. |
546 | | Instruction *llvm::promoteIndirectCall(Instruction *Inst, |
547 | | Function *DirectCallee, uint64_t Count, |
548 | | uint64_t TotalCount, |
549 | | bool AttachProfToDirectCall, |
550 | 42 | OptimizationRemarkEmitter *ORE) { |
551 | 42 | assert(DirectCallee != nullptr); |
552 | 42 | BasicBlock *BB = Inst->getParent(); |
553 | 42 | // Just to suppress the non-debug build warning. |
554 | 42 | (void)BB; |
555 | 42 | DEBUG(dbgs() << "\n\n== Basic Block Before ==\n"); |
556 | 42 | DEBUG(dbgs() << *BB << "\n"); |
557 | 42 | |
558 | 42 | BasicBlock *DirectCallBB, *IndirectCallBB, *MergeBB; |
559 | 42 | createIfThenElse(Inst, DirectCallee, Count, TotalCount, &DirectCallBB, |
560 | 42 | &IndirectCallBB, &MergeBB); |
561 | 42 | |
562 | 42 | Instruction *NewInst = |
563 | 42 | createDirectCallInst(Inst, DirectCallee, DirectCallBB, MergeBB); |
564 | 42 | |
565 | 42 | if (AttachProfToDirectCall42 ) { |
566 | 6 | SmallVector<uint32_t, 1> Weights; |
567 | 6 | Weights.push_back(Count); |
568 | 6 | MDBuilder MDB(NewInst->getContext()); |
569 | 6 | if (Instruction *DI = dyn_cast<Instruction>(NewInst->stripPointerCasts())) |
570 | 5 | DI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); |
571 | 6 | } |
572 | 42 | |
573 | 42 | // Move Inst from MergeBB to IndirectCallBB. |
574 | 42 | Inst->removeFromParent(); |
575 | 42 | IndirectCallBB->getInstList().insert(IndirectCallBB->getFirstInsertionPt(), |
576 | 42 | Inst); |
577 | 42 | |
578 | 42 | if (InvokeInst *II42 = dyn_cast<InvokeInst>(Inst)) { |
579 | 7 | // At this point, the original indirect invoke instruction has the original |
580 | 7 | // UnwindDest and NormalDest. For the direct invoke instruction, the |
581 | 7 | // NormalDest points to MergeBB, and MergeBB jumps to the original |
582 | 7 | // NormalDest. MergeBB might have a new bitcast instruction for the return |
583 | 7 | // value. The PHIs are with the original NormalDest. Since we now have two |
584 | 7 | // incoming edges to NormalDest and UnwindDest, we have to do some fixups. |
585 | 7 | // |
586 | 7 | // UnwindDest will not use the return value. So pass nullptr here. |
587 | 7 | fixupPHINodeForUnwind(Inst, II->getUnwindDest(), MergeBB, IndirectCallBB, |
588 | 7 | DirectCallBB); |
589 | 7 | // We don't need to update the operand from NormalDest for DirectCallBB. |
590 | 7 | // Pass nullptr here. |
591 | 7 | fixupPHINodeForNormalDest(Inst, II->getNormalDest(), MergeBB, |
592 | 7 | IndirectCallBB, NewInst); |
593 | 7 | } |
594 | 42 | |
595 | 42 | insertCallRetPHI(Inst, NewInst, DirectCallee); |
596 | 42 | |
597 | 42 | DEBUG(dbgs() << "\n== Basic Blocks After ==\n"); |
598 | 42 | DEBUG(dbgs() << *BB << *DirectCallBB << *IndirectCallBB << *MergeBB << "\n"); |
599 | 42 | |
600 | 42 | using namespace ore; |
601 | 42 | if (ORE) |
602 | 42 | ORE->emit(OptimizationRemark(42 DEBUG_TYPE42 , "Promoted", Inst) |
603 | 42 | << "Promote indirect call to " << NV("DirectCallee", DirectCallee) |
604 | 42 | << " with count " << NV("Count", Count) << " out of " |
605 | 42 | << NV("TotalCount", TotalCount)); |
606 | 42 | return NewInst; |
607 | 42 | } |
608 | | |
609 | | // Promote indirect-call to conditional direct-call for one callsite. |
610 | | uint32_t ICallPromotionFunc::tryToPromote( |
611 | | Instruction *Inst, const std::vector<PromotionCandidate> &Candidates, |
612 | 37 | uint64_t &TotalCount) { |
613 | 37 | uint32_t NumPromoted = 0; |
614 | 37 | |
615 | 38 | for (auto &C : Candidates) { |
616 | 38 | uint64_t Count = C.Count; |
617 | 38 | promoteIndirectCall(Inst, C.TargetFunction, Count, TotalCount, SamplePGO, |
618 | 38 | &ORE); |
619 | 38 | assert(TotalCount >= Count); |
620 | 38 | TotalCount -= Count; |
621 | 38 | NumOfPGOICallPromotion++; |
622 | 38 | NumPromoted++; |
623 | 38 | } |
624 | 37 | return NumPromoted; |
625 | 37 | } |
626 | | |
627 | | // Traverse all the indirect-call callsite and get the value profile |
628 | | // annotation to perform indirect-call promotion. |
629 | 683 | bool ICallPromotionFunc::processFunction(ProfileSummaryInfo *PSI) { |
630 | 683 | bool Changed = false; |
631 | 683 | ICallPromotionAnalysis ICallAnalysis; |
632 | 49 | for (auto &I : findIndirectCallSites(F)) { |
633 | 49 | uint32_t NumVals, NumCandidates; |
634 | 49 | uint64_t TotalCount; |
635 | 49 | auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction( |
636 | 49 | I, NumVals, TotalCount, NumCandidates); |
637 | 49 | if (!NumCandidates || |
638 | 37 | (PSI && 37 PSI->hasProfileSummary()37 && !PSI->isHotCount(TotalCount)9 )) |
639 | 12 | continue; |
640 | 37 | auto PromotionCandidates = getPromotionCandidatesForCallSite( |
641 | 37 | I, ICallProfDataRef, TotalCount, NumCandidates); |
642 | 37 | uint32_t NumPromoted = tryToPromote(I, PromotionCandidates, TotalCount); |
643 | 37 | if (NumPromoted == 0) |
644 | 6 | continue; |
645 | 31 | |
646 | 31 | Changed = true; |
647 | 31 | // Adjust the MD.prof metadata. First delete the old one. |
648 | 31 | I->setMetadata(LLVMContext::MD_prof, nullptr); |
649 | 31 | // If all promoted, we don't need the MD.prof metadata. |
650 | 31 | if (TotalCount == 0 || 31 NumPromoted == NumVals4 ) |
651 | 27 | continue; |
652 | 4 | // Otherwise we need update with the un-promoted records back. |
653 | 4 | annotateValueSite(*M, *I, ICallProfDataRef.slice(NumPromoted), TotalCount, |
654 | 4 | IPVK_IndirectCallTarget, NumCandidates); |
655 | 4 | } |
656 | 683 | return Changed; |
657 | 683 | } |
658 | | |
659 | | // A wrapper function that does the actual work. |
660 | | static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, |
661 | | bool InLTO, bool SamplePGO, |
662 | 416 | ModuleAnalysisManager *AM = nullptr) { |
663 | 416 | if (DisableICP) |
664 | 0 | return false; |
665 | 416 | InstrProfSymtab Symtab; |
666 | 416 | if (Error E416 = Symtab.create(M, InLTO)) { |
667 | 0 | std::string SymtabFailure = toString(std::move(E)); |
668 | 0 | DEBUG(dbgs() << "Failed to create symtab: " << SymtabFailure << "\n"); |
669 | 0 | (void)SymtabFailure; |
670 | 0 | return false; |
671 | 0 | } |
672 | 416 | bool Changed = false; |
673 | 889 | for (auto &F : M) { |
674 | 889 | if (F.isDeclaration()) |
675 | 207 | continue; |
676 | 682 | if (682 F.hasFnAttribute(Attribute::OptimizeNone)682 ) |
677 | 0 | continue; |
678 | 682 | |
679 | 682 | std::unique_ptr<OptimizationRemarkEmitter> OwnedORE; |
680 | 682 | OptimizationRemarkEmitter *ORE; |
681 | 682 | if (AM682 ) { |
682 | 51 | auto &FAM = |
683 | 51 | AM->getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); |
684 | 51 | ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); |
685 | 682 | } else { |
686 | 631 | OwnedORE = make_unique<OptimizationRemarkEmitter>(&F); |
687 | 631 | ORE = OwnedORE.get(); |
688 | 631 | } |
689 | 682 | |
690 | 682 | ICallPromotionFunc ICallPromotion(F, &M, &Symtab, SamplePGO, *ORE); |
691 | 682 | bool FuncChanged = ICallPromotion.processFunction(PSI); |
692 | 682 | if (ICPDUMPAFTER && 682 FuncChanged0 ) { |
693 | 0 | DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs())); |
694 | 0 | DEBUG(dbgs() << "\n"); |
695 | 0 | } |
696 | 682 | Changed |= FuncChanged; |
697 | 682 | if (ICPCutOff != 0 && 682 NumOfPGOICallPromotion >= ICPCutOff0 ) { |
698 | 0 | DEBUG(dbgs() << " Stop: Cutoff reached.\n"); |
699 | 0 | break; |
700 | 0 | } |
701 | 416 | } |
702 | 416 | return Changed; |
703 | 416 | } |
704 | | |
705 | 388 | bool PGOIndirectCallPromotionLegacyPass::runOnModule(Module &M) { |
706 | 388 | ProfileSummaryInfo *PSI = |
707 | 388 | getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); |
708 | 388 | |
709 | 388 | // Command-line option has the priority for InLTO. |
710 | 388 | return promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode, |
711 | 388 | SamplePGO | ICPSamplePGOMode); |
712 | 388 | } |
713 | | |
714 | | PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, |
715 | 28 | ModuleAnalysisManager &AM) { |
716 | 28 | ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); |
717 | 28 | |
718 | 28 | if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode, |
719 | 28 | SamplePGO | ICPSamplePGOMode, &AM)) |
720 | 19 | return PreservedAnalyses::all(); |
721 | 9 | |
722 | 9 | return PreservedAnalyses::none(); |
723 | 9 | } |