/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | /// |
10 | | /// \file |
11 | | /// This file implements interprocedural passes which walk the |
12 | | /// call-graph deducing and/or propagating function attributes. |
13 | | /// |
14 | | //===----------------------------------------------------------------------===// |
15 | | |
16 | | #include "llvm/Transforms/IPO/FunctionAttrs.h" |
17 | | #include "llvm/ADT/SCCIterator.h" |
18 | | #include "llvm/ADT/SetVector.h" |
19 | | #include "llvm/ADT/SmallSet.h" |
20 | | #include "llvm/ADT/Statistic.h" |
21 | | #include "llvm/ADT/StringSwitch.h" |
22 | | #include "llvm/Analysis/AliasAnalysis.h" |
23 | | #include "llvm/Analysis/AssumptionCache.h" |
24 | | #include "llvm/Analysis/BasicAliasAnalysis.h" |
25 | | #include "llvm/Analysis/CallGraph.h" |
26 | | #include "llvm/Analysis/CallGraphSCCPass.h" |
27 | | #include "llvm/Analysis/CaptureTracking.h" |
28 | | #include "llvm/Analysis/TargetLibraryInfo.h" |
29 | | #include "llvm/Analysis/ValueTracking.h" |
30 | | #include "llvm/IR/GlobalVariable.h" |
31 | | #include "llvm/IR/InstIterator.h" |
32 | | #include "llvm/IR/IntrinsicInst.h" |
33 | | #include "llvm/IR/LLVMContext.h" |
34 | | #include "llvm/Support/Debug.h" |
35 | | #include "llvm/Support/raw_ostream.h" |
36 | | #include "llvm/Transforms/IPO.h" |
37 | | using namespace llvm; |
38 | | |
39 | | #define DEBUG_TYPE "functionattrs" |
40 | | |
41 | | STATISTIC(NumReadNone, "Number of functions marked readnone"); |
42 | | STATISTIC(NumReadOnly, "Number of functions marked readonly"); |
43 | | STATISTIC(NumNoCapture, "Number of arguments marked nocapture"); |
44 | | STATISTIC(NumReturned, "Number of arguments marked returned"); |
45 | | STATISTIC(NumReadNoneArg, "Number of arguments marked readnone"); |
46 | | STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly"); |
47 | | STATISTIC(NumNoAlias, "Number of function returns marked noalias"); |
48 | | STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull"); |
49 | | STATISTIC(NumNoRecurse, "Number of functions marked as norecurse"); |
50 | | |
51 | | // FIXME: This is disabled by default to avoid exposing security vulnerabilities |
52 | | // in C/C++ code compiled by clang: |
53 | | // http://lists.llvm.org/pipermail/cfe-dev/2017-January/052066.html |
54 | | static cl::opt<bool> EnableNonnullArgPropagation( |
55 | | "enable-nonnull-arg-prop", cl::Hidden, |
56 | | cl::desc("Try to propagate nonnull argument attributes from callsites to " |
57 | | "caller functions.")); |
58 | | |
59 | | namespace { |
60 | | typedef SmallSetVector<Function *, 8> SCCNodeSet; |
61 | | } |
62 | | |
63 | | /// Returns the memory access attribute for function F using AAR for AA results, |
64 | | /// where SCCNodes is the current SCC. |
65 | | /// |
66 | | /// If ThisBody is true, this function may examine the function body and will |
67 | | /// return a result pertaining to this copy of the function. If it is false, the |
68 | | /// result will be based only on AA results for the function declaration; it |
69 | | /// will be assumed that some other (perhaps less optimized) version of the |
70 | | /// function may be selected at link time. |
71 | | static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody, |
72 | | AAResults &AAR, |
73 | 727k | const SCCNodeSet &SCCNodes) { |
74 | 727k | FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F); |
75 | 727k | if (MRB == FMRB_DoesNotAccessMemory) |
76 | 727k | // Already perfect! |
77 | 26.0k | return MAK_ReadNone; |
78 | 701k | |
79 | 701k | if (701k !ThisBody701k ) { |
80 | 617k | if (AliasAnalysis::onlyReadsMemory(MRB)) |
81 | 13.9k | return MAK_ReadOnly; |
82 | 603k | |
83 | 603k | // Conservatively assume it writes to memory. |
84 | 603k | return MAK_MayWrite; |
85 | 603k | } |
86 | 83.2k | |
87 | 83.2k | // Scan the function body for instructions that may read or write memory. |
88 | 83.2k | bool ReadsMemory = false; |
89 | 1.02M | for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E1.02M ; ++II944k ) { |
90 | 1.01M | Instruction *I = &*II; |
91 | 1.01M | |
92 | 1.01M | // Some instructions can be ignored even if they read or write memory. |
93 | 1.01M | // Detect these now, skipping to the next instruction if one is found. |
94 | 1.01M | CallSite CS(cast<Value>(I)); |
95 | 1.01M | if (CS1.01M ) { |
96 | 81.6k | // Ignore calls to functions in the same SCC, as long as the call sites |
97 | 81.6k | // don't have operand bundles. Calls with operand bundles are allowed to |
98 | 81.6k | // have memory effects not described by the memory effects of the call |
99 | 81.6k | // target. |
100 | 81.6k | if (!CS.hasOperandBundles() && 81.6k CS.getCalledFunction()81.6k && |
101 | 78.5k | SCCNodes.count(CS.getCalledFunction())) |
102 | 684 | continue; |
103 | 80.9k | FunctionModRefBehavior MRB = AAR.getModRefBehavior(CS); |
104 | 80.9k | |
105 | 80.9k | // If the call doesn't access memory, we're done. |
106 | 80.9k | if (!(MRB & MRI_ModRef)) |
107 | 10.5k | continue; |
108 | 70.3k | |
109 | 70.3k | if (70.3k !AliasAnalysis::onlyAccessesArgPointees(MRB)70.3k ) { |
110 | 54.4k | // The call could access any memory. If that includes writes, give up. |
111 | 54.4k | if (MRB & MRI_Mod) |
112 | 52.9k | return MAK_MayWrite; |
113 | 1.42k | // If it reads, note it. |
114 | 1.42k | if (1.42k MRB & MRI_Ref1.42k ) |
115 | 1.42k | ReadsMemory = true; |
116 | 54.4k | continue; |
117 | 54.4k | } |
118 | 15.9k | |
119 | 15.9k | // Check whether all pointer arguments point to local memory, and |
120 | 15.9k | // ignore calls that only access local memory. |
121 | 15.9k | for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); |
122 | 47.7k | CI != CE47.7k ; ++CI31.7k ) { |
123 | 32.2k | Value *Arg = *CI; |
124 | 32.2k | if (!Arg->getType()->isPtrOrPtrVectorTy()) |
125 | 15.9k | continue; |
126 | 16.2k | |
127 | 16.2k | AAMDNodes AAInfo; |
128 | 16.2k | I->getAAMetadata(AAInfo); |
129 | 16.2k | MemoryLocation Loc(Arg, MemoryLocation::UnknownSize, AAInfo); |
130 | 16.2k | |
131 | 16.2k | // Skip accesses to local or constant memory as they don't impact the |
132 | 16.2k | // externally visible mod/ref behavior. |
133 | 16.2k | if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) |
134 | 14.8k | continue; |
135 | 1.42k | |
136 | 1.42k | if (1.42k MRB & MRI_Mod1.42k ) |
137 | 1.42k | // Writes non-local memory. Give up. |
138 | 432 | return MAK_MayWrite; |
139 | 992 | if (992 MRB & MRI_Ref992 ) |
140 | 992 | // Ok, it reads non-local memory. |
141 | 992 | ReadsMemory = true; |
142 | 32.2k | } |
143 | 15.4k | continue; |
144 | 934k | } else if (LoadInst *934k LI934k = dyn_cast<LoadInst>(I)) { |
145 | 180k | // Ignore non-volatile loads from local memory. (Atomic is okay here.) |
146 | 180k | if (!LI->isVolatile()180k ) { |
147 | 180k | MemoryLocation Loc = MemoryLocation::get(LI); |
148 | 180k | if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) |
149 | 9.56k | continue; |
150 | 934k | } |
151 | 753k | } else if (StoreInst *753k SI753k = dyn_cast<StoreInst>(I)) { |
152 | 31.6k | // Ignore non-volatile stores to local memory. (Atomic is okay here.) |
153 | 31.6k | if (!SI->isVolatile()31.6k ) { |
154 | 31.4k | MemoryLocation Loc = MemoryLocation::get(SI); |
155 | 31.4k | if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) |
156 | 15.0k | continue; |
157 | 753k | } |
158 | 721k | } else if (VAArgInst *721k VI721k = dyn_cast<VAArgInst>(I)) { |
159 | 4 | // Ignore vaargs on local memory. |
160 | 4 | MemoryLocation Loc = MemoryLocation::get(VI); |
161 | 4 | if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) |
162 | 3 | continue; |
163 | 909k | } |
164 | 909k | |
165 | 909k | // Any remaining instructions need to be taken seriously! Check if they |
166 | 909k | // read or write memory. |
167 | 909k | if (909k I->mayWriteToMemory()909k ) |
168 | 909k | // Writes memory. Just give up. |
169 | 18.0k | return MAK_MayWrite; |
170 | 891k | |
171 | 891k | // If this instruction may read memory, remember that. |
172 | 891k | ReadsMemory |= I->mayReadFromMemory(); |
173 | 891k | } |
174 | 83.2k | |
175 | 11.7k | return ReadsMemory ? 11.7k MAK_ReadOnly6.54k : MAK_ReadNone5.18k ; |
176 | 727k | } |
177 | | |
178 | | MemoryAccessKind llvm::computeFunctionBodyMemoryAccess(Function &F, |
179 | 94 | AAResults &AAR) { |
180 | 94 | return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {}); |
181 | 94 | } |
182 | | |
183 | | /// Deduce readonly/readnone attributes for the SCC. |
184 | | template <typename AARGetterT> |
185 | 726k | static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) { |
186 | 726k | // Check if any of the functions in the SCC read or write memory. If they |
187 | 726k | // write memory then they can't be marked readnone or readonly. |
188 | 726k | bool ReadsMemory = false; |
189 | 727k | for (Function *F : SCCNodes) { |
190 | 727k | // Call the callable parameter to look up AA results for this function. |
191 | 727k | AAResults &AAR = AARGetter(*F); |
192 | 727k | |
193 | 727k | // Non-exact function definitions may not be selected at link time, and an |
194 | 727k | // alternative version that writes to memory may be selected. See the |
195 | 727k | // comment on GlobalValue::isDefinitionExact for more details. |
196 | 727k | switch (checkFunctionMemoryAccess(*F, F->hasExactDefinition(), |
197 | 727k | AAR, SCCNodes)) { |
198 | 675k | case MAK_MayWrite: |
199 | 675k | return false; |
200 | 20.5k | case MAK_ReadOnly: |
201 | 20.5k | ReadsMemory = true; |
202 | 20.5k | break; |
203 | 31.1k | case MAK_ReadNone: |
204 | 31.1k | // Nothing to do! |
205 | 31.1k | break; |
206 | 51.5k | } |
207 | 51.5k | } |
208 | 51.5k | |
209 | 51.5k | // Success! Functions in this SCC do not access memory, or only read memory. |
210 | 51.5k | // Give them the appropriate attribute. |
211 | 51.5k | bool MadeChange = false; |
212 | 51.6k | for (Function *F : SCCNodes) { |
213 | 51.6k | if (F->doesNotAccessMemory()) |
214 | 51.6k | // Already perfect! |
215 | 9.71k | continue; |
216 | 41.8k | |
217 | 41.8k | if (41.8k F->onlyReadsMemory() && 41.8k ReadsMemory13.9k ) |
218 | 41.8k | // No change. |
219 | 13.9k | continue; |
220 | 27.9k | |
221 | 27.9k | MadeChange = true; |
222 | 27.9k | |
223 | 27.9k | // Clear out any existing attributes. |
224 | 27.9k | F->removeFnAttr(Attribute::ReadOnly); |
225 | 27.9k | F->removeFnAttr(Attribute::ReadNone); |
226 | 27.9k | |
227 | 27.9k | // Add in the new attribute. |
228 | 27.9k | F->addFnAttr(ReadsMemory ? Attribute::ReadOnly6.51k : Attribute::ReadNone21.3k ); |
229 | 27.9k | |
230 | 27.9k | if (ReadsMemory) |
231 | 6.51k | ++NumReadOnly; |
232 | 27.9k | else |
233 | 21.3k | ++NumReadNone; |
234 | 51.6k | } |
235 | 726k | |
236 | 726k | return MadeChange; |
237 | 726k | } FunctionAttrs.cpp:bool addReadAttrs<llvm::LegacyAARGetter&>(llvm::SmallSetVector<llvm::Function*, 8u> const&, llvm::LegacyAARGetter&&&) Line | Count | Source | 185 | 726k | static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) { | 186 | 726k | // Check if any of the functions in the SCC read or write memory. If they | 187 | 726k | // write memory then they can't be marked readnone or readonly. | 188 | 726k | bool ReadsMemory = false; | 189 | 726k | for (Function *F : SCCNodes) { | 190 | 726k | // Call the callable parameter to look up AA results for this function. | 191 | 726k | AAResults &AAR = AARGetter(*F); | 192 | 726k | | 193 | 726k | // Non-exact function definitions may not be selected at link time, and an | 194 | 726k | // alternative version that writes to memory may be selected. See the | 195 | 726k | // comment on GlobalValue::isDefinitionExact for more details. | 196 | 726k | switch (checkFunctionMemoryAccess(*F, F->hasExactDefinition(), | 197 | 726k | AAR, SCCNodes)) { | 198 | 675k | case MAK_MayWrite: | 199 | 675k | return false; | 200 | 20.5k | case MAK_ReadOnly: | 201 | 20.5k | ReadsMemory = true; | 202 | 20.5k | break; | 203 | 30.9k | case MAK_ReadNone: | 204 | 30.9k | // Nothing to do! | 205 | 30.9k | break; | 206 | 51.4k | } | 207 | 51.4k | } | 208 | 51.4k | | 209 | 51.4k | // Success! Functions in this SCC do not access memory, or only read memory. | 210 | 51.4k | // Give them the appropriate attribute. | 211 | 51.4k | bool MadeChange = false; | 212 | 51.4k | for (Function *F : SCCNodes) { | 213 | 51.4k | if (F->doesNotAccessMemory()) | 214 | 51.4k | // Already perfect! | 215 | 9.69k | continue; | 216 | 41.7k | | 217 | 41.7k | if (41.7k F->onlyReadsMemory() && 41.7k ReadsMemory13.9k ) | 218 | 41.7k | // No change. | 219 | 13.9k | continue; | 220 | 27.7k | | 221 | 27.7k | MadeChange = true; | 222 | 27.7k | | 223 | 27.7k | // Clear out any existing attributes. | 224 | 27.7k | F->removeFnAttr(Attribute::ReadOnly); | 225 | 27.7k | F->removeFnAttr(Attribute::ReadNone); | 226 | 27.7k | | 227 | 27.7k | // Add in the new attribute. | 228 | 27.7k | F->addFnAttr(ReadsMemory ? Attribute::ReadOnly6.50k : Attribute::ReadNone21.2k ); | 229 | 27.7k | | 230 | 27.7k | if (ReadsMemory) | 231 | 6.50k | ++NumReadOnly; | 232 | 27.7k | else | 233 | 21.2k | ++NumReadNone; | 234 | 51.4k | } | 235 | 726k | | 236 | 726k | return MadeChange; | 237 | 726k | } |
FunctionAttrs.cpp:bool addReadAttrs<llvm::PostOrderFunctionAttrsPass::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&)::$_0&>(llvm::SmallSetVector<llvm::Function*, 8u> const&, llvm::PostOrderFunctionAttrsPass::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&)::$_0&&&) Line | Count | Source | 185 | 271 | static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) { | 186 | 271 | // Check if any of the functions in the SCC read or write memory. If they | 187 | 271 | // write memory then they can't be marked readnone or readonly. | 188 | 271 | bool ReadsMemory = false; | 189 | 311 | for (Function *F : SCCNodes) { | 190 | 311 | // Call the callable parameter to look up AA results for this function. | 191 | 311 | AAResults &AAR = AARGetter(*F); | 192 | 311 | | 193 | 311 | // Non-exact function definitions may not be selected at link time, and an | 194 | 311 | // alternative version that writes to memory may be selected. See the | 195 | 311 | // comment on GlobalValue::isDefinitionExact for more details. | 196 | 311 | switch (checkFunctionMemoryAccess(*F, F->hasExactDefinition(), | 197 | 311 | AAR, SCCNodes)) { | 198 | 155 | case MAK_MayWrite: | 199 | 155 | return false; | 200 | 9 | case MAK_ReadOnly: | 201 | 9 | ReadsMemory = true; | 202 | 9 | break; | 203 | 147 | case MAK_ReadNone: | 204 | 147 | // Nothing to do! | 205 | 147 | break; | 206 | 116 | } | 207 | 116 | } | 208 | 116 | | 209 | 116 | // Success! Functions in this SCC do not access memory, or only read memory. | 210 | 116 | // Give them the appropriate attribute. | 211 | 116 | bool MadeChange = false; | 212 | 133 | for (Function *F : SCCNodes) { | 213 | 133 | if (F->doesNotAccessMemory()) | 214 | 133 | // Already perfect! | 215 | 17 | continue; | 216 | 116 | | 217 | 116 | if (116 F->onlyReadsMemory() && 116 ReadsMemory3 ) | 218 | 116 | // No change. | 219 | 0 | continue; | 220 | 116 | | 221 | 116 | MadeChange = true; | 222 | 116 | | 223 | 116 | // Clear out any existing attributes. | 224 | 116 | F->removeFnAttr(Attribute::ReadOnly); | 225 | 116 | F->removeFnAttr(Attribute::ReadNone); | 226 | 116 | | 227 | 116 | // Add in the new attribute. | 228 | 116 | F->addFnAttr(ReadsMemory ? Attribute::ReadOnly8 : Attribute::ReadNone108 ); | 229 | 116 | | 230 | 116 | if (ReadsMemory) | 231 | 8 | ++NumReadOnly; | 232 | 116 | else | 233 | 108 | ++NumReadNone; | 234 | 133 | } | 235 | 271 | | 236 | 271 | return MadeChange; | 237 | 271 | } |
|
238 | | |
239 | | namespace { |
240 | | /// For a given pointer Argument, this retains a list of Arguments of functions |
241 | | /// in the same SCC that the pointer data flows into. We use this to build an |
242 | | /// SCC of the arguments. |
243 | | struct ArgumentGraphNode { |
244 | | Argument *Definition; |
245 | | SmallVector<ArgumentGraphNode *, 4> Uses; |
246 | | }; |
247 | | |
248 | | class ArgumentGraph { |
249 | | // We store pointers to ArgumentGraphNode objects, so it's important that |
250 | | // that they not move around upon insert. |
251 | | typedef std::map<Argument *, ArgumentGraphNode> ArgumentMapTy; |
252 | | |
253 | | ArgumentMapTy ArgumentMap; |
254 | | |
255 | | // There is no root node for the argument graph, in fact: |
256 | | // void f(int *x, int *y) { if (...) f(x, y); } |
257 | | // is an example where the graph is disconnected. The SCCIterator requires a |
258 | | // single entry point, so we maintain a fake ("synthetic") root node that |
259 | | // uses every node. Because the graph is directed and nothing points into |
260 | | // the root, it will not participate in any SCCs (except for its own). |
261 | | ArgumentGraphNode SyntheticRoot; |
262 | | |
263 | | public: |
264 | 726k | ArgumentGraph() { SyntheticRoot.Definition = nullptr; } |
265 | | |
266 | | typedef SmallVectorImpl<ArgumentGraphNode *>::iterator iterator; |
267 | | |
268 | 0 | iterator begin() { return SyntheticRoot.Uses.begin(); } |
269 | 0 | iterator end() { return SyntheticRoot.Uses.end(); } |
270 | 726k | ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; } |
271 | | |
272 | 2.37k | ArgumentGraphNode *operator[](Argument *A) { |
273 | 2.37k | ArgumentGraphNode &Node = ArgumentMap[A]; |
274 | 2.37k | Node.Definition = A; |
275 | 2.37k | SyntheticRoot.Uses.push_back(&Node); |
276 | 2.37k | return &Node; |
277 | 2.37k | } |
278 | | }; |
279 | | |
280 | | /// This tracker checks whether callees are in the SCC, and if so it does not |
281 | | /// consider that a capture, instead adding it to the "Uses" list and |
282 | | /// continuing with the analysis. |
283 | | struct ArgumentUsesTracker : public CaptureTracker { |
284 | | ArgumentUsesTracker(const SCCNodeSet &SCCNodes) |
285 | 204k | : Captured(false), SCCNodes(SCCNodes) {} |
286 | | |
287 | 2.03k | void tooManyUses() override { Captured = true; } |
288 | | |
289 | 160k | bool captured(const Use *U) override { |
290 | 160k | CallSite CS(U->getUser()); |
291 | 160k | if (!CS.getInstruction()160k ) { |
292 | 135k | Captured = true; |
293 | 135k | return true; |
294 | 135k | } |
295 | 24.8k | |
296 | 24.8k | Function *F = CS.getCalledFunction(); |
297 | 24.8k | if (!F || 24.8k !F->hasExactDefinition()21.3k || !SCCNodes.count(F)7.11k ) { |
298 | 22.6k | Captured = true; |
299 | 22.6k | return true; |
300 | 22.6k | } |
301 | 2.26k | |
302 | 2.26k | // Note: the callee and the two successor blocks *follow* the argument |
303 | 2.26k | // operands. This means there is no need to adjust UseIndex to account for |
304 | 2.26k | // these. |
305 | 2.26k | |
306 | 2.26k | unsigned UseIndex = |
307 | 2.26k | std::distance(const_cast<const Use *>(CS.arg_begin()), U); |
308 | 2.26k | |
309 | 2.26k | assert(UseIndex < CS.data_operands_size() && |
310 | 2.26k | "Indirect function calls should have been filtered above!"); |
311 | 2.26k | |
312 | 2.26k | if (UseIndex >= CS.getNumArgOperands()2.26k ) { |
313 | 0 | // Data operand, but not a argument operand -- must be a bundle operand |
314 | 0 | assert(CS.hasOperandBundles() && "Must be!"); |
315 | 0 |
|
316 | 0 | // CaptureTracking told us that we're being captured by an operand bundle |
317 | 0 | // use. In this case it does not matter if the callee is within our SCC |
318 | 0 | // or not -- we've been captured in some unknown way, and we have to be |
319 | 0 | // conservative. |
320 | 0 | Captured = true; |
321 | 0 | return true; |
322 | 0 | } |
323 | 2.26k | |
324 | 2.26k | if (2.26k UseIndex >= F->arg_size()2.26k ) { |
325 | 2 | assert(F->isVarArg() && "More params than args in non-varargs call"); |
326 | 2 | Captured = true; |
327 | 2 | return true; |
328 | 2 | } |
329 | 2.26k | |
330 | 2.26k | Uses.push_back(&*std::next(F->arg_begin(), UseIndex)); |
331 | 2.26k | return false; |
332 | 2.26k | } |
333 | | |
334 | | bool Captured; // True only if certainly captured (used outside our SCC). |
335 | | SmallVector<Argument *, 4> Uses; // Uses within our SCC. |
336 | | |
337 | | const SCCNodeSet &SCCNodes; |
338 | | }; |
339 | | } |
340 | | |
341 | | namespace llvm { |
342 | | template <> struct GraphTraits<ArgumentGraphNode *> { |
343 | | typedef ArgumentGraphNode *NodeRef; |
344 | | typedef SmallVectorImpl<ArgumentGraphNode *>::iterator ChildIteratorType; |
345 | | |
346 | 0 | static NodeRef getEntryNode(NodeRef A) { return A; } |
347 | 728k | static ChildIteratorType child_begin(NodeRef N) { return N->Uses.begin(); } |
348 | 731k | static ChildIteratorType child_end(NodeRef N) { return N->Uses.end(); } |
349 | | }; |
350 | | template <> |
351 | | struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> { |
352 | 726k | static NodeRef getEntryNode(ArgumentGraph *AG) { return AG->getEntryNode(); } |
353 | 0 | static ChildIteratorType nodes_begin(ArgumentGraph *AG) { |
354 | 0 | return AG->begin(); |
355 | 0 | } |
356 | 0 | static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); } |
357 | | }; |
358 | | } |
359 | | |
360 | | /// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone. |
361 | | static Attribute::AttrKind |
362 | | determinePointerReadAttrs(Argument *A, |
363 | 204k | const SmallPtrSet<Argument *, 8> &SCCNodes) { |
364 | 204k | |
365 | 204k | SmallVector<Use *, 32> Worklist; |
366 | 204k | SmallSet<Use *, 32> Visited; |
367 | 204k | |
368 | 204k | // inalloca arguments are always clobbered by the call. |
369 | 204k | if (A->hasInAllocaAttr()) |
370 | 2 | return Attribute::None; |
371 | 204k | |
372 | 204k | bool IsRead = false; |
373 | 204k | // We don't need to track IsWritten. If A is written to, return immediately. |
374 | 204k | |
375 | 728k | for (Use &U : A->uses()) { |
376 | 728k | Visited.insert(&U); |
377 | 728k | Worklist.push_back(&U); |
378 | 728k | } |
379 | 204k | |
380 | 933k | while (!Worklist.empty()933k ) { |
381 | 802k | Use *U = Worklist.pop_back_val(); |
382 | 802k | Instruction *I = cast<Instruction>(U->getUser()); |
383 | 802k | |
384 | 802k | switch (I->getOpcode()) { |
385 | 305k | case Instruction::BitCast: |
386 | 305k | case Instruction::GetElementPtr: |
387 | 305k | case Instruction::PHI: |
388 | 305k | case Instruction::Select: |
389 | 305k | case Instruction::AddrSpaceCast: |
390 | 305k | // The original value is not read/written via this if the new value isn't. |
391 | 305k | for (Use &UU : I->uses()) |
392 | 379k | if (379k Visited.insert(&UU).second379k ) |
393 | 363k | Worklist.push_back(&UU); |
394 | 305k | break; |
395 | 305k | |
396 | 102k | case Instruction::Call: |
397 | 102k | case Instruction::Invoke: { |
398 | 102k | bool Captures = true; |
399 | 102k | |
400 | 102k | if (I->getType()->isVoidTy()) |
401 | 10.7k | Captures = false; |
402 | 102k | |
403 | 65.9k | auto AddUsersToWorklistIfCapturing = [&] { |
404 | 65.9k | if (Captures) |
405 | 51.5k | for (Use &UU : I->uses()) |
406 | 51.9k | if (51.9k Visited.insert(&UU).second51.9k ) |
407 | 51.9k | Worklist.push_back(&UU); |
408 | 65.9k | }; |
409 | 102k | |
410 | 102k | CallSite CS(I); |
411 | 102k | if (CS.doesNotAccessMemory()102k ) { |
412 | 236 | AddUsersToWorklistIfCapturing(); |
413 | 236 | continue; |
414 | 236 | } |
415 | 102k | |
416 | 102k | Function *F = CS.getCalledFunction(); |
417 | 102k | if (!F102k ) { |
418 | 3.65k | if (CS.onlyReadsMemory()3.65k ) { |
419 | 2 | IsRead = true; |
420 | 2 | AddUsersToWorklistIfCapturing(); |
421 | 2 | continue; |
422 | 2 | } |
423 | 3.65k | return Attribute::None; |
424 | 3.65k | } |
425 | 98.3k | |
426 | 98.3k | // Note: the callee and the two successor blocks *follow* the argument |
427 | 98.3k | // operands. This means there is no need to adjust UseIndex to account |
428 | 98.3k | // for these. |
429 | 98.3k | |
430 | 98.3k | unsigned UseIndex = std::distance(CS.arg_begin(), U); |
431 | 98.3k | |
432 | 98.3k | // U cannot be the callee operand use: since we're exploring the |
433 | 98.3k | // transitive uses of an Argument, having such a use be a callee would |
434 | 98.3k | // imply the CallSite is an indirect call or invoke; and we'd take the |
435 | 98.3k | // early exit above. |
436 | 98.3k | assert(UseIndex < CS.data_operands_size() && |
437 | 98.3k | "Data operand use expected!"); |
438 | 98.3k | |
439 | 98.3k | bool IsOperandBundleUse = UseIndex >= CS.getNumArgOperands(); |
440 | 98.3k | |
441 | 98.3k | if (UseIndex >= F->arg_size() && 98.3k !IsOperandBundleUse1.18k ) { |
442 | 1.17k | assert(F->isVarArg() && "More params than args in non-varargs call"); |
443 | 1.17k | return Attribute::None; |
444 | 1.17k | } |
445 | 97.1k | |
446 | 97.1k | Captures &= !CS.doesNotCapture(UseIndex); |
447 | 97.1k | |
448 | 97.1k | // Since the optimizer (by design) cannot see the data flow corresponding |
449 | 97.1k | // to a operand bundle use, these cannot participate in the optimistic SCC |
450 | 97.1k | // analysis. Instead, we model the operand bundle uses as arguments in |
451 | 97.1k | // call to a function external to the SCC. |
452 | 97.1k | if (IsOperandBundleUse || |
453 | 97.1k | !SCCNodes.count(&*std::next(F->arg_begin(), UseIndex))97.1k ) { |
454 | 95.8k | |
455 | 95.8k | // The accessors used on CallSite here do the right thing for calls and |
456 | 95.8k | // invokes with operand bundles. |
457 | 95.8k | |
458 | 95.8k | if (!CS.onlyReadsMemory() && 95.8k !CS.onlyReadsMemory(UseIndex)84.6k ) |
459 | 31.4k | return Attribute::None; |
460 | 64.4k | if (64.4k !CS.doesNotAccessMemory(UseIndex)64.4k ) |
461 | 14.3k | IsRead = true; |
462 | 95.8k | } |
463 | 97.1k | |
464 | 65.7k | AddUsersToWorklistIfCapturing(); |
465 | 65.7k | break; |
466 | 97.1k | } |
467 | 97.1k | |
468 | 175k | case Instruction::Load: |
469 | 175k | // A volatile load has side effects beyond what readonly can be relied |
470 | 175k | // upon. |
471 | 175k | if (cast<LoadInst>(I)->isVolatile()) |
472 | 3 | return Attribute::None; |
473 | 175k | |
474 | 175k | IsRead = true; |
475 | 175k | break; |
476 | 175k | |
477 | 181k | case Instruction::ICmp: |
478 | 181k | case Instruction::Ret: |
479 | 181k | break; |
480 | 181k | |
481 | 37.6k | default: |
482 | 37.6k | return Attribute::None; |
483 | 802k | } |
484 | 802k | } |
485 | 204k | |
486 | 130k | return IsRead ? 130k Attribute::ReadOnly26.4k : Attribute::ReadNone104k ; |
487 | 204k | } |
488 | | |
489 | | /// Deduce returned attributes for the SCC. |
490 | 726k | static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) { |
491 | 726k | bool Changed = false; |
492 | 726k | |
493 | 726k | // Check each function in turn, determining if an argument is always returned. |
494 | 727k | for (Function *F : SCCNodes) { |
495 | 727k | // We can infer and propagate function attributes only when we know that the |
496 | 727k | // definition we'll get at link time is *exactly* the definition we see now. |
497 | 727k | // For more details, see GlobalValue::mayBeDerefined. |
498 | 727k | if (!F->hasExactDefinition()) |
499 | 627k | continue; |
500 | 100k | |
501 | 100k | if (100k F->getReturnType()->isVoidTy()100k ) |
502 | 32.5k | continue; |
503 | 67.7k | |
504 | 67.7k | // There is nothing to do if an argument is already marked as 'returned'. |
505 | 67.7k | if (67.7k any_of(F->args(), |
506 | 213k | [](const Argument &Arg) { return Arg.hasReturnedAttr(); })) |
507 | 930 | continue; |
508 | 66.8k | |
509 | 66.8k | auto FindRetArg = [&]() -> Value * 66.8k { |
510 | 66.8k | Value *RetArg = nullptr; |
511 | 66.8k | for (BasicBlock &BB : *F) |
512 | 673k | if (auto *673k Ret673k = dyn_cast<ReturnInst>(BB.getTerminator())) { |
513 | 66.6k | // Note that stripPointerCasts should look through functions with |
514 | 66.6k | // returned arguments. |
515 | 66.6k | Value *RetVal = Ret->getReturnValue()->stripPointerCasts(); |
516 | 66.6k | if (!isa<Argument>(RetVal) || 66.6k RetVal->getType() != F->getReturnType()657 ) |
517 | 66.0k | return nullptr; |
518 | 646 | |
519 | 646 | if (646 !RetArg646 ) |
520 | 646 | RetArg = RetVal; |
521 | 0 | else if (0 RetArg != RetVal0 ) |
522 | 0 | return nullptr; |
523 | 813 | } |
524 | 813 | |
525 | 813 | return RetArg; |
526 | 813 | }; |
527 | 66.8k | |
528 | 66.8k | if (Value *RetArg66.8k = FindRetArg()) { |
529 | 646 | auto *A = cast<Argument>(RetArg); |
530 | 646 | A->addAttr(Attribute::Returned); |
531 | 646 | ++NumReturned; |
532 | 646 | Changed = true; |
533 | 646 | } |
534 | 727k | } |
535 | 726k | |
536 | 726k | return Changed; |
537 | 726k | } |
538 | | |
539 | | /// If a callsite has arguments that are also arguments to the parent function, |
540 | | /// try to propagate attributes from the callsite's arguments to the parent's |
541 | | /// arguments. This may be important because inlining can cause information loss |
542 | | /// when attribute knowledge disappears with the inlined call. |
543 | 100k | static bool addArgumentAttrsFromCallsites(Function &F) { |
544 | 100k | if (!EnableNonnullArgPropagation) |
545 | 100k | return false; |
546 | 19 | |
547 | 19 | bool Changed = false; |
548 | 19 | |
549 | 19 | // For an argument attribute to transfer from a callsite to the parent, the |
550 | 19 | // call must be guaranteed to execute every time the parent is called. |
551 | 19 | // Conservatively, just check for calls in the entry block that are guaranteed |
552 | 19 | // to execute. |
553 | 19 | // TODO: This could be enhanced by testing if the callsite post-dominates the |
554 | 19 | // entry block or by doing simple forward walks or backward walks to the |
555 | 19 | // callsite. |
556 | 19 | BasicBlock &Entry = F.getEntryBlock(); |
557 | 22 | for (Instruction &I : Entry) { |
558 | 22 | if (auto CS22 = CallSite(&I)) { |
559 | 15 | if (auto *CalledFunc15 = CS.getCalledFunction()) { |
560 | 13 | for (auto &CSArg : CalledFunc->args()) { |
561 | 13 | if (!CSArg.hasNonNullAttr()) |
562 | 4 | continue; |
563 | 9 | |
564 | 9 | // If the non-null callsite argument operand is an argument to 'F' |
565 | 9 | // (the caller) and the call is guaranteed to execute, then the value |
566 | 9 | // must be non-null throughout 'F'. |
567 | 9 | auto *FArg = dyn_cast<Argument>(CS.getArgOperand(CSArg.getArgNo())); |
568 | 9 | if (FArg && 9 !FArg->hasNonNullAttr()9 ) { |
569 | 9 | FArg->addAttr(Attribute::NonNull); |
570 | 9 | Changed = true; |
571 | 9 | } |
572 | 13 | } |
573 | 15 | } |
574 | 15 | } |
575 | 22 | if (!isGuaranteedToTransferExecutionToSuccessor(&I)) |
576 | 18 | break; |
577 | 19 | } |
578 | 19 | |
579 | 19 | return Changed; |
580 | 100k | } |
581 | | |
582 | | /// Deduce nocapture attributes for the SCC. |
583 | 726k | static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { |
584 | 726k | bool Changed = false; |
585 | 726k | |
586 | 726k | ArgumentGraph AG; |
587 | 726k | |
588 | 726k | // Check each function in turn, determining which pointer arguments are not |
589 | 726k | // captured. |
590 | 727k | for (Function *F : SCCNodes) { |
591 | 727k | // We can infer and propagate function attributes only when we know that the |
592 | 727k | // definition we'll get at link time is *exactly* the definition we see now. |
593 | 727k | // For more details, see GlobalValue::mayBeDerefined. |
594 | 727k | if (!F->hasExactDefinition()) |
595 | 627k | continue; |
596 | 100k | |
597 | 100k | Changed |= addArgumentAttrsFromCallsites(*F); |
598 | 100k | |
599 | 100k | // Functions that are readonly (or readnone) and nounwind and don't return |
600 | 100k | // a value can't capture arguments. Don't analyze them. |
601 | 100k | if (F->onlyReadsMemory() && 100k F->doesNotThrow()28.1k && |
602 | 100k | F->getReturnType()->isVoidTy()27.7k ) { |
603 | 3.16k | for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; |
604 | 2.02k | ++A1.14k ) { |
605 | 1.14k | if (A->getType()->isPointerTy() && 1.14k !A->hasNoCaptureAttr()526 ) { |
606 | 526 | A->addAttr(Attribute::NoCapture); |
607 | 526 | ++NumNoCapture; |
608 | 526 | Changed = true; |
609 | 526 | } |
610 | 1.14k | } |
611 | 2.02k | continue; |
612 | 2.02k | } |
613 | 98.2k | |
614 | 359k | for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); 98.2k A != E359k ; |
615 | 261k | ++A261k ) { |
616 | 261k | if (!A->getType()->isPointerTy()) |
617 | 55.8k | continue; |
618 | 205k | bool HasNonLocalUses = false; |
619 | 205k | if (!A->hasNoCaptureAttr()205k ) { |
620 | 204k | ArgumentUsesTracker Tracker(SCCNodes); |
621 | 204k | PointerMayBeCaptured(&*A, &Tracker); |
622 | 204k | if (!Tracker.Captured204k ) { |
623 | 44.7k | if (Tracker.Uses.empty()44.7k ) { |
624 | 43.8k | // If it's trivially not captured, mark it nocapture now. |
625 | 43.8k | A->addAttr(Attribute::NoCapture); |
626 | 43.8k | ++NumNoCapture; |
627 | 43.8k | Changed = true; |
628 | 44.7k | } else { |
629 | 872 | // If it's not trivially captured and not trivially not captured, |
630 | 872 | // then it must be calling into another function in our SCC. Save |
631 | 872 | // its particulars for Argument-SCC analysis later. |
632 | 872 | ArgumentGraphNode *Node = AG[&*A]; |
633 | 1.50k | for (Argument *Use : Tracker.Uses) { |
634 | 1.50k | Node->Uses.push_back(AG[Use]); |
635 | 1.50k | if (Use != &*A) |
636 | 698 | HasNonLocalUses = true; |
637 | 1.50k | } |
638 | 872 | } |
639 | 44.7k | } |
640 | 204k | // Otherwise, it's captured. Don't bother doing SCC analysis on it. |
641 | 204k | } |
642 | 205k | if (!HasNonLocalUses && 205k !A->onlyReadsMemory()204k ) { |
643 | 204k | // Can we determine that it's readonly/readnone without doing an SCC? |
644 | 204k | // Note that we don't allow any calls at all here, or else our result |
645 | 204k | // will be dependent on the iteration order through the functions in the |
646 | 204k | // SCC. |
647 | 204k | SmallPtrSet<Argument *, 8> Self; |
648 | 204k | Self.insert(&*A); |
649 | 204k | Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self); |
650 | 204k | if (R != Attribute::None204k ) { |
651 | 130k | A->addAttr(R); |
652 | 130k | Changed = true; |
653 | 130k | R == Attribute::ReadOnly ? ++NumReadOnlyArg26.4k : ++NumReadNoneArg104k ; |
654 | 130k | } |
655 | 204k | } |
656 | 261k | } |
657 | 727k | } |
658 | 726k | |
659 | 726k | // The graph we've collected is partial because we stopped scanning for |
660 | 726k | // argument uses once we solved the argument trivially. These partial nodes |
661 | 726k | // show up as ArgumentGraphNode objects with an empty Uses list, and for |
662 | 726k | // these nodes the final decision about whether they capture has already been |
663 | 726k | // made. If the definition doesn't have a 'nocapture' attribute by now, it |
664 | 726k | // captures. |
665 | 726k | |
666 | 1.45M | for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd()1.45M ; ++I727k ) { |
667 | 727k | const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I; |
668 | 727k | if (ArgumentSCC.size() == 1727k ) { |
669 | 727k | if (!ArgumentSCC[0]->Definition) |
670 | 726k | continue; // synthetic root node |
671 | 961 | |
672 | 961 | // eg. "void f(int* x) { if (...) f(x); }" |
673 | 961 | if (961 ArgumentSCC[0]->Uses.size() == 1 && |
674 | 961 | ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]501 ) { |
675 | 222 | Argument *A = ArgumentSCC[0]->Definition; |
676 | 222 | A->addAttr(Attribute::NoCapture); |
677 | 222 | ++NumNoCapture; |
678 | 222 | Changed = true; |
679 | 222 | } |
680 | 727k | continue; |
681 | 727k | } |
682 | 40 | |
683 | 40 | bool SCCCaptured = false; |
684 | 40 | for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); |
685 | 130 | I != E && 130 !SCCCaptured90 ; ++I90 ) { |
686 | 90 | ArgumentGraphNode *Node = *I; |
687 | 90 | if (Node->Uses.empty()90 ) { |
688 | 0 | if (!Node->Definition->hasNoCaptureAttr()) |
689 | 0 | SCCCaptured = true; |
690 | 0 | } |
691 | 90 | } |
692 | 40 | if (SCCCaptured) |
693 | 0 | continue; |
694 | 40 | |
695 | 40 | SmallPtrSet<Argument *, 8> ArgumentSCCNodes; |
696 | 40 | // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for |
697 | 40 | // quickly looking up whether a given Argument is in this ArgumentSCC. |
698 | 90 | for (ArgumentGraphNode *I : ArgumentSCC) { |
699 | 90 | ArgumentSCCNodes.insert(I->Definition); |
700 | 90 | } |
701 | 40 | |
702 | 40 | for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); |
703 | 130 | I != E && 130 !SCCCaptured90 ; ++I90 ) { |
704 | 90 | ArgumentGraphNode *N = *I; |
705 | 162 | for (ArgumentGraphNode *Use : N->Uses) { |
706 | 162 | Argument *A = Use->Definition; |
707 | 162 | if (A->hasNoCaptureAttr() || 162 ArgumentSCCNodes.count(A)162 ) |
708 | 162 | continue; |
709 | 0 | SCCCaptured = true; |
710 | 0 | break; |
711 | 0 | } |
712 | 90 | } |
713 | 40 | if (SCCCaptured) |
714 | 0 | continue; |
715 | 40 | |
716 | 130 | for (unsigned i = 0, e = ArgumentSCC.size(); 40 i != e130 ; ++i90 ) { |
717 | 90 | Argument *A = ArgumentSCC[i]->Definition; |
718 | 90 | A->addAttr(Attribute::NoCapture); |
719 | 90 | ++NumNoCapture; |
720 | 90 | Changed = true; |
721 | 90 | } |
722 | 40 | |
723 | 40 | // We also want to compute readonly/readnone. With a small number of false |
724 | 40 | // negatives, we can assume that any pointer which is captured isn't going |
725 | 40 | // to be provably readonly or readnone, since by definition we can't |
726 | 40 | // analyze all uses of a captured pointer. |
727 | 40 | // |
728 | 40 | // The false negatives happen when the pointer is captured by a function |
729 | 40 | // that promises readonly/readnone behaviour on the pointer, then the |
730 | 40 | // pointer's lifetime ends before anything that writes to arbitrary memory. |
731 | 40 | // Also, a readonly/readnone pointer may be returned, but returning a |
732 | 40 | // pointer is capturing it. |
733 | 40 | |
734 | 40 | Attribute::AttrKind ReadAttr = Attribute::ReadNone; |
735 | 112 | for (unsigned i = 0, e = ArgumentSCC.size(); i != e112 ; ++i72 ) { |
736 | 82 | Argument *A = ArgumentSCC[i]->Definition; |
737 | 82 | Attribute::AttrKind K = determinePointerReadAttrs(A, ArgumentSCCNodes); |
738 | 82 | if (K == Attribute::ReadNone) |
739 | 26 | continue; |
740 | 56 | if (56 K == Attribute::ReadOnly56 ) { |
741 | 46 | ReadAttr = Attribute::ReadOnly; |
742 | 46 | continue; |
743 | 46 | } |
744 | 10 | ReadAttr = K; |
745 | 10 | break; |
746 | 10 | } |
747 | 40 | |
748 | 40 | if (ReadAttr != Attribute::None40 ) { |
749 | 101 | for (unsigned i = 0, e = ArgumentSCC.size(); i != e101 ; ++i70 ) { |
750 | 70 | Argument *A = ArgumentSCC[i]->Definition; |
751 | 70 | // Clear out existing readonly/readnone attributes |
752 | 70 | A->removeAttr(Attribute::ReadOnly); |
753 | 70 | A->removeAttr(Attribute::ReadNone); |
754 | 70 | A->addAttr(ReadAttr); |
755 | 70 | ReadAttr == Attribute::ReadOnly ? ++NumReadOnlyArg64 : ++NumReadNoneArg6 ; |
756 | 70 | Changed = true; |
757 | 70 | } |
758 | 31 | } |
759 | 727k | } |
760 | 726k | |
761 | 726k | return Changed; |
762 | 726k | } |
763 | | |
764 | | /// Tests whether a function is "malloc-like". |
765 | | /// |
766 | | /// A function is "malloc-like" if it returns either null or a pointer that |
767 | | /// doesn't alias any other pointer visible to the caller. |
768 | 15.3k | static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) { |
769 | 15.3k | SmallSetVector<Value *, 8> FlowsToReturn; |
770 | 15.3k | for (BasicBlock &BB : *F) |
771 | 107k | if (ReturnInst *107k Ret107k = dyn_cast<ReturnInst>(BB.getTerminator())) |
772 | 15.3k | FlowsToReturn.insert(Ret->getReturnValue()); |
773 | 15.3k | |
774 | 26.6k | for (unsigned i = 0; i != FlowsToReturn.size()26.6k ; ++i11.3k ) { |
775 | 25.6k | Value *RetVal = FlowsToReturn[i]; |
776 | 25.6k | |
777 | 25.6k | if (Constant *C25.6k = dyn_cast<Constant>(RetVal)) { |
778 | 8.02k | if (!C->isNullValue() && 8.02k !isa<UndefValue>(C)6.22k ) |
779 | 6.05k | return false; |
780 | 1.96k | |
781 | 1.96k | continue; |
782 | 1.96k | } |
783 | 17.6k | |
784 | 17.6k | if (17.6k isa<Argument>(RetVal)17.6k ) |
785 | 2.18k | return false; |
786 | 15.4k | |
787 | 15.4k | if (Instruction *15.4k RVI15.4k = dyn_cast<Instruction>(RetVal)) |
788 | 15.4k | switch (RVI->getOpcode()) { |
789 | 15.4k | // Extend the analysis by looking upwards. |
790 | 3.00k | case Instruction::BitCast: |
791 | 3.00k | case Instruction::GetElementPtr: |
792 | 3.00k | case Instruction::AddrSpaceCast: |
793 | 3.00k | FlowsToReturn.insert(RVI->getOperand(0)); |
794 | 3.00k | continue; |
795 | 519 | case Instruction::Select: { |
796 | 519 | SelectInst *SI = cast<SelectInst>(RVI); |
797 | 519 | FlowsToReturn.insert(SI->getTrueValue()); |
798 | 519 | FlowsToReturn.insert(SI->getFalseValue()); |
799 | 519 | continue; |
800 | 3.00k | } |
801 | 5.14k | case Instruction::PHI: { |
802 | 5.14k | PHINode *PN = cast<PHINode>(RVI); |
803 | 5.14k | for (Value *IncValue : PN->incoming_values()) |
804 | 15.1k | FlowsToReturn.insert(IncValue); |
805 | 5.14k | continue; |
806 | 3.00k | } |
807 | 3.00k | |
808 | 3.00k | // Check whether the pointer came from an allocation. |
809 | 2 | case Instruction::Alloca: |
810 | 2 | break; |
811 | 3.76k | case Instruction::Call: |
812 | 3.76k | case Instruction::Invoke: { |
813 | 3.76k | CallSite CS(RVI); |
814 | 3.76k | if (CS.hasRetAttr(Attribute::NoAlias)) |
815 | 963 | break; |
816 | 2.79k | if (2.79k CS.getCalledFunction() && 2.79k SCCNodes.count(CS.getCalledFunction())2.54k ) |
817 | 123 | break; |
818 | 2.67k | LLVM_FALLTHROUGH2.67k ; |
819 | 2.67k | } |
820 | 5.72k | default: |
821 | 5.72k | return false; // Did not come from an allocation. |
822 | 1.08k | } |
823 | 1.08k | |
824 | 1.08k | if (1.08k PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false)1.08k ) |
825 | 416 | return false; |
826 | 25.6k | } |
827 | 15.3k | |
828 | 949 | return true; |
829 | 15.3k | } |
830 | | |
831 | | /// Deduce noalias attributes for the SCC. |
832 | 726k | static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) { |
833 | 726k | // Check each function in turn, determining which functions return noalias |
834 | 726k | // pointers. |
835 | 727k | for (Function *F : SCCNodes) { |
836 | 727k | // Already noalias. |
837 | 727k | if (F->returnDoesNotAlias()) |
838 | 7.16k | continue; |
839 | 720k | |
840 | 720k | // We can infer and propagate function attributes only when we know that the |
841 | 720k | // definition we'll get at link time is *exactly* the definition we see now. |
842 | 720k | // For more details, see GlobalValue::mayBeDerefined. |
843 | 720k | if (720k !F->hasExactDefinition()720k ) |
844 | 620k | return false; |
845 | 100k | |
846 | 100k | // We annotate noalias return values, which are only applicable to |
847 | 100k | // pointer types. |
848 | 100k | if (100k !F->getReturnType()->isPointerTy()100k ) |
849 | 84.7k | continue; |
850 | 15.3k | |
851 | 15.3k | if (15.3k !isFunctionMallocLike(F, SCCNodes)15.3k ) |
852 | 14.3k | return false; |
853 | 92.3k | } |
854 | 92.3k | |
855 | 92.3k | bool MadeChange = false; |
856 | 92.7k | for (Function *F : SCCNodes) { |
857 | 92.7k | if (F->returnDoesNotAlias() || |
858 | 85.5k | !F->getReturnType()->isPointerTy()) |
859 | 91.7k | continue; |
860 | 945 | |
861 | 945 | F->setReturnDoesNotAlias(); |
862 | 945 | ++NumNoAlias; |
863 | 945 | MadeChange = true; |
864 | 945 | } |
865 | 726k | |
866 | 726k | return MadeChange; |
867 | 726k | } |
868 | | |
869 | | /// Tests whether this function is known to not return null. |
870 | | /// |
871 | | /// Requires that the function returns a pointer. |
872 | | /// |
873 | | /// Returns true if it believes the function will not return a null, and sets |
874 | | /// \p Speculative based on whether the returned conclusion is a speculative |
875 | | /// conclusion due to SCC calls. |
876 | | static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes, |
877 | 15.4k | bool &Speculative) { |
878 | 15.4k | assert(F->getReturnType()->isPointerTy() && |
879 | 15.4k | "nonnull only meaningful on pointer types"); |
880 | 15.4k | Speculative = false; |
881 | 15.4k | |
882 | 15.4k | SmallSetVector<Value *, 8> FlowsToReturn; |
883 | 15.4k | for (BasicBlock &BB : *F) |
884 | 117k | if (auto *117k Ret117k = dyn_cast<ReturnInst>(BB.getTerminator())) |
885 | 15.4k | FlowsToReturn.insert(Ret->getReturnValue()); |
886 | 15.4k | |
887 | 15.4k | auto &DL = F->getParent()->getDataLayout(); |
888 | 15.4k | |
889 | 28.0k | for (unsigned i = 0; i != FlowsToReturn.size()28.0k ; ++i12.6k ) { |
890 | 22.0k | Value *RetVal = FlowsToReturn[i]; |
891 | 22.0k | |
892 | 22.0k | // If this value is locally known to be non-null, we're good |
893 | 22.0k | if (isKnownNonZero(RetVal, DL)) |
894 | 6.21k | continue; |
895 | 15.8k | |
896 | 15.8k | // Otherwise, we need to look upwards since we can't make any local |
897 | 15.8k | // conclusions. |
898 | 15.8k | Instruction *RVI = dyn_cast<Instruction>(RetVal); |
899 | 15.8k | if (!RVI) |
900 | 3.98k | return false; |
901 | 11.8k | switch (RVI->getOpcode()) { |
902 | 11.8k | // Extend the analysis by looking upwards. |
903 | 2.07k | case Instruction::BitCast: |
904 | 2.07k | case Instruction::GetElementPtr: |
905 | 2.07k | case Instruction::AddrSpaceCast: |
906 | 2.07k | FlowsToReturn.insert(RVI->getOperand(0)); |
907 | 2.07k | continue; |
908 | 380 | case Instruction::Select: { |
909 | 380 | SelectInst *SI = cast<SelectInst>(RVI); |
910 | 380 | FlowsToReturn.insert(SI->getTrueValue()); |
911 | 380 | FlowsToReturn.insert(SI->getFalseValue()); |
912 | 380 | continue; |
913 | 2.07k | } |
914 | 3.92k | case Instruction::PHI: { |
915 | 3.92k | PHINode *PN = cast<PHINode>(RVI); |
916 | 16.1k | for (int i = 0, e = PN->getNumIncomingValues(); i != e16.1k ; ++i12.2k ) |
917 | 12.2k | FlowsToReturn.insert(PN->getIncomingValue(i)); |
918 | 3.92k | continue; |
919 | 2.07k | } |
920 | 2.80k | case Instruction::Call: |
921 | 2.80k | case Instruction::Invoke: { |
922 | 2.80k | CallSite CS(RVI); |
923 | 2.80k | Function *Callee = CS.getCalledFunction(); |
924 | 2.80k | // A call to a node within the SCC is assumed to return null until |
925 | 2.80k | // proven otherwise |
926 | 2.80k | if (Callee && 2.80k SCCNodes.count(Callee)2.56k ) { |
927 | 93 | Speculative = true; |
928 | 93 | continue; |
929 | 93 | } |
930 | 2.70k | return false; |
931 | 2.70k | } |
932 | 2.65k | default: |
933 | 2.65k | return false; // Unknown source, may be null |
934 | 0 | }; |
935 | 0 | llvm_unreachable("should have either continued or returned"); |
936 | 22.0k | } |
937 | 15.4k | |
938 | 6.05k | return true; |
939 | 15.4k | } |
940 | | |
941 | | /// Deduce nonnull attributes for the SCC. |
942 | 726k | static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) { |
943 | 726k | // Speculative that all functions in the SCC return only nonnull |
944 | 726k | // pointers. We may refute this as we analyze functions. |
945 | 726k | bool SCCReturnsNonNull = true; |
946 | 726k | |
947 | 726k | bool MadeChange = false; |
948 | 726k | |
949 | 726k | // Check each function in turn, determining which functions return nonnull |
950 | 726k | // pointers. |
951 | 727k | for (Function *F : SCCNodes) { |
952 | 727k | // Already nonnull. |
953 | 727k | if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex, |
954 | 727k | Attribute::NonNull)) |
955 | 423 | continue; |
956 | 727k | |
957 | 727k | // We can infer and propagate function attributes only when we know that the |
958 | 727k | // definition we'll get at link time is *exactly* the definition we see now. |
959 | 727k | // For more details, see GlobalValue::mayBeDerefined. |
960 | 727k | if (727k !F->hasExactDefinition()727k ) |
961 | 626k | return false; |
962 | 100k | |
963 | 100k | // We annotate nonnull return values, which are only applicable to |
964 | 100k | // pointer types. |
965 | 100k | if (100k !F->getReturnType()->isPointerTy()100k ) |
966 | 84.8k | continue; |
967 | 15.4k | |
968 | 15.4k | bool Speculative = false; |
969 | 15.4k | if (isReturnNonNull(F, SCCNodes, Speculative)15.4k ) { |
970 | 6.05k | if (!Speculative6.05k ) { |
971 | 6.04k | // Mark the function eagerly since we may discover a function |
972 | 6.04k | // which prevents us from speculating about the entire SCC |
973 | 6.04k | DEBUG(dbgs() << "Eagerly marking " << F->getName() << " as nonnull\n"); |
974 | 6.04k | F->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull); |
975 | 6.04k | ++NumNonNullReturn; |
976 | 6.04k | MadeChange = true; |
977 | 6.04k | } |
978 | 6.05k | continue; |
979 | 6.05k | } |
980 | 9.34k | // At least one function returns something which could be null, can't |
981 | 9.34k | // speculate any more. |
982 | 9.34k | SCCReturnsNonNull = false; |
983 | 9.34k | } |
984 | 726k | |
985 | 100k | if (100k SCCReturnsNonNull100k ) { |
986 | 91.1k | for (Function *F : SCCNodes) { |
987 | 91.1k | if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex, |
988 | 91.1k | Attribute::NonNull) || |
989 | 84.6k | !F->getReturnType()->isPointerTy()) |
990 | 91.1k | continue; |
991 | 2 | |
992 | 2 | DEBUG2 (dbgs() << "SCC marking " << F->getName() << " as nonnull\n"); |
993 | 2 | F->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull); |
994 | 2 | ++NumNonNullReturn; |
995 | 2 | MadeChange = true; |
996 | 2 | } |
997 | 90.7k | } |
998 | 100k | |
999 | 100k | return MadeChange; |
1000 | 726k | } |
1001 | | |
1002 | | /// Remove the convergent attribute from all functions in the SCC if every |
1003 | | /// callsite within the SCC is not convergent (except for calls to functions |
1004 | | /// within the SCC). Returns true if changes were made. |
1005 | 726k | static bool removeConvergentAttrs(const SCCNodeSet &SCCNodes) { |
1006 | 726k | // For every function in SCC, ensure that either |
1007 | 726k | // * it is not convergent, or |
1008 | 726k | // * we can remove its convergent attribute. |
1009 | 726k | bool HasConvergentFn = false; |
1010 | 727k | for (Function *F : SCCNodes) { |
1011 | 727k | if (!F->isConvergent()727k ) continue727k ; |
1012 | 37 | HasConvergentFn = true; |
1013 | 37 | |
1014 | 37 | // Can't remove convergent from function declarations. |
1015 | 37 | if (F->isDeclaration()37 ) return false27 ; |
1016 | 10 | |
1017 | 10 | // Can't remove convergent if any of our functions has a convergent call to a |
1018 | 10 | // function not in the SCC. |
1019 | 10 | for (Instruction &I : instructions(*F)) 10 { |
1020 | 13 | CallSite CS(&I); |
1021 | 13 | // Bail if CS is a convergent call to a function not in the SCC. |
1022 | 13 | if (CS && 13 CS.isConvergent()8 && |
1023 | 6 | SCCNodes.count(CS.getCalledFunction()) == 0) |
1024 | 4 | return false; |
1025 | 726k | } |
1026 | 727k | } |
1027 | 726k | |
1028 | 726k | // If the SCC doesn't have any convergent functions, we have nothing to do. |
1029 | 726k | if (726k !HasConvergentFn726k ) return false726k ; |
1030 | 6 | |
1031 | 6 | // If we got here, all of the calls the SCC makes to functions not in the SCC |
1032 | 6 | // are non-convergent. Therefore all of the SCC's functions can also be made |
1033 | 6 | // non-convergent. We'll remove the attr from the callsites in |
1034 | 6 | // InstCombineCalls. |
1035 | 6 | for (Function *F : SCCNodes) 6 { |
1036 | 5 | if (!F->isConvergent()5 ) continue0 ; |
1037 | 5 | |
1038 | 5 | DEBUG5 (dbgs() << "Removing convergent attr from fn " << F->getName() |
1039 | 5 | << "\n"); |
1040 | 5 | F->setNotConvergent(); |
1041 | 5 | } |
1042 | 726k | return true; |
1043 | 726k | } |
1044 | | |
1045 | 47.5k | static bool setDoesNotRecurse(Function &F) { |
1046 | 47.5k | if (F.doesNotRecurse()) |
1047 | 0 | return false; |
1048 | 47.5k | F.setDoesNotRecurse(); |
1049 | 47.5k | ++NumNoRecurse; |
1050 | 47.5k | return true; |
1051 | 47.5k | } |
1052 | | |
1053 | 726k | static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) { |
1054 | 726k | // Try and identify functions that do not recurse. |
1055 | 726k | |
1056 | 726k | // If the SCC contains multiple nodes we know for sure there is recursion. |
1057 | 726k | if (SCCNodes.size() != 1) |
1058 | 271 | return false; |
1059 | 726k | |
1060 | 726k | Function *F = *SCCNodes.begin(); |
1061 | 726k | if (!F || 726k F->isDeclaration()726k || F->doesNotRecurse()515k ) |
1062 | 283k | return false; |
1063 | 443k | |
1064 | 443k | // If all of the calls in F are identifiable and are to norecurse functions, F |
1065 | 443k | // is norecurse. This check also detects self-recursion as F is not currently |
1066 | 443k | // marked norecurse, so any called from F to F will not be marked norecurse. |
1067 | 443k | for (Instruction &I : instructions(*F)) |
1068 | 2.83M | if (auto 2.83M CS2.83M = CallSite(&I)) { |
1069 | 455k | Function *Callee = CS.getCalledFunction(); |
1070 | 455k | if (!Callee || 455k Callee == F403k || !Callee->doesNotRecurse()402k ) |
1071 | 455k | // Function calls a potentially recursive function. |
1072 | 395k | return false; |
1073 | 47.5k | } |
1074 | 47.5k | |
1075 | 47.5k | // Every call was to a non-recursive function other than this function, and |
1076 | 47.5k | // we have no indirect recursion as the SCC size is one. This function cannot |
1077 | 47.5k | // recurse. |
1078 | 47.5k | return setDoesNotRecurse(*F); |
1079 | 47.5k | } |
1080 | | |
1081 | | PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C, |
1082 | | CGSCCAnalysisManager &AM, |
1083 | | LazyCallGraph &CG, |
1084 | 271 | CGSCCUpdateResult &) { |
1085 | 271 | FunctionAnalysisManager &FAM = |
1086 | 271 | AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); |
1087 | 271 | |
1088 | 271 | // We pass a lambda into functions to wire them up to the analysis manager |
1089 | 271 | // for getting function analyses. |
1090 | 311 | auto AARGetter = [&](Function &F) -> AAResults & { |
1091 | 311 | return FAM.getResult<AAManager>(F); |
1092 | 311 | }; |
1093 | 271 | |
1094 | 271 | // Fill SCCNodes with the elements of the SCC. Also track whether there are |
1095 | 271 | // any external or opt-none nodes that will prevent us from optimizing any |
1096 | 271 | // part of the SCC. |
1097 | 271 | SCCNodeSet SCCNodes; |
1098 | 271 | bool HasUnknownCall = false; |
1099 | 346 | for (LazyCallGraph::Node &N : C) { |
1100 | 346 | Function &F = N.getFunction(); |
1101 | 346 | if (F.hasFnAttribute(Attribute::OptimizeNone)346 ) { |
1102 | 0 | // Treat any function we're trying not to optimize as if it were an |
1103 | 0 | // indirect call and omit it from the node set used below. |
1104 | 0 | HasUnknownCall = true; |
1105 | 0 | continue; |
1106 | 0 | } |
1107 | 346 | // Track whether any functions in this SCC have an unknown call edge. |
1108 | 346 | // Note: if this is ever a performance hit, we can common it with |
1109 | 346 | // subsequent routines which also do scans over the instructions of the |
1110 | 346 | // function. |
1111 | 346 | if (346 !HasUnknownCall346 ) |
1112 | 341 | for (Instruction &I : instructions(F)) |
1113 | 1.36k | if (auto 1.36k CS1.36k = CallSite(&I)) |
1114 | 434 | if (434 !CS.getCalledFunction()434 ) { |
1115 | 33 | HasUnknownCall = true; |
1116 | 33 | break; |
1117 | 33 | } |
1118 | 346 | |
1119 | 346 | SCCNodes.insert(&F); |
1120 | 346 | } |
1121 | 271 | |
1122 | 271 | bool Changed = false; |
1123 | 271 | Changed |= addArgumentReturnedAttrs(SCCNodes); |
1124 | 271 | Changed |= addReadAttrs(SCCNodes, AARGetter); |
1125 | 271 | Changed |= addArgumentAttrs(SCCNodes); |
1126 | 271 | |
1127 | 271 | // If we have no external nodes participating in the SCC, we can deduce some |
1128 | 271 | // more precise attributes as well. |
1129 | 271 | if (!HasUnknownCall271 ) { |
1130 | 238 | Changed |= addNoAliasAttrs(SCCNodes); |
1131 | 238 | Changed |= addNonNullAttrs(SCCNodes); |
1132 | 238 | Changed |= removeConvergentAttrs(SCCNodes); |
1133 | 238 | Changed |= addNoRecurseAttrs(SCCNodes); |
1134 | 238 | } |
1135 | 271 | |
1136 | 271 | return Changed ? PreservedAnalyses::none()136 : PreservedAnalyses::all()135 ; |
1137 | 271 | } |
1138 | | |
1139 | | namespace { |
1140 | | struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass { |
1141 | | static char ID; // Pass identification, replacement for typeid |
1142 | 17.6k | PostOrderFunctionAttrsLegacyPass() : CallGraphSCCPass(ID) { |
1143 | 17.6k | initializePostOrderFunctionAttrsLegacyPassPass( |
1144 | 17.6k | *PassRegistry::getPassRegistry()); |
1145 | 17.6k | } |
1146 | | |
1147 | | bool runOnSCC(CallGraphSCC &SCC) override; |
1148 | | |
1149 | 17.6k | void getAnalysisUsage(AnalysisUsage &AU) const override { |
1150 | 17.6k | AU.setPreservesCFG(); |
1151 | 17.6k | AU.addRequired<AssumptionCacheTracker>(); |
1152 | 17.6k | getAAResultsAnalysisUsage(AU); |
1153 | 17.6k | CallGraphSCCPass::getAnalysisUsage(AU); |
1154 | 17.6k | } |
1155 | | }; |
1156 | | } |
1157 | | |
1158 | | char PostOrderFunctionAttrsLegacyPass::ID = 0; |
1159 | 25.1k | INITIALIZE_PASS_BEGIN25.1k (PostOrderFunctionAttrsLegacyPass, "functionattrs",
|
1160 | 25.1k | "Deduce function attributes", false, false) |
1161 | 25.1k | INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) |
1162 | 25.1k | INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) |
1163 | 25.1k | INITIALIZE_PASS_END(PostOrderFunctionAttrsLegacyPass, "functionattrs", |
1164 | | "Deduce function attributes", false, false) |
1165 | | |
1166 | 17.6k | Pass *llvm::createPostOrderFunctionAttrsLegacyPass() { |
1167 | 17.6k | return new PostOrderFunctionAttrsLegacyPass(); |
1168 | 17.6k | } |
1169 | | |
1170 | | template <typename AARGetterT> |
1171 | 758k | static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) { |
1172 | 758k | bool Changed = false; |
1173 | 758k | |
1174 | 758k | // Fill SCCNodes with the elements of the SCC. Used for quickly looking up |
1175 | 758k | // whether a given CallGraphNode is in this SCC. Also track whether there are |
1176 | 758k | // any external or opt-none nodes that will prevent us from optimizing any |
1177 | 758k | // part of the SCC. |
1178 | 758k | SCCNodeSet SCCNodes; |
1179 | 758k | bool ExternalNode = false; |
1180 | 759k | for (CallGraphNode *I : SCC) { |
1181 | 759k | Function *F = I->getFunction(); |
1182 | 759k | if (!F || 759k F->hasFnAttribute(Attribute::OptimizeNone)727k ) { |
1183 | 31.6k | // External node or function we're trying not to optimize - we both avoid |
1184 | 31.6k | // transform them and avoid leveraging information they provide. |
1185 | 31.6k | ExternalNode = true; |
1186 | 31.6k | continue; |
1187 | 31.6k | } |
1188 | 727k | |
1189 | 727k | SCCNodes.insert(F); |
1190 | 727k | } |
1191 | 758k | |
1192 | 758k | // Skip it if the SCC only contains optnone functions. |
1193 | 758k | if (SCCNodes.empty()) |
1194 | 31.6k | return Changed; |
1195 | 726k | |
1196 | 726k | Changed |= addArgumentReturnedAttrs(SCCNodes); |
1197 | 726k | Changed |= addReadAttrs(SCCNodes, AARGetter); |
1198 | 726k | Changed |= addArgumentAttrs(SCCNodes); |
1199 | 726k | |
1200 | 726k | // If we have no external nodes participating in the SCC, we can deduce some |
1201 | 726k | // more precise attributes as well. |
1202 | 726k | if (!ExternalNode726k ) { |
1203 | 726k | Changed |= addNoAliasAttrs(SCCNodes); |
1204 | 726k | Changed |= addNonNullAttrs(SCCNodes); |
1205 | 726k | Changed |= removeConvergentAttrs(SCCNodes); |
1206 | 726k | Changed |= addNoRecurseAttrs(SCCNodes); |
1207 | 726k | } |
1208 | 758k | |
1209 | 758k | return Changed; |
1210 | 758k | } |
1211 | | |
1212 | 758k | bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) { |
1213 | 758k | if (skipSCC(SCC)) |
1214 | 36 | return false; |
1215 | 758k | return runImpl(SCC, LegacyAARGetter(*this)); |
1216 | 758k | } |
1217 | | |
1218 | | namespace { |
1219 | | struct ReversePostOrderFunctionAttrsLegacyPass : public ModulePass { |
1220 | | static char ID; // Pass identification, replacement for typeid |
1221 | 17.4k | ReversePostOrderFunctionAttrsLegacyPass() : ModulePass(ID) { |
1222 | 17.4k | initializeReversePostOrderFunctionAttrsLegacyPassPass( |
1223 | 17.4k | *PassRegistry::getPassRegistry()); |
1224 | 17.4k | } |
1225 | | |
1226 | | bool runOnModule(Module &M) override; |
1227 | | |
1228 | 17.4k | void getAnalysisUsage(AnalysisUsage &AU) const override { |
1229 | 17.4k | AU.setPreservesCFG(); |
1230 | 17.4k | AU.addRequired<CallGraphWrapperPass>(); |
1231 | 17.4k | AU.addPreserved<CallGraphWrapperPass>(); |
1232 | 17.4k | } |
1233 | | }; |
1234 | | } |
1235 | | |
1236 | | char ReversePostOrderFunctionAttrsLegacyPass::ID = 0; |
1237 | 25.1k | INITIALIZE_PASS_BEGIN25.1k (ReversePostOrderFunctionAttrsLegacyPass, "rpo-functionattrs",
|
1238 | 25.1k | "Deduce function attributes in RPO", false, false) |
1239 | 25.1k | INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) |
1240 | 25.1k | INITIALIZE_PASS_END(ReversePostOrderFunctionAttrsLegacyPass, "rpo-functionattrs", |
1241 | | "Deduce function attributes in RPO", false, false) |
1242 | | |
1243 | 17.4k | Pass *llvm::createReversePostOrderFunctionAttrsPass() { |
1244 | 17.4k | return new ReversePostOrderFunctionAttrsLegacyPass(); |
1245 | 17.4k | } |
1246 | | |
1247 | 4.43k | static bool addNoRecurseAttrsTopDown(Function &F) { |
1248 | 4.43k | // We check the preconditions for the function prior to calling this to avoid |
1249 | 4.43k | // the cost of building up a reversible post-order list. We assert them here |
1250 | 4.43k | // to make sure none of the invariants this relies on were violated. |
1251 | 4.43k | assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!"); |
1252 | 4.43k | assert(!F.doesNotRecurse() && |
1253 | 4.43k | "This function has already been deduced as norecurs!"); |
1254 | 4.43k | assert(F.hasInternalLinkage() && |
1255 | 4.43k | "Can only do top-down deduction for internal linkage functions!"); |
1256 | 4.43k | |
1257 | 4.43k | // If F is internal and all of its uses are calls from a non-recursive |
1258 | 4.43k | // functions, then none of its calls could in fact recurse without going |
1259 | 4.43k | // through a function marked norecurse, and so we can mark this function too |
1260 | 4.43k | // as norecurse. Note that the uses must actually be calls -- otherwise |
1261 | 4.43k | // a pointer to this function could be returned from a norecurse function but |
1262 | 4.43k | // this function could be recursively (indirectly) called. Note that this |
1263 | 4.43k | // also detects if F is directly recursive as F is not yet marked as |
1264 | 4.43k | // a norecurse function. |
1265 | 4.43k | for (auto *U : F.users()) { |
1266 | 4.43k | auto *I = dyn_cast<Instruction>(U); |
1267 | 4.43k | if (!I) |
1268 | 1.29k | return false; |
1269 | 3.14k | CallSite CS(I); |
1270 | 3.14k | if (!CS || 3.14k !CS.getParent()->getParent()->doesNotRecurse()2.49k ) |
1271 | 3.13k | return false; |
1272 | 6 | } |
1273 | 6 | return setDoesNotRecurse(F); |
1274 | 6 | } |
1275 | | |
1276 | 17.5k | static bool deduceFunctionAttributeInRPO(Module &M, CallGraph &CG) { |
1277 | 17.5k | // We only have a post-order SCC traversal (because SCCs are inherently |
1278 | 17.5k | // discovered in post-order), so we accumulate them in a vector and then walk |
1279 | 17.5k | // it in reverse. This is simpler than using the RPO iterator infrastructure |
1280 | 17.5k | // because we need to combine SCC detection and the PO walk of the call |
1281 | 17.5k | // graph. We can also cheat egregiously because we're primarily interested in |
1282 | 17.5k | // synthesizing norecurse and so we can only save the singular SCCs as SCCs |
1283 | 17.5k | // with multiple functions in them will clearly be recursive. |
1284 | 17.5k | SmallVector<Function *, 16> Worklist; |
1285 | 724k | for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd()724k ; ++I706k ) { |
1286 | 706k | if (I->size() != 1) |
1287 | 113 | continue; |
1288 | 706k | |
1289 | 706k | Function *F = I->front()->getFunction(); |
1290 | 706k | if (F && 706k !F->isDeclaration()675k && !F->doesNotRecurse()462k && |
1291 | 369k | F->hasInternalLinkage()) |
1292 | 4.43k | Worklist.push_back(F); |
1293 | 706k | } |
1294 | 17.5k | |
1295 | 17.5k | bool Changed = false; |
1296 | 17.5k | for (auto *F : reverse(Worklist)) |
1297 | 4.43k | Changed |= addNoRecurseAttrsTopDown(*F); |
1298 | 17.5k | |
1299 | 17.5k | return Changed; |
1300 | 17.5k | } |
1301 | | |
1302 | 17.4k | bool ReversePostOrderFunctionAttrsLegacyPass::runOnModule(Module &M) { |
1303 | 17.4k | if (skipModule(M)) |
1304 | 10 | return false; |
1305 | 17.4k | |
1306 | 17.4k | auto &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); |
1307 | 17.4k | |
1308 | 17.4k | return deduceFunctionAttributeInRPO(M, CG); |
1309 | 17.4k | } |
1310 | | |
1311 | | PreservedAnalyses |
1312 | 50 | ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) { |
1313 | 50 | auto &CG = AM.getResult<CallGraphAnalysis>(M); |
1314 | 50 | |
1315 | 50 | if (!deduceFunctionAttributeInRPO(M, CG)) |
1316 | 49 | return PreservedAnalyses::all(); |
1317 | 1 | |
1318 | 1 | PreservedAnalyses PA; |
1319 | 1 | PA.preserve<CallGraphAnalysis>(); |
1320 | 1 | return PA; |
1321 | 1 | } |