/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- TypeErasedDataflowAnalysis.cpp -------------------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file defines type-erased base types and functions for building dataflow |
10 | | // analyses that run over Control-Flow Graphs (CFGs). |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include <algorithm> |
15 | | #include <memory> |
16 | | #include <system_error> |
17 | | #include <utility> |
18 | | #include <vector> |
19 | | |
20 | | #include "clang/AST/DeclCXX.h" |
21 | | #include "clang/AST/OperationKinds.h" |
22 | | #include "clang/AST/StmtVisitor.h" |
23 | | #include "clang/Analysis/Analyses/PostOrderCFGView.h" |
24 | | #include "clang/Analysis/CFG.h" |
25 | | #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" |
26 | | #include "clang/Analysis/FlowSensitive/DataflowWorklist.h" |
27 | | #include "clang/Analysis/FlowSensitive/Transfer.h" |
28 | | #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" |
29 | | #include "clang/Analysis/FlowSensitive/Value.h" |
30 | | #include "llvm/ADT/ArrayRef.h" |
31 | | #include "llvm/ADT/DenseSet.h" |
32 | | #include "llvm/ADT/None.h" |
33 | | #include "llvm/ADT/Optional.h" |
34 | | #include "llvm/ADT/STLExtras.h" |
35 | | #include "llvm/Support/Error.h" |
36 | | #include "llvm/Support/ErrorHandling.h" |
37 | | |
38 | | namespace clang { |
39 | | namespace dataflow { |
40 | | |
41 | | class StmtToEnvMapImpl : public StmtToEnvMap { |
42 | | public: |
43 | | StmtToEnvMapImpl( |
44 | | const ControlFlowContext &CFCtx, |
45 | | llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>> |
46 | | BlockToState) |
47 | 10.3k | : CFCtx(CFCtx), BlockToState(BlockToState) {} |
48 | | |
49 | 196 | const Environment *getEnvironment(const Stmt &S) const override { |
50 | 196 | auto BlockIT = CFCtx.getStmtToBlock().find(&ignoreCFGOmittedNodes(S)); |
51 | 196 | assert(BlockIT != CFCtx.getStmtToBlock().end()); |
52 | 0 | const auto &State = BlockToState[BlockIT->getSecond()->getBlockID()]; |
53 | 196 | assert(State); |
54 | 0 | return &State.value().Env; |
55 | 196 | } |
56 | | |
57 | | private: |
58 | | const ControlFlowContext &CFCtx; |
59 | | llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockToState; |
60 | | }; |
61 | | |
62 | | /// Returns the index of `Block` in the successors of `Pred`. |
63 | | static int blockIndexInPredecessor(const CFGBlock &Pred, |
64 | 977 | const CFGBlock &Block) { |
65 | 977 | auto BlockPos = llvm::find_if( |
66 | 1.45k | Pred.succs(), [&Block](const CFGBlock::AdjacentBlock &Succ) { |
67 | 1.45k | return Succ && Succ->getBlockID() == Block.getBlockID(); |
68 | 1.45k | }); |
69 | 977 | return BlockPos - Pred.succ_begin(); |
70 | 977 | } |
71 | | |
72 | | /// Extends the flow condition of an environment based on a terminator |
73 | | /// statement. |
74 | | class TerminatorVisitor : public ConstStmtVisitor<TerminatorVisitor> { |
75 | | public: |
76 | | TerminatorVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env, |
77 | | int BlockSuccIdx) |
78 | 977 | : StmtToEnv(StmtToEnv), Env(Env), BlockSuccIdx(BlockSuccIdx) {} |
79 | | |
80 | 674 | void VisitIfStmt(const IfStmt *S) { |
81 | 674 | auto *Cond = S->getCond(); |
82 | 674 | assert(Cond != nullptr); |
83 | 0 | extendFlowCondition(*Cond); |
84 | 674 | } |
85 | | |
86 | 80 | void VisitWhileStmt(const WhileStmt *S) { |
87 | 80 | auto *Cond = S->getCond(); |
88 | 80 | assert(Cond != nullptr); |
89 | 0 | extendFlowCondition(*Cond); |
90 | 80 | } |
91 | | |
92 | 16 | void VisitDoStmt(const DoStmt *S) { |
93 | 16 | auto *Cond = S->getCond(); |
94 | 16 | assert(Cond != nullptr); |
95 | 0 | extendFlowCondition(*Cond); |
96 | 16 | } |
97 | | |
98 | 6 | void VisitForStmt(const ForStmt *S) { |
99 | 6 | auto *Cond = S->getCond(); |
100 | 6 | if (Cond != nullptr) |
101 | 4 | extendFlowCondition(*Cond); |
102 | 6 | } |
103 | | |
104 | 104 | void VisitBinaryOperator(const BinaryOperator *S) { |
105 | 104 | assert(S->getOpcode() == BO_LAnd || S->getOpcode() == BO_LOr); |
106 | 0 | auto *LHS = S->getLHS(); |
107 | 104 | assert(LHS != nullptr); |
108 | 0 | extendFlowCondition(*LHS); |
109 | 104 | } |
110 | | |
111 | 48 | void VisitConditionalOperator(const ConditionalOperator *S) { |
112 | 48 | auto *Cond = S->getCond(); |
113 | 48 | assert(Cond != nullptr); |
114 | 0 | extendFlowCondition(*Cond); |
115 | 48 | } |
116 | | |
117 | | private: |
118 | 926 | void extendFlowCondition(const Expr &Cond) { |
119 | | // The terminator sub-expression might not be evaluated. |
120 | 926 | if (Env.getStorageLocation(Cond, SkipPast::None) == nullptr) |
121 | 156 | transfer(StmtToEnv, Cond, Env); |
122 | | |
123 | | // FIXME: The flow condition must be an r-value, so `SkipPast::None` should |
124 | | // suffice. |
125 | 926 | auto *Val = |
126 | 926 | cast_or_null<BoolValue>(Env.getValue(Cond, SkipPast::Reference)); |
127 | | // Value merging depends on flow conditions from different environments |
128 | | // being mutually exclusive -- that is, they cannot both be true in their |
129 | | // entirety (even if they may share some clauses). So, we need *some* value |
130 | | // for the condition expression, even if just an atom. |
131 | 926 | if (Val == nullptr) { |
132 | | // FIXME: Consider introducing a helper for this get-or-create pattern. |
133 | 76 | auto *Loc = Env.getStorageLocation(Cond, SkipPast::None); |
134 | 76 | if (Loc == nullptr) { |
135 | 76 | Loc = &Env.createStorageLocation(Cond); |
136 | 76 | Env.setStorageLocation(Cond, *Loc); |
137 | 76 | } |
138 | 76 | Val = &Env.makeAtomicBoolValue(); |
139 | 76 | Env.setValue(*Loc, *Val); |
140 | 76 | } |
141 | | |
142 | | // The condition must be inverted for the successor that encompasses the |
143 | | // "else" branch, if such exists. |
144 | 926 | if (BlockSuccIdx == 1) |
145 | 463 | Val = &Env.makeNot(*Val); |
146 | | |
147 | 926 | Env.addToFlowCondition(*Val); |
148 | 926 | } |
149 | | |
150 | | const StmtToEnvMap &StmtToEnv; |
151 | | Environment &Env; |
152 | | int BlockSuccIdx; |
153 | | }; |
154 | | |
155 | | /// Computes the input state for a given basic block by joining the output |
156 | | /// states of its predecessors. |
157 | | /// |
158 | | /// Requirements: |
159 | | /// |
160 | | /// All predecessors of `Block` except those with loop back edges must have |
161 | | /// already been transferred. States in `BlockStates` that are set to |
162 | | /// `llvm::None` represent basic blocks that are not evaluated yet. |
163 | | static TypeErasedDataflowAnalysisState computeBlockInputState( |
164 | | const ControlFlowContext &CFCtx, |
165 | | std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates, |
166 | | const CFGBlock &Block, const Environment &InitEnv, |
167 | 2.92k | TypeErasedDataflowAnalysis &Analysis) { |
168 | 2.92k | llvm::DenseSet<const CFGBlock *> Preds; |
169 | 2.92k | Preds.insert(Block.pred_begin(), Block.pred_end()); |
170 | 2.92k | if (Block.getTerminator().isTemporaryDtorsBranch()) { |
171 | | // This handles a special case where the code that produced the CFG includes |
172 | | // a conditional operator with a branch that constructs a temporary and |
173 | | // calls a destructor annotated as noreturn. The CFG models this as follows: |
174 | | // |
175 | | // B1 (contains the condition of the conditional operator) - succs: B2, B3 |
176 | | // B2 (contains code that does not call a noreturn destructor) - succs: B4 |
177 | | // B3 (contains code that calls a noreturn destructor) - succs: B4 |
178 | | // B4 (has temporary destructor terminator) - succs: B5, B6 |
179 | | // B5 (noreturn block that is associated with the noreturn destructor call) |
180 | | // B6 (contains code that follows the conditional operator statement) |
181 | | // |
182 | | // The first successor (B5 above) of a basic block with a temporary |
183 | | // destructor terminator (B4 above) is the block that evaluates the |
184 | | // destructor. If that block has a noreturn element then the predecessor |
185 | | // block that constructed the temporary object (B3 above) is effectively a |
186 | | // noreturn block and its state should not be used as input for the state |
187 | | // of the block that has a temporary destructor terminator (B4 above). This |
188 | | // holds regardless of which branch of the ternary operator calls the |
189 | | // noreturn destructor. However, it doesn't cases where a nested ternary |
190 | | // operator includes a branch that contains a noreturn destructor call. |
191 | | // |
192 | | // See `NoreturnDestructorTest` for concrete examples. |
193 | 18 | if (Block.succ_begin()->getReachableBlock()->hasNoReturnElement()) { |
194 | 8 | auto StmtBlock = CFCtx.getStmtToBlock().find(Block.getTerminatorStmt()); |
195 | 8 | assert(StmtBlock != CFCtx.getStmtToBlock().end()); |
196 | 0 | Preds.erase(StmtBlock->getSecond()); |
197 | 8 | } |
198 | 18 | } |
199 | | |
200 | 0 | llvm::Optional<TypeErasedDataflowAnalysisState> MaybeState; |
201 | 2.92k | bool ApplyBuiltinTransfer = Analysis.applyBuiltinTransfer(); |
202 | | |
203 | 3.04k | for (const CFGBlock *Pred : Preds) { |
204 | | // Skip if the `Block` is unreachable or control flow cannot get past it. |
205 | 3.04k | if (!Pred || Pred->hasNoReturnElement()3.03k ) |
206 | 22 | continue; |
207 | | |
208 | | // Skip if `Pred` was not evaluated yet. This could happen if `Pred` has a |
209 | | // loop back edge to `Block`. |
210 | 3.02k | const llvm::Optional<TypeErasedDataflowAnalysisState> &MaybePredState = |
211 | 3.02k | BlockStates[Pred->getBlockID()]; |
212 | 3.02k | if (!MaybePredState) |
213 | 26 | continue; |
214 | | |
215 | 3.00k | TypeErasedDataflowAnalysisState PredState = MaybePredState.value(); |
216 | 3.00k | if (ApplyBuiltinTransfer) { |
217 | 2.95k | if (const Stmt *PredTerminatorStmt = Pred->getTerminatorStmt()) { |
218 | 977 | const StmtToEnvMapImpl StmtToEnv(CFCtx, BlockStates); |
219 | 977 | TerminatorVisitor(StmtToEnv, PredState.Env, |
220 | 977 | blockIndexInPredecessor(*Pred, Block)) |
221 | 977 | .Visit(PredTerminatorStmt); |
222 | 977 | } |
223 | 2.95k | } |
224 | | |
225 | 3.00k | if (MaybeState) { |
226 | 472 | Analysis.joinTypeErased(MaybeState->Lattice, PredState.Lattice); |
227 | 472 | MaybeState->Env.join(PredState.Env, Analysis); |
228 | 2.52k | } else { |
229 | 2.52k | MaybeState = std::move(PredState); |
230 | 2.52k | } |
231 | 3.00k | } |
232 | 2.92k | if (!MaybeState) { |
233 | | // FIXME: Consider passing `Block` to `Analysis.typeErasedInitialElement()` |
234 | | // to enable building analyses like computation of dominators that |
235 | | // initialize the state of each basic block differently. |
236 | 391 | MaybeState.emplace(Analysis.typeErasedInitialElement(), InitEnv); |
237 | 391 | } |
238 | 2.92k | return *MaybeState; |
239 | 2.92k | } |
240 | | |
241 | | /// Transfers `State` by evaluating `CfgStmt` in the context of `Analysis`. |
242 | | /// `HandleTransferredStmt` (if provided) will be applied to `CfgStmt`, after it |
243 | | /// is evaluated. |
244 | | static void transferCFGStmt( |
245 | | const ControlFlowContext &CFCtx, |
246 | | llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockStates, |
247 | | const CFGStmt &CfgStmt, TypeErasedDataflowAnalysis &Analysis, |
248 | | TypeErasedDataflowAnalysisState &State, |
249 | | std::function<void(const CFGStmt &, |
250 | | const TypeErasedDataflowAnalysisState &)> |
251 | 9.34k | HandleTransferredStmt) { |
252 | 9.34k | const Stmt *S = CfgStmt.getStmt(); |
253 | 9.34k | assert(S != nullptr); |
254 | | |
255 | 9.34k | if (Analysis.applyBuiltinTransfer()) |
256 | 9.33k | transfer(StmtToEnvMapImpl(CFCtx, BlockStates), *S, State.Env); |
257 | 9.34k | Analysis.transferTypeErased(S, State.Lattice, State.Env); |
258 | | |
259 | 9.34k | if (HandleTransferredStmt != nullptr) |
260 | 4.58k | HandleTransferredStmt(CfgStmt, State); |
261 | 9.34k | } |
262 | | |
263 | | /// Transfers `State` by evaluating `CfgInit`. |
264 | | static void transferCFGInitializer(const CFGInitializer &CfgInit, |
265 | 12 | TypeErasedDataflowAnalysisState &State) { |
266 | 12 | const auto &ThisLoc = *cast<AggregateStorageLocation>( |
267 | 12 | State.Env.getThisPointeeStorageLocation()); |
268 | | |
269 | 12 | const CXXCtorInitializer *Initializer = CfgInit.getInitializer(); |
270 | 12 | assert(Initializer != nullptr); |
271 | | |
272 | 0 | const FieldDecl *Member = Initializer->getMember(); |
273 | 12 | if (Member == nullptr) |
274 | | // Not a field initializer. |
275 | 2 | return; |
276 | | |
277 | 10 | auto *InitStmt = Initializer->getInit(); |
278 | 10 | assert(InitStmt != nullptr); |
279 | | |
280 | 0 | auto *InitStmtLoc = |
281 | 10 | State.Env.getStorageLocation(*InitStmt, SkipPast::Reference); |
282 | 10 | if (InitStmtLoc == nullptr) |
283 | 4 | return; |
284 | | |
285 | 6 | auto *InitStmtVal = State.Env.getValue(*InitStmtLoc); |
286 | 6 | if (InitStmtVal == nullptr) |
287 | 0 | return; |
288 | | |
289 | 6 | if (Member->getType()->isReferenceType()) { |
290 | 2 | auto &MemberLoc = ThisLoc.getChild(*Member); |
291 | 2 | State.Env.setValue(MemberLoc, |
292 | 2 | State.Env.takeOwnership( |
293 | 2 | std::make_unique<ReferenceValue>(*InitStmtLoc))); |
294 | 4 | } else { |
295 | 4 | auto &MemberLoc = ThisLoc.getChild(*Member); |
296 | 4 | State.Env.setValue(MemberLoc, *InitStmtVal); |
297 | 4 | } |
298 | 6 | } |
299 | | |
300 | | TypeErasedDataflowAnalysisState transferBlock( |
301 | | const ControlFlowContext &CFCtx, |
302 | | std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates, |
303 | | const CFGBlock &Block, const Environment &InitEnv, |
304 | | TypeErasedDataflowAnalysis &Analysis, |
305 | | std::function<void(const CFGStmt &, |
306 | | const TypeErasedDataflowAnalysisState &)> |
307 | 2.92k | HandleTransferredStmt) { |
308 | 2.92k | TypeErasedDataflowAnalysisState State = |
309 | 2.92k | computeBlockInputState(CFCtx, BlockStates, Block, InitEnv, Analysis); |
310 | 9.39k | for (const CFGElement &Element : Block) { |
311 | 9.39k | switch (Element.getKind()) { |
312 | 9.34k | case CFGElement::Statement: |
313 | 9.34k | transferCFGStmt(CFCtx, BlockStates, *Element.getAs<CFGStmt>(), Analysis, |
314 | 9.34k | State, HandleTransferredStmt); |
315 | 9.34k | break; |
316 | 12 | case CFGElement::Initializer: |
317 | 12 | if (Analysis.applyBuiltinTransfer()) |
318 | 12 | transferCFGInitializer(*Element.getAs<CFGInitializer>(), State); |
319 | 12 | break; |
320 | 34 | default: |
321 | | // FIXME: Evaluate other kinds of `CFGElement`. |
322 | 34 | break; |
323 | 9.39k | } |
324 | 9.39k | } |
325 | 2.92k | return State; |
326 | 2.92k | } |
327 | | |
328 | | llvm::Expected<std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>>> |
329 | | runTypeErasedDataflowAnalysis( |
330 | | const ControlFlowContext &CFCtx, TypeErasedDataflowAnalysis &Analysis, |
331 | | const Environment &InitEnv, |
332 | | std::function<void(const Stmt *, const TypeErasedDataflowAnalysisState &)> |
333 | 393 | PostVisitStmt) { |
334 | 393 | PostOrderCFGView POV(&CFCtx.getCFG()); |
335 | 393 | ForwardDataflowWorklist Worklist(CFCtx.getCFG(), &POV); |
336 | | |
337 | 393 | std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockStates; |
338 | 393 | BlockStates.resize(CFCtx.getCFG().size(), llvm::None); |
339 | | |
340 | | // The entry basic block doesn't contain statements so it can be skipped. |
341 | 393 | const CFGBlock &Entry = CFCtx.getCFG().getEntry(); |
342 | 393 | BlockStates[Entry.getBlockID()] = {Analysis.typeErasedInitialElement(), |
343 | 393 | InitEnv}; |
344 | 393 | Worklist.enqueueSuccessors(&Entry); |
345 | | |
346 | | // Bugs in lattices and transfer functions can prevent the analysis from |
347 | | // converging. To limit the damage (infinite loops) that these bugs can cause, |
348 | | // limit the number of iterations. |
349 | | // FIXME: Consider making the maximum number of iterations configurable. |
350 | | // FIXME: Consider restricting the number of backedges followed, rather than |
351 | | // iterations. |
352 | | // FIXME: Set up statistics (see llvm/ADT/Statistic.h) to count average number |
353 | | // of iterations, number of functions that time out, etc. |
354 | 393 | static constexpr uint32_t MaxAverageVisitsPerBlock = 4; |
355 | 393 | static constexpr uint32_t AbsoluteMaxIterations = 1 << 16; |
356 | 393 | const uint32_t RelativeMaxIterations = |
357 | 393 | MaxAverageVisitsPerBlock * BlockStates.size(); |
358 | 393 | const uint32_t MaxIterations = |
359 | 393 | std::min(RelativeMaxIterations, AbsoluteMaxIterations); |
360 | 393 | uint32_t Iterations = 0; |
361 | 1.69k | while (const CFGBlock *Block = Worklist.dequeue()) { |
362 | 1.29k | if (++Iterations > MaxIterations) { |
363 | 1 | return llvm::createStringError(std::errc::timed_out, |
364 | 1 | "maximum number of iterations reached"); |
365 | 1 | } |
366 | | |
367 | 1.29k | const llvm::Optional<TypeErasedDataflowAnalysisState> &OldBlockState = |
368 | 1.29k | BlockStates[Block->getBlockID()]; |
369 | 1.29k | TypeErasedDataflowAnalysisState NewBlockState = |
370 | 1.29k | transferBlock(CFCtx, BlockStates, *Block, InitEnv, Analysis); |
371 | | |
372 | 1.29k | if (OldBlockState && |
373 | 1.29k | Analysis.isEqualTypeErased(OldBlockState.value().Lattice, |
374 | 63 | NewBlockState.Lattice) && |
375 | 1.29k | OldBlockState->Env.equivalentTo(NewBlockState.Env, Analysis)50 ) { |
376 | | // The state of `Block` didn't change after transfer so there's no need to |
377 | | // revisit its successors. |
378 | 23 | continue; |
379 | 23 | } |
380 | | |
381 | 1.27k | BlockStates[Block->getBlockID()] = std::move(NewBlockState); |
382 | | |
383 | | // Do not add unreachable successor blocks to `Worklist`. |
384 | 1.27k | if (Block->hasNoReturnElement()) |
385 | 7 | continue; |
386 | | |
387 | 1.26k | Worklist.enqueueSuccessors(Block); |
388 | 1.26k | } |
389 | | // FIXME: Consider evaluating unreachable basic blocks (those that have a |
390 | | // state set to `llvm::None` at this point) to also analyze dead code. |
391 | | |
392 | 392 | if (PostVisitStmt) { |
393 | 987 | for (const CFGBlock *Block : CFCtx.getCFG()) { |
394 | | // Skip blocks that were not evaluated. |
395 | 987 | if (!BlockStates[Block->getBlockID()]) |
396 | 0 | continue; |
397 | 987 | transferBlock( |
398 | 987 | CFCtx, BlockStates, *Block, InitEnv, Analysis, |
399 | 987 | [&PostVisitStmt](const clang::CFGStmt &Stmt, |
400 | 3.39k | const TypeErasedDataflowAnalysisState &State) { |
401 | 3.39k | PostVisitStmt(Stmt.getStmt(), State); |
402 | 3.39k | }); |
403 | 987 | } |
404 | 243 | } |
405 | | |
406 | 392 | return BlockStates; |
407 | 393 | } |
408 | | |
409 | | } // namespace dataflow |
410 | | } // namespace clang |