/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //== ArrayBoundCheckerV2.cpp ------------------------------------*- C++ -*--==// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file defines ArrayBoundCheckerV2, which is a path-sensitive check |
10 | | // which looks for an out-of-bound array element access. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "clang/AST/CharUnits.h" |
15 | | #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" |
16 | | #include "clang/StaticAnalyzer/Checkers/Taint.h" |
17 | | #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" |
18 | | #include "clang/StaticAnalyzer/Core/Checker.h" |
19 | | #include "clang/StaticAnalyzer/Core/CheckerManager.h" |
20 | | #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h" |
21 | | #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" |
22 | | #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" |
23 | | #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" |
24 | | #include "llvm/ADT/SmallString.h" |
25 | | #include "llvm/Support/raw_ostream.h" |
26 | | #include <optional> |
27 | | |
28 | | using namespace clang; |
29 | | using namespace ento; |
30 | | using namespace taint; |
31 | | |
32 | | namespace { |
33 | | class ArrayBoundCheckerV2 : |
34 | | public Checker<check::Location> { |
35 | | mutable std::unique_ptr<BugType> BT; |
36 | | mutable std::unique_ptr<BugType> TaintBT; |
37 | | |
38 | | enum OOB_Kind { OOB_Precedes, OOB_Excedes }; |
39 | | |
40 | | void reportOOB(CheckerContext &C, ProgramStateRef errorState, |
41 | | OOB_Kind kind) const; |
42 | | void reportTaintOOB(CheckerContext &C, ProgramStateRef errorState, |
43 | | SVal TaintedSVal) const; |
44 | | |
45 | | static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC); |
46 | | |
47 | | public: |
48 | | void checkLocation(SVal l, bool isLoad, const Stmt *S, |
49 | | CheckerContext &C) const; |
50 | | }; |
51 | | |
52 | | // FIXME: Eventually replace RegionRawOffset with this class. |
53 | | class RegionRawOffsetV2 { |
54 | | private: |
55 | | const SubRegion *baseRegion; |
56 | | NonLoc byteOffset; |
57 | | |
58 | | public: |
59 | | RegionRawOffsetV2(const SubRegion *base, NonLoc offset) |
60 | 71.6k | : baseRegion(base), byteOffset(offset) { assert(base); } |
61 | | |
62 | 71.6k | NonLoc getByteOffset() const { return byteOffset; } |
63 | 71.6k | const SubRegion *getRegion() const { return baseRegion; } |
64 | | |
65 | | static std::optional<RegionRawOffsetV2> |
66 | | computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location); |
67 | | |
68 | | void dump() const; |
69 | | void dumpToStream(raw_ostream &os) const; |
70 | | }; |
71 | | } |
72 | | |
73 | | // TODO: once the constraint manager is smart enough to handle non simplified |
74 | | // symbolic expressions remove this function. Note that this can not be used in |
75 | | // the constraint manager as is, since this does not handle overflows. It is |
76 | | // safe to assume, however, that memory offsets will not overflow. |
77 | | // NOTE: callers of this function need to be aware of the effects of overflows |
78 | | // and signed<->unsigned conversions! |
79 | | static std::pair<NonLoc, nonloc::ConcreteInt> |
80 | | getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, |
81 | 126k | SValBuilder &svalBuilder) { |
82 | 126k | std::optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>(); |
83 | 126k | if (SymVal && SymVal->isExpression()180 ) { |
84 | 97 | if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) { |
85 | 93 | llvm::APSInt constant = |
86 | 93 | APSIntType(extent.getValue()).convert(SIE->getRHS()); |
87 | 93 | switch (SIE->getOpcode()) { |
88 | 71 | case BO_Mul: |
89 | | // The constant should never be 0 here, since it the result of scaling |
90 | | // based on the size of a type which is never 0. |
91 | 71 | if ((extent.getValue() % constant) != 0) |
92 | 0 | return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent); |
93 | 71 | else |
94 | 71 | return getSimplifiedOffsets( |
95 | 71 | nonloc::SymbolVal(SIE->getLHS()), |
96 | 71 | svalBuilder.makeIntVal(extent.getValue() / constant), |
97 | 71 | svalBuilder); |
98 | 10 | case BO_Add: |
99 | 10 | return getSimplifiedOffsets( |
100 | 10 | nonloc::SymbolVal(SIE->getLHS()), |
101 | 10 | svalBuilder.makeIntVal(extent.getValue() - constant), svalBuilder); |
102 | 12 | default: |
103 | 12 | break; |
104 | 93 | } |
105 | 93 | } |
106 | 97 | } |
107 | | |
108 | 126k | return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent); |
109 | 126k | } |
110 | | |
111 | | // Evaluate the comparison Value < Threshold with the help of the custom |
112 | | // simplification algorithm defined for this checker. Return a pair of states, |
113 | | // where the first one corresponds to "value below threshold" and the second |
114 | | // corresponds to "value at or above threshold". Returns {nullptr, nullptr} in |
115 | | // the case when the evaluation fails. |
116 | | static std::pair<ProgramStateRef, ProgramStateRef> |
117 | | compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, |
118 | 135k | SValBuilder &SVB) { |
119 | 135k | if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) { |
120 | 126k | std::tie(Value, Threshold) = getSimplifiedOffsets(Value, *ConcreteThreshold, SVB); |
121 | 126k | } |
122 | 135k | if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) { |
123 | 126k | QualType T = Value.getType(SVB.getContext()); |
124 | 126k | if (T->isUnsignedIntegerType() && ConcreteThreshold->getValue().isNegative()12 ) { |
125 | | // In this case we reduced the bound check to a comparison of the form |
126 | | // (symbol or value with unsigned type) < (negative number) |
127 | | // which is always false. We are handling these cases separately because |
128 | | // evalBinOpNN can perform a signed->unsigned conversion that turns the |
129 | | // negative number into a huge positive value and leads to wildly |
130 | | // inaccurate conclusions. |
131 | 2 | return {nullptr, State}; |
132 | 2 | } |
133 | 126k | } |
134 | 135k | auto BelowThreshold = |
135 | 135k | SVB.evalBinOpNN(State, BO_LT, Value, Threshold, SVB.getConditionType()).getAs<NonLoc>(); |
136 | | |
137 | 135k | if (BelowThreshold) |
138 | 135k | return State->assume(*BelowThreshold); |
139 | | |
140 | 0 | return {nullptr, nullptr}; |
141 | 135k | } |
142 | | |
143 | | void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, |
144 | | const Stmt* LoadS, |
145 | 71.6k | CheckerContext &checkerContext) const { |
146 | | |
147 | | // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping |
148 | | // some new logic here that reasons directly about memory region extents. |
149 | | // Once that logic is more mature, we can bring it back to assumeInBound() |
150 | | // for all clients to use. |
151 | | // |
152 | | // The algorithm we are using here for bounds checking is to see if the |
153 | | // memory access is within the extent of the base region. Since we |
154 | | // have some flexibility in defining the base region, we can achieve |
155 | | // various levels of conservatism in our buffer overflow checking. |
156 | | |
157 | | // The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as |
158 | | // #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX) |
159 | | // and incomplete analysis of these leads to false positives. As even |
160 | | // accurate reports would be confusing for the users, just disable reports |
161 | | // from these macros: |
162 | 71.6k | if (isFromCtypeMacro(LoadS, checkerContext.getASTContext())) |
163 | 6 | return; |
164 | | |
165 | 71.6k | ProgramStateRef state = checkerContext.getState(); |
166 | | |
167 | 71.6k | SValBuilder &svalBuilder = checkerContext.getSValBuilder(); |
168 | 71.6k | const std::optional<RegionRawOffsetV2> &RawOffset = |
169 | 71.6k | RegionRawOffsetV2::computeOffset(state, svalBuilder, location); |
170 | | |
171 | 71.6k | if (!RawOffset) |
172 | 0 | return; |
173 | | |
174 | 71.6k | NonLoc ByteOffset = RawOffset->getByteOffset(); |
175 | 71.6k | const SubRegion *Reg = RawOffset->getRegion(); |
176 | | |
177 | | // CHECK LOWER BOUND |
178 | 71.6k | const MemSpaceRegion *Space = Reg->getMemorySpace(); |
179 | 71.6k | if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space)8.32k )) { |
180 | | // A symbolic region in unknown space represents an unknown pointer that |
181 | | // may point into the middle of an array, so we don't look for underflows. |
182 | | // Both conditions are significant because we want to check underflows in |
183 | | // symbolic regions on the heap (which may be introduced by checkers like |
184 | | // MallocChecker that call SValBuilder::getConjuredHeapSymbolVal()) and |
185 | | // non-symbolic regions (e.g. a field subregion of a symbolic region) in |
186 | | // unknown space. |
187 | 63.3k | auto [state_precedesLowerBound, state_withinLowerBound] = |
188 | 63.3k | compareValueToThreshold(state, ByteOffset, |
189 | 63.3k | svalBuilder.makeZeroArrayIndex(), svalBuilder); |
190 | | |
191 | 63.3k | if (state_precedesLowerBound && !state_withinLowerBound55 ) { |
192 | | // We know that the index definitely precedes the lower bound. |
193 | 13 | reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes); |
194 | 13 | return; |
195 | 13 | } |
196 | | |
197 | 63.3k | if (state_withinLowerBound) |
198 | 63.3k | state = state_withinLowerBound; |
199 | 63.3k | } |
200 | | |
201 | | // CHECK UPPER BOUND |
202 | 71.6k | DefinedOrUnknownSVal Size = getDynamicExtent(state, Reg, svalBuilder); |
203 | 71.6k | if (auto KnownSize = Size.getAs<NonLoc>()) { |
204 | 71.6k | auto [state_withinUpperBound, state_exceedsUpperBound] = |
205 | 71.6k | compareValueToThreshold(state, ByteOffset, *KnownSize, svalBuilder); |
206 | | |
207 | 71.6k | if (state_exceedsUpperBound) { |
208 | 5.82k | if (!state_withinUpperBound) { |
209 | | // We know that the index definitely exceeds the upper bound. |
210 | 14 | reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes); |
211 | 14 | return; |
212 | 14 | } |
213 | 5.81k | if (isTainted(state, ByteOffset)) { |
214 | | // Both cases are possible, but the index is tainted, so report. |
215 | 27 | reportTaintOOB(checkerContext, state_exceedsUpperBound, ByteOffset); |
216 | 27 | return; |
217 | 27 | } |
218 | 5.81k | } |
219 | | |
220 | 71.6k | if (state_withinUpperBound) |
221 | 71.6k | state = state_withinUpperBound; |
222 | 71.6k | } |
223 | | |
224 | 71.6k | checkerContext.addTransition(state); |
225 | 71.6k | } |
226 | | |
227 | | void ArrayBoundCheckerV2::reportTaintOOB(CheckerContext &checkerContext, |
228 | | ProgramStateRef errorState, |
229 | 27 | SVal TaintedSVal) const { |
230 | 27 | ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState); |
231 | 27 | if (!errorNode) |
232 | 0 | return; |
233 | | |
234 | 27 | if (!TaintBT) |
235 | 4 | TaintBT.reset( |
236 | 4 | new BugType(this, "Out-of-bound access", categories::TaintedData)); |
237 | | |
238 | 27 | SmallString<256> buf; |
239 | 27 | llvm::raw_svector_ostream os(buf); |
240 | 27 | os << "Out of bound memory access (index is tainted)"; |
241 | 27 | auto BR = |
242 | 27 | std::make_unique<PathSensitiveBugReport>(*TaintBT, os.str(), errorNode); |
243 | | |
244 | | // Track back the propagation of taintedness. |
245 | 27 | for (SymbolRef Sym : getTaintedSymbols(errorState, TaintedSVal)) { |
246 | 27 | BR->markInteresting(Sym); |
247 | 27 | } |
248 | | |
249 | 27 | checkerContext.emitReport(std::move(BR)); |
250 | 27 | } |
251 | | |
252 | | void ArrayBoundCheckerV2::reportOOB(CheckerContext &checkerContext, |
253 | | ProgramStateRef errorState, |
254 | 27 | OOB_Kind kind) const { |
255 | | |
256 | 27 | ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState); |
257 | 27 | if (!errorNode) |
258 | 0 | return; |
259 | | |
260 | 27 | if (!BT) |
261 | 2 | BT.reset(new BugType(this, "Out-of-bound access")); |
262 | | |
263 | | // FIXME: This diagnostics are preliminary. We should get far better |
264 | | // diagnostics for explaining buffer overruns. |
265 | | |
266 | 27 | SmallString<256> buf; |
267 | 27 | llvm::raw_svector_ostream os(buf); |
268 | 27 | os << "Out of bound memory access "; |
269 | 27 | switch (kind) { |
270 | 13 | case OOB_Precedes: |
271 | 13 | os << "(accessed memory precedes memory block)"; |
272 | 13 | break; |
273 | 14 | case OOB_Excedes: |
274 | 14 | os << "(access exceeds upper limit of memory block)"; |
275 | 14 | break; |
276 | 27 | } |
277 | 27 | auto BR = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), errorNode); |
278 | 27 | checkerContext.emitReport(std::move(BR)); |
279 | 27 | } |
280 | | |
281 | 71.6k | bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) { |
282 | 71.6k | SourceLocation Loc = S->getBeginLoc(); |
283 | 71.6k | if (!Loc.isMacroID()) |
284 | 71.2k | return false; |
285 | | |
286 | 454 | StringRef MacroName = Lexer::getImmediateMacroName( |
287 | 454 | Loc, ACtx.getSourceManager(), ACtx.getLangOpts()); |
288 | | |
289 | 454 | if (MacroName.size() < 7 || MacroName[0] != 'i' || MacroName[1] != 's'6 ) |
290 | 448 | return false; |
291 | | |
292 | 6 | return ((MacroName == "isalnum") || (MacroName == "isalpha") || |
293 | 6 | (MacroName == "isblank") || (MacroName == "isdigit") || |
294 | 6 | (MacroName == "isgraph")0 || (MacroName == "islower")0 || |
295 | 6 | (MacroName == "isnctrl")0 || (MacroName == "isprint")0 || |
296 | 6 | (MacroName == "ispunct")0 || (MacroName == "isspace")0 || |
297 | 6 | (MacroName == "isupper")0 || (MacroName == "isxdigit")0 ); |
298 | 454 | } |
299 | | |
300 | | #ifndef NDEBUG |
301 | 0 | LLVM_DUMP_METHOD void RegionRawOffsetV2::dump() const { |
302 | 0 | dumpToStream(llvm::errs()); |
303 | 0 | } |
304 | | |
305 | 0 | void RegionRawOffsetV2::dumpToStream(raw_ostream &os) const { |
306 | 0 | os << "raw_offset_v2{" << getRegion() << ',' << getByteOffset() << '}'; |
307 | 0 | } |
308 | | #endif |
309 | | |
310 | | /// For a given Location that can be represented as a symbolic expression |
311 | | /// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block |
312 | | /// Arr and the distance of Location from the beginning of Arr (expressed in a |
313 | | /// NonLoc that specifies the number of CharUnits). Returns nullopt when these |
314 | | /// cannot be determined. |
315 | | std::optional<RegionRawOffsetV2> |
316 | | RegionRawOffsetV2::computeOffset(ProgramStateRef State, SValBuilder &SVB, |
317 | 71.6k | SVal Location) { |
318 | 71.6k | QualType T = SVB.getArrayIndexType(); |
319 | 71.6k | auto Calc = [&SVB, State, T](BinaryOperatorKind Op, NonLoc LHS, NonLoc RHS) { |
320 | | // We will use this utility to add and multiply values. |
321 | 16.2k | return SVB.evalBinOpNN(State, Op, LHS, RHS, T).getAs<NonLoc>(); |
322 | 16.2k | }; |
323 | | |
324 | 71.6k | const MemRegion *Region = Location.getAsRegion(); |
325 | 71.6k | NonLoc Offset = SVB.makeZeroArrayIndex(); |
326 | | |
327 | 79.8k | while (Region) { |
328 | 79.8k | if (const auto *ERegion = dyn_cast<ElementRegion>(Region)) { |
329 | 8.13k | if (const auto Index = ERegion->getIndex().getAs<NonLoc>()) { |
330 | 8.13k | QualType ElemType = ERegion->getElementType(); |
331 | | // If the element is an incomplete type, go no further. |
332 | 8.13k | if (ElemType->isIncompleteType()) |
333 | 0 | return std::nullopt; |
334 | | |
335 | | // Perform Offset += Index * sizeof(ElemType); then continue the offset |
336 | | // calculations with SuperRegion: |
337 | 8.13k | NonLoc Size = SVB.makeArrayIndex( |
338 | 8.13k | SVB.getContext().getTypeSizeInChars(ElemType).getQuantity()); |
339 | 8.13k | if (auto Delta = Calc(BO_Mul, *Index, Size)) { |
340 | 8.13k | if (auto NewOffset = Calc(BO_Add, Offset, *Delta)) { |
341 | 8.13k | Offset = *NewOffset; |
342 | 8.13k | Region = ERegion->getSuperRegion(); |
343 | 8.13k | continue; |
344 | 8.13k | } |
345 | 8.13k | } |
346 | 8.13k | } |
347 | 71.6k | } else if (const auto *SRegion = dyn_cast<SubRegion>(Region)) { |
348 | | // NOTE: The dyn_cast<>() is expected to succeed, it'd be very surprising |
349 | | // to see a MemSpaceRegion at this point. |
350 | | // FIXME: We may return with {<Region>, 0} even if we didn't handle any |
351 | | // ElementRegion layers. I think that this behavior was introduced |
352 | | // accidentally by 8a4c760c204546aba566e302f299f7ed2e00e287 in 2011, so |
353 | | // it may be useful to review it in the future. |
354 | 71.6k | return RegionRawOffsetV2(SRegion, Offset); |
355 | 71.6k | } |
356 | 0 | return std::nullopt; |
357 | 79.8k | } |
358 | 0 | return std::nullopt; |
359 | 71.6k | } |
360 | | |
361 | 10 | void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) { |
362 | 10 | mgr.registerChecker<ArrayBoundCheckerV2>(); |
363 | 10 | } |
364 | | |
365 | 20 | bool ento::shouldRegisterArrayBoundCheckerV2(const CheckerManager &mgr) { |
366 | 20 | return true; |
367 | 20 | } |