Coverage Report

Created: 2023-09-12 09:32

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
Line
Count
Source (jump to first uncovered line)
1
//== ArrayBoundCheckerV2.cpp ------------------------------------*- C++ -*--==//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines ArrayBoundCheckerV2, which is a path-sensitive check
10
// which looks for an out-of-bound array element access.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "clang/AST/CharUnits.h"
15
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
16
#include "clang/StaticAnalyzer/Checkers/Taint.h"
17
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
18
#include "clang/StaticAnalyzer/Core/Checker.h"
19
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20
#include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
21
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
22
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
23
#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
24
#include "llvm/ADT/SmallString.h"
25
#include "llvm/Support/raw_ostream.h"
26
#include <optional>
27
28
using namespace clang;
29
using namespace ento;
30
using namespace taint;
31
32
namespace {
33
class ArrayBoundCheckerV2 :
34
    public Checker<check::Location> {
35
  mutable std::unique_ptr<BugType> BT;
36
  mutable std::unique_ptr<BugType> TaintBT;
37
38
  enum OOB_Kind { OOB_Precedes, OOB_Excedes };
39
40
  void reportOOB(CheckerContext &C, ProgramStateRef errorState,
41
                 OOB_Kind kind) const;
42
  void reportTaintOOB(CheckerContext &C, ProgramStateRef errorState,
43
                      SVal TaintedSVal) const;
44
45
  static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);
46
47
public:
48
  void checkLocation(SVal l, bool isLoad, const Stmt *S,
49
                     CheckerContext &C) const;
50
};
51
52
// FIXME: Eventually replace RegionRawOffset with this class.
53
class RegionRawOffsetV2 {
54
private:
55
  const SubRegion *baseRegion;
56
  NonLoc byteOffset;
57
58
public:
59
  RegionRawOffsetV2(const SubRegion *base, NonLoc offset)
60
71.6k
      : baseRegion(base), byteOffset(offset) { assert(base); }
61
62
71.6k
  NonLoc getByteOffset() const { return byteOffset; }
63
71.6k
  const SubRegion *getRegion() const { return baseRegion; }
64
65
  static std::optional<RegionRawOffsetV2>
66
  computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location);
67
68
  void dump() const;
69
  void dumpToStream(raw_ostream &os) const;
70
};
71
}
72
73
// TODO: once the constraint manager is smart enough to handle non simplified
74
// symbolic expressions remove this function. Note that this can not be used in
75
// the constraint manager as is, since this does not handle overflows. It is
76
// safe to assume, however, that memory offsets will not overflow.
77
// NOTE: callers of this function need to be aware of the effects of overflows
78
// and signed<->unsigned conversions!
79
static std::pair<NonLoc, nonloc::ConcreteInt>
80
getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
81
126k
                     SValBuilder &svalBuilder) {
82
126k
  std::optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>();
83
126k
  if (SymVal && 
SymVal->isExpression()180
) {
84
97
    if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) {
85
93
      llvm::APSInt constant =
86
93
          APSIntType(extent.getValue()).convert(SIE->getRHS());
87
93
      switch (SIE->getOpcode()) {
88
71
      case BO_Mul:
89
        // The constant should never be 0 here, since it the result of scaling
90
        // based on the size of a type which is never 0.
91
71
        if ((extent.getValue() % constant) != 0)
92
0
          return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
93
71
        else
94
71
          return getSimplifiedOffsets(
95
71
              nonloc::SymbolVal(SIE->getLHS()),
96
71
              svalBuilder.makeIntVal(extent.getValue() / constant),
97
71
              svalBuilder);
98
10
      case BO_Add:
99
10
        return getSimplifiedOffsets(
100
10
            nonloc::SymbolVal(SIE->getLHS()),
101
10
            svalBuilder.makeIntVal(extent.getValue() - constant), svalBuilder);
102
12
      default:
103
12
        break;
104
93
      }
105
93
    }
106
97
  }
107
108
126k
  return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
109
126k
}
110
111
// Evaluate the comparison Value < Threshold with the help of the custom
112
// simplification algorithm defined for this checker. Return a pair of states,
113
// where the first one corresponds to "value below threshold" and the second
114
// corresponds to "value at or above threshold". Returns {nullptr, nullptr} in
115
// the case when the evaluation fails.
116
static std::pair<ProgramStateRef, ProgramStateRef>
117
compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
118
135k
                        SValBuilder &SVB) {
119
135k
  if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) {
120
126k
    std::tie(Value, Threshold) = getSimplifiedOffsets(Value, *ConcreteThreshold, SVB);
121
126k
  }
122
135k
  if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) {
123
126k
    QualType T = Value.getType(SVB.getContext());
124
126k
    if (T->isUnsignedIntegerType() && 
ConcreteThreshold->getValue().isNegative()12
) {
125
      // In this case we reduced the bound check to a comparison of the form
126
      //   (symbol or value with unsigned type) < (negative number)
127
      // which is always false. We are handling these cases separately because
128
      // evalBinOpNN can perform a signed->unsigned conversion that turns the
129
      // negative number into a huge positive value and leads to wildly
130
      // inaccurate conclusions.
131
2
      return {nullptr, State};
132
2
    }
133
126k
  }
134
135k
  auto BelowThreshold =
135
135k
      SVB.evalBinOpNN(State, BO_LT, Value, Threshold, SVB.getConditionType()).getAs<NonLoc>();
136
137
135k
  if (BelowThreshold)
138
135k
    return State->assume(*BelowThreshold);
139
140
0
  return {nullptr, nullptr};
141
135k
}
142
143
void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
144
                                        const Stmt* LoadS,
145
71.6k
                                        CheckerContext &checkerContext) const {
146
147
  // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
148
  // some new logic here that reasons directly about memory region extents.
149
  // Once that logic is more mature, we can bring it back to assumeInBound()
150
  // for all clients to use.
151
  //
152
  // The algorithm we are using here for bounds checking is to see if the
153
  // memory access is within the extent of the base region.  Since we
154
  // have some flexibility in defining the base region, we can achieve
155
  // various levels of conservatism in our buffer overflow checking.
156
157
  // The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as
158
  //   #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX)
159
  // and incomplete analysis of these leads to false positives. As even
160
  // accurate reports would be confusing for the users, just disable reports
161
  // from these macros:
162
71.6k
  if (isFromCtypeMacro(LoadS, checkerContext.getASTContext()))
163
6
    return;
164
165
71.6k
  ProgramStateRef state = checkerContext.getState();
166
167
71.6k
  SValBuilder &svalBuilder = checkerContext.getSValBuilder();
168
71.6k
  const std::optional<RegionRawOffsetV2> &RawOffset =
169
71.6k
      RegionRawOffsetV2::computeOffset(state, svalBuilder, location);
170
171
71.6k
  if (!RawOffset)
172
0
    return;
173
174
71.6k
  NonLoc ByteOffset = RawOffset->getByteOffset();
175
71.6k
  const SubRegion *Reg = RawOffset->getRegion();
176
177
  // CHECK LOWER BOUND
178
71.6k
  const MemSpaceRegion *Space = Reg->getMemorySpace();
179
71.6k
  if (!(isa<SymbolicRegion>(Reg) && 
isa<UnknownSpaceRegion>(Space)8.32k
)) {
180
    // A symbolic region in unknown space represents an unknown pointer that
181
    // may point into the middle of an array, so we don't look for underflows.
182
    // Both conditions are significant because we want to check underflows in
183
    // symbolic regions on the heap (which may be introduced by checkers like
184
    // MallocChecker that call SValBuilder::getConjuredHeapSymbolVal()) and
185
    // non-symbolic regions (e.g. a field subregion of a symbolic region) in
186
    // unknown space.
187
63.3k
    auto [state_precedesLowerBound, state_withinLowerBound] =
188
63.3k
        compareValueToThreshold(state, ByteOffset,
189
63.3k
                                svalBuilder.makeZeroArrayIndex(), svalBuilder);
190
191
63.3k
    if (state_precedesLowerBound && 
!state_withinLowerBound55
) {
192
      // We know that the index definitely precedes the lower bound.
193
13
      reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes);
194
13
      return;
195
13
    }
196
197
63.3k
    if (state_withinLowerBound)
198
63.3k
      state = state_withinLowerBound;
199
63.3k
  }
200
201
  // CHECK UPPER BOUND
202
71.6k
  DefinedOrUnknownSVal Size = getDynamicExtent(state, Reg, svalBuilder);
203
71.6k
  if (auto KnownSize = Size.getAs<NonLoc>()) {
204
71.6k
    auto [state_withinUpperBound, state_exceedsUpperBound] =
205
71.6k
        compareValueToThreshold(state, ByteOffset, *KnownSize, svalBuilder);
206
207
71.6k
    if (state_exceedsUpperBound) {
208
5.82k
      if (!state_withinUpperBound) {
209
        // We know that the index definitely exceeds the upper bound.
210
14
        reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes);
211
14
        return;
212
14
      }
213
5.81k
      if (isTainted(state, ByteOffset)) {
214
        // Both cases are possible, but the index is tainted, so report.
215
27
        reportTaintOOB(checkerContext, state_exceedsUpperBound, ByteOffset);
216
27
        return;
217
27
      }
218
5.81k
    }
219
220
71.6k
    if (state_withinUpperBound)
221
71.6k
      state = state_withinUpperBound;
222
71.6k
  }
223
224
71.6k
  checkerContext.addTransition(state);
225
71.6k
}
226
227
void ArrayBoundCheckerV2::reportTaintOOB(CheckerContext &checkerContext,
228
                                         ProgramStateRef errorState,
229
27
                                         SVal TaintedSVal) const {
230
27
  ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState);
231
27
  if (!errorNode)
232
0
    return;
233
234
27
  if (!TaintBT)
235
4
    TaintBT.reset(
236
4
        new BugType(this, "Out-of-bound access", categories::TaintedData));
237
238
27
  SmallString<256> buf;
239
27
  llvm::raw_svector_ostream os(buf);
240
27
  os << "Out of bound memory access (index is tainted)";
241
27
  auto BR =
242
27
      std::make_unique<PathSensitiveBugReport>(*TaintBT, os.str(), errorNode);
243
244
  // Track back the propagation of taintedness.
245
27
  for (SymbolRef Sym : getTaintedSymbols(errorState, TaintedSVal)) {
246
27
    BR->markInteresting(Sym);
247
27
  }
248
249
27
  checkerContext.emitReport(std::move(BR));
250
27
}
251
252
void ArrayBoundCheckerV2::reportOOB(CheckerContext &checkerContext,
253
                                    ProgramStateRef errorState,
254
27
                                    OOB_Kind kind) const {
255
256
27
  ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState);
257
27
  if (!errorNode)
258
0
    return;
259
260
27
  if (!BT)
261
2
    BT.reset(new BugType(this, "Out-of-bound access"));
262
263
  // FIXME: This diagnostics are preliminary.  We should get far better
264
  // diagnostics for explaining buffer overruns.
265
266
27
  SmallString<256> buf;
267
27
  llvm::raw_svector_ostream os(buf);
268
27
  os << "Out of bound memory access ";
269
27
  switch (kind) {
270
13
  case OOB_Precedes:
271
13
    os << "(accessed memory precedes memory block)";
272
13
    break;
273
14
  case OOB_Excedes:
274
14
    os << "(access exceeds upper limit of memory block)";
275
14
    break;
276
27
  }
277
27
  auto BR = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), errorNode);
278
27
  checkerContext.emitReport(std::move(BR));
279
27
}
280
281
71.6k
bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
282
71.6k
  SourceLocation Loc = S->getBeginLoc();
283
71.6k
  if (!Loc.isMacroID())
284
71.2k
    return false;
285
286
454
  StringRef MacroName = Lexer::getImmediateMacroName(
287
454
      Loc, ACtx.getSourceManager(), ACtx.getLangOpts());
288
289
454
  if (MacroName.size() < 7 || MacroName[0] != 'i' || 
MacroName[1] != 's'6
)
290
448
    return false;
291
292
6
  return ((MacroName == "isalnum") || (MacroName == "isalpha") ||
293
6
          (MacroName == "isblank") || (MacroName == "isdigit") ||
294
6
          
(MacroName == "isgraph")0
||
(MacroName == "islower")0
||
295
6
          
(MacroName == "isnctrl")0
||
(MacroName == "isprint")0
||
296
6
          
(MacroName == "ispunct")0
||
(MacroName == "isspace")0
||
297
6
          
(MacroName == "isupper")0
||
(MacroName == "isxdigit")0
);
298
454
}
299
300
#ifndef NDEBUG
301
0
LLVM_DUMP_METHOD void RegionRawOffsetV2::dump() const {
302
0
  dumpToStream(llvm::errs());
303
0
}
304
305
0
void RegionRawOffsetV2::dumpToStream(raw_ostream &os) const {
306
0
  os << "raw_offset_v2{" << getRegion() << ',' << getByteOffset() << '}';
307
0
}
308
#endif
309
310
/// For a given Location that can be represented as a symbolic expression
311
/// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block
312
/// Arr and the distance of Location from the beginning of Arr (expressed in a
313
/// NonLoc that specifies the number of CharUnits). Returns nullopt when these
314
/// cannot be determined.
315
std::optional<RegionRawOffsetV2>
316
RegionRawOffsetV2::computeOffset(ProgramStateRef State, SValBuilder &SVB,
317
71.6k
                                 SVal Location) {
318
71.6k
  QualType T = SVB.getArrayIndexType();
319
71.6k
  auto Calc = [&SVB, State, T](BinaryOperatorKind Op, NonLoc LHS, NonLoc RHS) {
320
    // We will use this utility to add and multiply values.
321
16.2k
    return SVB.evalBinOpNN(State, Op, LHS, RHS, T).getAs<NonLoc>();
322
16.2k
  };
323
324
71.6k
  const MemRegion *Region = Location.getAsRegion();
325
71.6k
  NonLoc Offset = SVB.makeZeroArrayIndex();
326
327
79.8k
  while (Region) {
328
79.8k
    if (const auto *ERegion = dyn_cast<ElementRegion>(Region)) {
329
8.13k
      if (const auto Index = ERegion->getIndex().getAs<NonLoc>()) {
330
8.13k
        QualType ElemType = ERegion->getElementType();
331
        // If the element is an incomplete type, go no further.
332
8.13k
        if (ElemType->isIncompleteType())
333
0
          return std::nullopt;
334
335
        // Perform Offset += Index * sizeof(ElemType); then continue the offset
336
        // calculations with SuperRegion:
337
8.13k
        NonLoc Size = SVB.makeArrayIndex(
338
8.13k
            SVB.getContext().getTypeSizeInChars(ElemType).getQuantity());
339
8.13k
        if (auto Delta = Calc(BO_Mul, *Index, Size)) {
340
8.13k
          if (auto NewOffset = Calc(BO_Add, Offset, *Delta)) {
341
8.13k
            Offset = *NewOffset;
342
8.13k
            Region = ERegion->getSuperRegion();
343
8.13k
            continue;
344
8.13k
          }
345
8.13k
        }
346
8.13k
      }
347
71.6k
    } else if (const auto *SRegion = dyn_cast<SubRegion>(Region)) {
348
      // NOTE: The dyn_cast<>() is expected to succeed, it'd be very surprising
349
      // to see a MemSpaceRegion at this point.
350
      // FIXME: We may return with {<Region>, 0} even if we didn't handle any
351
      // ElementRegion layers. I think that this behavior was introduced
352
      // accidentally by 8a4c760c204546aba566e302f299f7ed2e00e287 in 2011, so
353
      // it may be useful to review it in the future.
354
71.6k
      return RegionRawOffsetV2(SRegion, Offset);
355
71.6k
    }
356
0
    return std::nullopt;
357
79.8k
  }
358
0
  return std::nullopt;
359
71.6k
}
360
361
10
void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
362
10
  mgr.registerChecker<ArrayBoundCheckerV2>();
363
10
}
364
365
20
bool ento::shouldRegisterArrayBoundCheckerV2(const CheckerManager &mgr) {
366
20
  return true;
367
20
}