Coverage Report

Created: 2023-09-21 18:56

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
Line
Count
Source (jump to first uncovered line)
1
//=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Defines basic, non-domain-specific mechanisms for tracking tainted values.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "clang/StaticAnalyzer/Checkers/Taint.h"
14
#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
15
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
16
#include <optional>
17
18
using namespace clang;
19
using namespace ento;
20
using namespace taint;
21
22
// Fully tainted symbols: maps each tainted symbol to the taint tag it carries.
23
REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType)
24
25
// Partially tainted symbols: TaintedSubRegions maps a sub-region to its taint
// tag, and DerivedSymTaint maps a parent symbol to its tainted sub-regions.
26
REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *,
27
                                       TaintTagType)
28
REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions)
29
30
// Prints every entry of the TaintMap stored in State to Out as
// "<symbol> : <tag>", each followed by NL. A "Tainted symbols:" header is
// written first when the map is non-empty; nothing is written otherwise.
// Sep is accepted but not used by this implementation.
void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL,
                       const char *Sep) {
  TaintMapTy Tainted = State->get<TaintMap>();

  // An empty map produces neither the header nor any entries.
  if (Tainted.isEmpty())
    return;

  Out << "Tainted symbols:" << NL;
  for (const auto &Entry : Tainted)
    Out << Entry.first << " : " << Entry.second << NL;
}
40
41
0
// Debugging aid: prints the taint information of State to llvm::errs(),
// leaving printTaint's remaining arguments at their defaults.
void taint::dumpTaint(ProgramStateRef State) {
  raw_ostream &ErrStream = llvm::errs();
  printTaint(State, ErrStream);
}
44
45
// Taints the value that statement S has in State under location context
// LCtx, by looking the value up and forwarding to the SVal overload.
ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S,
                                const LocationContext *LCtx,
                                TaintTagType Kind) {
  SVal StmtValue = State->getSVal(S, LCtx);
  return addTaint(State, StmtValue, Kind);
}
50
51
// Taints the value V with tag Kind and returns the resulting state.
// Tries, in order: the symbol behind V, the default binding of a lazy
// compound value (as partial taint), and finally the region behind V.
ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V,
                                TaintTagType Kind) {
  // A value that is represented by a symbol is tainted through that symbol.
  if (SymbolRef ValueSym = V.getAsSymbol())
    return addTaint(State, ValueSym, Kind);

  // If the SVal represents a structure, try to mass-taint all values within
  // it. For now this only works efficiently on lazy compound values that were
  // conjured during a conservative evaluation of a function - either as
  // return values of functions that return structures or arrays by value, or
  // as values of structures or arrays passed into the function by reference,
  // directly or through pointer aliasing. Such lazy compound values have
  // exactly one binding in their captured store within their parent region:
  // a conjured symbol default-bound to the base region of the parent region.
  if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
    auto &StoreMgr = State->getStateManager().getStoreManager();
    std::optional<SVal> DefaultBinding = StoreMgr.getDefaultBinding(*LCV);
    if (DefaultBinding) {
      if (SymbolRef BoundSym = DefaultBinding->getAsSymbol())
        return addPartialTaint(State, BoundSym, LCV->getRegion(), Kind);
    }
  }

  // Otherwise fall back to whatever region (if any) the value refers to.
  return addTaint(State, V.getAsRegion(), Kind);
}
78
79
// Taints the symbol of R when R is a symbolic region. For any other region,
// including a null pointer, State is returned unchanged.
ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R,
                                TaintTagType Kind) {
  const auto *SymRegion = dyn_cast_or_null<SymbolicRegion>(R);
  if (!SymRegion)
    return State;
  return addTaint(State, SymRegion->getSymbol(), Kind);
}
85
86
// Records Sym as tainted with tag Kind in the TaintMap and returns the new
// state.
ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym,
                                TaintTagType Kind) {
  // Taint is cast agnostic: peel off every enclosing symbol cast so the tag
  // is attached to the underlying operand.
  for (const auto *Cast = dyn_cast<SymbolCast>(Sym); Cast;
       Cast = dyn_cast<SymbolCast>(Sym))
    Sym = Cast->getOperand();

  ProgramStateRef Tainted = State->set<TaintMap>(Sym, Kind);
  assert(Tainted);
  return Tainted;
}
97
98
14
// Removes taint from the value V: through its symbol when it has one,
// otherwise through the region (if any) it refers to.
ProgramStateRef taint::removeTaint(ProgramStateRef State, SVal V) {
  if (SymbolRef ValueSym = V.getAsSymbol())
    return removeTaint(State, ValueSym);

  return removeTaint(State, V.getAsRegion());
}
106
107
7
// Removes taint from the symbol of R when R is a symbolic region. For any
// other region, including a null pointer, State is returned unchanged.
ProgramStateRef taint::removeTaint(ProgramStateRef State, const MemRegion *R) {
  const auto *SymRegion = dyn_cast_or_null<SymbolicRegion>(R);
  if (!SymRegion)
    return State;
  return removeTaint(State, SymRegion->getSymbol());
}
112
113
7
// Erases Sym from the TaintMap and returns the new state.
ProgramStateRef taint::removeTaint(ProgramStateRef State, SymbolRef Sym) {
  // Taint is cast agnostic: peel off every enclosing symbol cast before
  // removing the taint, so the entry of the underlying operand is erased.
  for (const auto *Cast = dyn_cast<SymbolCast>(Sym); Cast;
       Cast = dyn_cast<SymbolCast>(Sym))
    Sym = Cast->getOperand();

  ProgramStateRef Untainted = State->remove<TaintMap>(Sym);
  assert(Untainted);
  return Untainted;
}
123
124
// Marks the portion SubRegion of the value behind ParentSym as tainted with
// tag Kind. Returns State unchanged when ParentSym is already fully tainted
// with Kind, taints ParentSym as a whole when SubRegion is its own base
// region, and otherwise records SubRegion in the DerivedSymTaint entry of
// ParentSym.
ProgramStateRef taint::addPartialTaint(ProgramStateRef State,
                                       SymbolRef ParentSym,
                                       const SubRegion *SubRegion,
                                       TaintTagType Kind) {
  // Partial taint is redundant if the entire parent symbol already carries
  // this tag.
  const TaintTagType *ParentTag = State->get<TaintMap>(ParentSym);
  if (ParentTag && *ParentTag == Kind)
    return State;

  // Partial taint only applies to a proper portion of the symbol; a region
  // that is its own base region is handled as full taint.
  if (SubRegion->getBaseRegion() == SubRegion)
    return addTaint(State, ParentSym, Kind);

  // Start from the sub-regions already recorded for this parent, if any.
  const TaintedSubRegions *Existing = State->get<DerivedSymTaint>(ParentSym);
  TaintedSubRegions::Factory &Factory = State->get_context<TaintedSubRegions>();
  TaintedSubRegions Updated = Existing ? *Existing : Factory.getEmptyMap();
  Updated = Factory.add(Updated, SubRegion, Kind);

  ProgramStateRef Tainted = State->set<DerivedSymTaint>(ParentSym, Updated);
  assert(Tainted);
  return Tainted;
}
146
147
// Returns true if the value of statement S under LCtx is tainted with Kind.
// Only the first tainted symbol is searched for.
bool taint::isTainted(ProgramStateRef State, const Stmt *S,
                      const LocationContext *LCtx, TaintTagType Kind) {
  const std::vector<SymbolRef> FirstHit =
      getTaintedSymbolsImpl(State, S, LCtx, Kind, /*ReturnFirstOnly=*/true);
  return !FirstHit.empty();
}
152
153
15.5k
// Returns true if the value V is tainted with Kind. Only the first tainted
// symbol is searched for.
bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) {
  const std::vector<SymbolRef> FirstHit =
      getTaintedSymbolsImpl(State, V, Kind, /*ReturnFirstOnly=*/true);
  return !FirstHit.empty();
}
157
158
bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg,
159
0
                      TaintTagType K) {
160
0
  return !getTaintedSymbolsImpl(State, Reg, K, /*ReturnFirstOnly=*/true)
161
0
              .empty();
162
0
}
163
164
0
// Returns true if the symbol Sym is tainted with Kind. Only the first
// tainted symbol is searched for.
bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) {
  const std::vector<SymbolRef> FirstHit =
      getTaintedSymbolsImpl(State, Sym, Kind, /*ReturnFirstOnly=*/true);
  return !FirstHit.empty();
}
168
169
std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State,
170
                                                const Stmt *S,
171
                                                const LocationContext *LCtx,
172
0
                                                TaintTagType Kind) {
173
0
  return getTaintedSymbolsImpl(State, S, LCtx, Kind, /*ReturnFirstOnly=*/false);
174
0
}
175
176
std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, SVal V,
177
4.65k
                                                TaintTagType Kind) {
178
4.65k
  return getTaintedSymbolsImpl(State, V, Kind, /*ReturnFirstOnly=*/false);
179
4.65k
}
180
181
std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State,
182
                                                SymbolRef Sym,
183
0
                                                TaintTagType Kind) {
184
0
  return getTaintedSymbolsImpl(State, Sym, Kind, /*ReturnFirstOnly=*/false);
185
0
}
186
187
std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State,
188
                                                const MemRegion *Reg,
189
0
                                                TaintTagType Kind) {
190
0
  return getTaintedSymbolsImpl(State, Reg, Kind, /*ReturnFirstOnly=*/false);
191
0
}
192
193
std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State,
194
                                                    const Stmt *S,
195
                                                    const LocationContext *LCtx,
196
                                                    TaintTagType Kind,
197
1.40k
                                                    bool returnFirstOnly) {
198
1.40k
  SVal val = State->getSVal(S, LCtx);
199
1.40k
  return getTaintedSymbolsImpl(State, val, Kind, returnFirstOnly);
200
1.40k
}
201
202
std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State,
203
                                                    SVal V, TaintTagType Kind,
204
24.2k
                                                    bool returnFirstOnly) {
205
24.2k
  if (SymbolRef Sym = V.getAsSymbol())
206
14.6k
    return getTaintedSymbolsImpl(State, Sym, Kind, returnFirstOnly);
207
9.65k
  if (const MemRegion *Reg = V.getAsRegion())
208
2.19k
    return getTaintedSymbolsImpl(State, Reg, Kind, returnFirstOnly);
209
7.45k
  return {};
210
9.65k
}
211
212
std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State,
213
                                                    const MemRegion *Reg,
214
                                                    TaintTagType K,
215
104k
                                                    bool returnFirstOnly) {
216
104k
  std::vector<SymbolRef> TaintedSymbols;
217
104k
  if (!Reg)
218
0
    return TaintedSymbols;
219
  // Element region (array element) is tainted if either the base or the offset
220
  // are tainted.
221
104k
  if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg)) {
222
2.71k
    std::vector<SymbolRef> TaintedIndex =
223
2.71k
        getTaintedSymbolsImpl(State, ER->getIndex(), K, returnFirstOnly);
224
2.71k
    llvm::append_range(TaintedSymbols, TaintedIndex);
225
2.71k
    if (returnFirstOnly && 
!TaintedSymbols.empty()2.57k
)
226
8
      return TaintedSymbols; // return early if needed
227
2.70k
    std::vector<SymbolRef> TaintedSuperRegion =
228
2.70k
        getTaintedSymbolsImpl(State, ER->getSuperRegion(), K, returnFirstOnly);
229
2.70k
    llvm::append_range(TaintedSymbols, TaintedSuperRegion);
230
2.70k
    if (returnFirstOnly && 
!TaintedSymbols.empty()2.56k
)
231
220
      return TaintedSymbols; // return early if needed
232
2.70k
  }
233
234
104k
  if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg)) {
235
2.02k
    std::vector<SymbolRef> TaintedRegions =
236
2.02k
        getTaintedSymbolsImpl(State, SR->getSymbol(), K, returnFirstOnly);
237
2.02k
    llvm::append_range(TaintedSymbols, TaintedRegions);
238
2.02k
    if (returnFirstOnly && 
!TaintedSymbols.empty()1.73k
)
239
220
      return TaintedSymbols; // return early if needed
240
2.02k
  }
241
242
103k
  if (const SubRegion *ER = dyn_cast<SubRegion>(Reg)) {
243
53.1k
    std::vector<SymbolRef> TaintedSubRegions =
244
53.1k
        getTaintedSymbolsImpl(State, ER->getSuperRegion(), K, returnFirstOnly);
245
53.1k
    llvm::append_range(TaintedSymbols, TaintedSubRegions);
246
53.1k
    if (returnFirstOnly && 
!TaintedSymbols.empty()35.0k
)
247
4
      return TaintedSymbols; // return early if needed
248
53.1k
  }
249
250
103k
  return TaintedSymbols;
251
103k
}
252
253
std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State,
254
                                                    SymbolRef Sym,
255
                                                    TaintTagType Kind,
256
18.5k
                                                    bool returnFirstOnly) {
257
18.5k
  std::vector<SymbolRef> TaintedSymbols;
258
18.5k
  if (!Sym)
259
0
    return TaintedSymbols;
260
261
  // Traverse all the symbols this symbol depends on to see if any are tainted.
262
111k
  
for (SymbolRef SubSym : Sym->symbols())18.5k
{
263
111k
    if (!isa<SymbolData>(SubSym))
264
55.2k
      continue;
265
266
56.7k
    if (const TaintTagType *Tag = State->get<TaintMap>(SubSym)) {
267
5.27k
      if (*Tag == Kind) {
268
5.27k
        TaintedSymbols.push_back(SubSym);
269
5.27k
        if (returnFirstOnly)
270
2.27k
          return TaintedSymbols; // return early if needed
271
5.27k
      }
272
5.27k
    }
273
274
54.4k
    if (const auto *SD = dyn_cast<SymbolDerived>(SubSym)) {
275
      // If this is a SymbolDerived with a tainted parent, it's also tainted.
276
1.84k
      std::vector<SymbolRef> TaintedParents = getTaintedSymbolsImpl(
277
1.84k
          State, SD->getParentSymbol(), Kind, returnFirstOnly);
278
1.84k
      llvm::append_range(TaintedSymbols, TaintedParents);
279
1.84k
      if (returnFirstOnly && 
!TaintedSymbols.empty()378
)
280
65
        return TaintedSymbols; // return early if needed
281
282
      // If this is a SymbolDerived with the same parent symbol as another
283
      // tainted SymbolDerived and a region that's a sub-region of that
284
      // tainted symbol, it's also tainted.
285
1.78k
      if (const TaintedSubRegions *Regs =
286
1.78k
              State->get<DerivedSymTaint>(SD->getParentSymbol())) {
287
125
        const TypedValueRegion *R = SD->getRegion();
288
125
        for (auto I : *Regs) {
289
          // FIXME: The logic to identify tainted regions could be more
290
          // complete. For example, this would not currently identify
291
          // overlapping fields in a union as tainted. To identify this we can
292
          // check for overlapping/nested byte offsets.
293
125
          if (Kind == I.second && R->isSubRegionOf(I.first)) {
294
35
            TaintedSymbols.push_back(SD->getParentSymbol());
295
35
            if (returnFirstOnly && 
!TaintedSymbols.empty()20
)
296
20
              return TaintedSymbols; // return early if needed
297
35
          }
298
125
        }
299
125
      }
300
1.78k
    }
301
302
    // If memory region is tainted, data is also tainted.
303
54.3k
    if (const auto *SRV = dyn_cast<SymbolRegionValue>(SubSym)) {
304
46.1k
      std::vector<SymbolRef> TaintedRegions =
305
46.1k
          getTaintedSymbolsImpl(State, SRV->getRegion(), Kind, returnFirstOnly);
306
46.1k
      llvm::append_range(TaintedSymbols, TaintedRegions);
307
46.1k
      if (returnFirstOnly && 
!TaintedSymbols.empty()28.3k
)
308
218
        return TaintedSymbols; // return early if needed
309
46.1k
    }
310
311
    // If this is a SymbolCast from a tainted value, it's also tainted.
312
54.1k
    if (const auto *SC = dyn_cast<SymbolCast>(SubSym)) {
313
0
      std::vector<SymbolRef> TaintedCasts =
314
0
          getTaintedSymbolsImpl(State, SC->getOperand(), Kind, returnFirstOnly);
315
0
      llvm::append_range(TaintedSymbols, TaintedCasts);
316
0
      if (returnFirstOnly && !TaintedSymbols.empty())
317
0
        return TaintedSymbols; // return early if needed
318
0
    }
319
54.1k
  }
320
15.9k
  return TaintedSymbols;
321
18.5k
}