/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // Defines basic, non-domain-specific mechanisms for tracking tainted values. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "clang/StaticAnalyzer/Checkers/Taint.h" |
14 | | #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" |
15 | | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" |
16 | | #include <optional> |
17 | | |
18 | | using namespace clang; |
19 | | using namespace ento; |
20 | | using namespace taint; |
21 | | |
22 | | // Fully tainted symbols: program-state map from a symbol to its taint tag. |
23 | | REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType) |
24 | | |
25 | | // Partially tainted symbols: DerivedSymTaint maps a parent symbol to a TaintedSubRegions map (sub-region -> taint tag) listing the tainted sub-regions. |
26 | | REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *, |
27 | | TaintTagType) |
28 | | REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions) |
29 | | |
30 | | void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL, |
31 | 2 | const char *Sep) { |
32 | 2 | TaintMapTy TM = State->get<TaintMap>(); |
33 | | |
34 | 2 | if (!TM.isEmpty()) |
35 | 2 | Out << "Tainted symbols:" << NL; |
36 | | |
37 | 2 | for (const auto &I : TM) |
38 | 2 | Out << I.first << " : " << I.second << NL; |
39 | 2 | } |
40 | | |
41 | 0 | void taint::dumpTaint(ProgramStateRef State) { |
42 | 0 | printTaint(State, llvm::errs()); |
43 | 0 | } |
44 | | |
45 | | ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S, |
46 | | const LocationContext *LCtx, |
47 | 0 | TaintTagType Kind) { |
48 | 0 | return addTaint(State, State->getSVal(S, LCtx), Kind); |
49 | 0 | } |
50 | | |
51 | | ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V, |
52 | 1.66k | TaintTagType Kind) { |
53 | 1.66k | SymbolRef Sym = V.getAsSymbol(); |
54 | 1.66k | if (Sym) |
55 | 1.55k | return addTaint(State, Sym, Kind); |
56 | | |
57 | | // If the SVal represents a structure, try to mass-taint all values within the |
58 | | // structure. For now it only works efficiently on lazy compound values that |
59 | | // were conjured during a conservative evaluation of a function - either as |
60 | | // return values of functions that return structures or arrays by value, or as |
61 | | // values of structures or arrays passed into the function by reference, |
62 | | // directly or through pointer aliasing. Such lazy compound values are |
63 | | // characterized by having exactly one binding in their captured store within |
64 | | // their parent region, which is a conjured symbol default-bound to the base |
65 | | // region of the parent region. |
66 | 102 | if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) { |
67 | 50 | if (std::optional<SVal> binding = |
68 | 50 | State->getStateManager().getStoreManager().getDefaultBinding( |
69 | 50 | *LCV)) { |
70 | 50 | if (SymbolRef Sym = binding->getAsSymbol()) |
71 | 50 | return addPartialTaint(State, Sym, LCV->getRegion(), Kind); |
72 | 50 | } |
73 | 50 | } |
74 | | |
75 | 52 | const MemRegion *R = V.getAsRegion(); |
76 | 52 | return addTaint(State, R, Kind); |
77 | 102 | } |
78 | | |
79 | | ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R, |
80 | 52 | TaintTagType Kind) { |
81 | 52 | if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R)) |
82 | 0 | return addTaint(State, SR->getSymbol(), Kind); |
83 | 52 | return State; |
84 | 52 | } |
85 | | |
86 | | ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym, |
87 | 1.57k | TaintTagType Kind) { |
88 | | // If this is a symbol cast, remove the cast before adding the taint. Taint |
89 | | // is cast agnostic. |
90 | 1.57k | while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym)) |
91 | 0 | Sym = SC->getOperand(); |
92 | | |
93 | 1.57k | ProgramStateRef NewState = State->set<TaintMap>(Sym, Kind); |
94 | 1.57k | assert(NewState); |
95 | 1.57k | return NewState; |
96 | 1.57k | } |
97 | | |
98 | 14 | ProgramStateRef taint::removeTaint(ProgramStateRef State, SVal V) { |
99 | 14 | SymbolRef Sym = V.getAsSymbol(); |
100 | 14 | if (Sym) |
101 | 7 | return removeTaint(State, Sym); |
102 | | |
103 | 7 | const MemRegion *R = V.getAsRegion(); |
104 | 7 | return removeTaint(State, R); |
105 | 14 | } |
106 | | |
107 | 7 | ProgramStateRef taint::removeTaint(ProgramStateRef State, const MemRegion *R) { |
108 | 7 | if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R)) |
109 | 0 | return removeTaint(State, SR->getSymbol()); |
110 | 7 | return State; |
111 | 7 | } |
112 | | |
113 | 7 | ProgramStateRef taint::removeTaint(ProgramStateRef State, SymbolRef Sym) { |
114 | | // If this is a symbol cast, remove the cast before adding the taint. Taint |
115 | | // is cast agnostic. |
116 | 7 | while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym)) |
117 | 0 | Sym = SC->getOperand(); |
118 | | |
119 | 7 | ProgramStateRef NewState = State->remove<TaintMap>(Sym); |
120 | 7 | assert(NewState); |
121 | 7 | return NewState; |
122 | 7 | } |
123 | | |
124 | | ProgramStateRef taint::addPartialTaint(ProgramStateRef State, |
125 | | SymbolRef ParentSym, |
126 | | const SubRegion *SubRegion, |
127 | 50 | TaintTagType Kind) { |
128 | | // Ignore partial taint if the entire parent symbol is already tainted. |
129 | 50 | if (const TaintTagType *T = State->get<TaintMap>(ParentSym)) |
130 | 0 | if (*T == Kind) |
131 | 0 | return State; |
132 | | |
133 | | // Partial taint applies if only a portion of the symbol is tainted. |
134 | 50 | if (SubRegion == SubRegion->getBaseRegion()) |
135 | 20 | return addTaint(State, ParentSym, Kind); |
136 | | |
137 | 30 | const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(ParentSym); |
138 | 30 | TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>(); |
139 | 30 | TaintedSubRegions Regs = SavedRegs ? *SavedRegs0 : F.getEmptyMap(); |
140 | | |
141 | 30 | Regs = F.add(Regs, SubRegion, Kind); |
142 | 30 | ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Regs); |
143 | 30 | assert(NewState); |
144 | 30 | return NewState; |
145 | 30 | } |
146 | | |
147 | | bool taint::isTainted(ProgramStateRef State, const Stmt *S, |
148 | 1.40k | const LocationContext *LCtx, TaintTagType Kind) { |
149 | 1.40k | return !getTaintedSymbolsImpl(State, S, LCtx, Kind, /*ReturnFirstOnly=*/true) |
150 | 1.40k | .empty(); |
151 | 1.40k | } |
152 | | |
153 | 15.5k | bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) { |
154 | 15.5k | return !getTaintedSymbolsImpl(State, V, Kind, /*ReturnFirstOnly=*/true) |
155 | 15.5k | .empty(); |
156 | 15.5k | } |
157 | | |
158 | | bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg, |
159 | 0 | TaintTagType K) { |
160 | 0 | return !getTaintedSymbolsImpl(State, Reg, K, /*ReturnFirstOnly=*/true) |
161 | 0 | .empty(); |
162 | 0 | } |
163 | | |
164 | 0 | bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) { |
165 | 0 | return !getTaintedSymbolsImpl(State, Sym, Kind, /*ReturnFirstOnly=*/true) |
166 | 0 | .empty(); |
167 | 0 | } |
168 | | |
169 | | std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, |
170 | | const Stmt *S, |
171 | | const LocationContext *LCtx, |
172 | 0 | TaintTagType Kind) { |
173 | 0 | return getTaintedSymbolsImpl(State, S, LCtx, Kind, /*ReturnFirstOnly=*/false); |
174 | 0 | } |
175 | | |
176 | | std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, SVal V, |
177 | 4.65k | TaintTagType Kind) { |
178 | 4.65k | return getTaintedSymbolsImpl(State, V, Kind, /*ReturnFirstOnly=*/false); |
179 | 4.65k | } |
180 | | |
181 | | std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, |
182 | | SymbolRef Sym, |
183 | 0 | TaintTagType Kind) { |
184 | 0 | return getTaintedSymbolsImpl(State, Sym, Kind, /*ReturnFirstOnly=*/false); |
185 | 0 | } |
186 | | |
187 | | std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, |
188 | | const MemRegion *Reg, |
189 | 0 | TaintTagType Kind) { |
190 | 0 | return getTaintedSymbolsImpl(State, Reg, Kind, /*ReturnFirstOnly=*/false); |
191 | 0 | } |
192 | | |
193 | | std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State, |
194 | | const Stmt *S, |
195 | | const LocationContext *LCtx, |
196 | | TaintTagType Kind, |
197 | 1.40k | bool returnFirstOnly) { |
198 | 1.40k | SVal val = State->getSVal(S, LCtx); |
199 | 1.40k | return getTaintedSymbolsImpl(State, val, Kind, returnFirstOnly); |
200 | 1.40k | } |
201 | | |
202 | | std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State, |
203 | | SVal V, TaintTagType Kind, |
204 | 24.2k | bool returnFirstOnly) { |
205 | 24.2k | if (SymbolRef Sym = V.getAsSymbol()) |
206 | 14.6k | return getTaintedSymbolsImpl(State, Sym, Kind, returnFirstOnly); |
207 | 9.65k | if (const MemRegion *Reg = V.getAsRegion()) |
208 | 2.19k | return getTaintedSymbolsImpl(State, Reg, Kind, returnFirstOnly); |
209 | 7.45k | return {}; |
210 | 9.65k | } |
211 | | |
212 | | std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State, |
213 | | const MemRegion *Reg, |
214 | | TaintTagType K, |
215 | 104k | bool returnFirstOnly) { |
216 | 104k | std::vector<SymbolRef> TaintedSymbols; |
217 | 104k | if (!Reg) |
218 | 0 | return TaintedSymbols; |
219 | | // Element region (array element) is tainted if either the base or the offset |
220 | | // are tainted. |
221 | 104k | if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg)) { |
222 | 2.71k | std::vector<SymbolRef> TaintedIndex = |
223 | 2.71k | getTaintedSymbolsImpl(State, ER->getIndex(), K, returnFirstOnly); |
224 | 2.71k | llvm::append_range(TaintedSymbols, TaintedIndex); |
225 | 2.71k | if (returnFirstOnly && !TaintedSymbols.empty()2.57k ) |
226 | 8 | return TaintedSymbols; // return early if needed |
227 | 2.70k | std::vector<SymbolRef> TaintedSuperRegion = |
228 | 2.70k | getTaintedSymbolsImpl(State, ER->getSuperRegion(), K, returnFirstOnly); |
229 | 2.70k | llvm::append_range(TaintedSymbols, TaintedSuperRegion); |
230 | 2.70k | if (returnFirstOnly && !TaintedSymbols.empty()2.56k ) |
231 | 220 | return TaintedSymbols; // return early if needed |
232 | 2.70k | } |
233 | | |
234 | 104k | if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg)) { |
235 | 2.02k | std::vector<SymbolRef> TaintedRegions = |
236 | 2.02k | getTaintedSymbolsImpl(State, SR->getSymbol(), K, returnFirstOnly); |
237 | 2.02k | llvm::append_range(TaintedSymbols, TaintedRegions); |
238 | 2.02k | if (returnFirstOnly && !TaintedSymbols.empty()1.73k ) |
239 | 220 | return TaintedSymbols; // return early if needed |
240 | 2.02k | } |
241 | | |
242 | 103k | if (const SubRegion *ER = dyn_cast<SubRegion>(Reg)) { |
243 | 53.1k | std::vector<SymbolRef> TaintedSubRegions = |
244 | 53.1k | getTaintedSymbolsImpl(State, ER->getSuperRegion(), K, returnFirstOnly); |
245 | 53.1k | llvm::append_range(TaintedSymbols, TaintedSubRegions); |
246 | 53.1k | if (returnFirstOnly && !TaintedSymbols.empty()35.0k ) |
247 | 4 | return TaintedSymbols; // return early if needed |
248 | 53.1k | } |
249 | | |
250 | 103k | return TaintedSymbols; |
251 | 103k | } |
252 | | |
253 | | std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State, |
254 | | SymbolRef Sym, |
255 | | TaintTagType Kind, |
256 | 18.5k | bool returnFirstOnly) { |
257 | 18.5k | std::vector<SymbolRef> TaintedSymbols; |
258 | 18.5k | if (!Sym) |
259 | 0 | return TaintedSymbols; |
260 | | |
261 | | // Traverse all the symbols this symbol depends on to see if any are tainted. |
262 | 111k | for (SymbolRef SubSym : Sym->symbols())18.5k { |
263 | 111k | if (!isa<SymbolData>(SubSym)) |
264 | 55.2k | continue; |
265 | | |
266 | 56.7k | if (const TaintTagType *Tag = State->get<TaintMap>(SubSym)) { |
267 | 5.27k | if (*Tag == Kind) { |
268 | 5.27k | TaintedSymbols.push_back(SubSym); |
269 | 5.27k | if (returnFirstOnly) |
270 | 2.27k | return TaintedSymbols; // return early if needed |
271 | 5.27k | } |
272 | 5.27k | } |
273 | | |
274 | 54.4k | if (const auto *SD = dyn_cast<SymbolDerived>(SubSym)) { |
275 | | // If this is a SymbolDerived with a tainted parent, it's also tainted. |
276 | 1.84k | std::vector<SymbolRef> TaintedParents = getTaintedSymbolsImpl( |
277 | 1.84k | State, SD->getParentSymbol(), Kind, returnFirstOnly); |
278 | 1.84k | llvm::append_range(TaintedSymbols, TaintedParents); |
279 | 1.84k | if (returnFirstOnly && !TaintedSymbols.empty()378 ) |
280 | 65 | return TaintedSymbols; // return early if needed |
281 | | |
282 | | // If this is a SymbolDerived with the same parent symbol as another |
283 | | // tainted SymbolDerived and a region that's a sub-region of that |
284 | | // tainted symbol, it's also tainted. |
285 | 1.78k | if (const TaintedSubRegions *Regs = |
286 | 1.78k | State->get<DerivedSymTaint>(SD->getParentSymbol())) { |
287 | 125 | const TypedValueRegion *R = SD->getRegion(); |
288 | 125 | for (auto I : *Regs) { |
289 | | // FIXME: The logic to identify tainted regions could be more |
290 | | // complete. For example, this would not currently identify |
291 | | // overlapping fields in a union as tainted. To identify this we can |
292 | | // check for overlapping/nested byte offsets. |
293 | 125 | if (Kind == I.second && R->isSubRegionOf(I.first)) { |
294 | 35 | TaintedSymbols.push_back(SD->getParentSymbol()); |
295 | 35 | if (returnFirstOnly && !TaintedSymbols.empty()20 ) |
296 | 20 | return TaintedSymbols; // return early if needed |
297 | 35 | } |
298 | 125 | } |
299 | 125 | } |
300 | 1.78k | } |
301 | | |
302 | | // If memory region is tainted, data is also tainted. |
303 | 54.3k | if (const auto *SRV = dyn_cast<SymbolRegionValue>(SubSym)) { |
304 | 46.1k | std::vector<SymbolRef> TaintedRegions = |
305 | 46.1k | getTaintedSymbolsImpl(State, SRV->getRegion(), Kind, returnFirstOnly); |
306 | 46.1k | llvm::append_range(TaintedSymbols, TaintedRegions); |
307 | 46.1k | if (returnFirstOnly && !TaintedSymbols.empty()28.3k ) |
308 | 218 | return TaintedSymbols; // return early if needed |
309 | 46.1k | } |
310 | | |
311 | | // If this is a SymbolCast from a tainted value, it's also tainted. |
312 | 54.1k | if (const auto *SC = dyn_cast<SymbolCast>(SubSym)) { |
313 | 0 | std::vector<SymbolRef> TaintedCasts = |
314 | 0 | getTaintedSymbolsImpl(State, SC->getOperand(), Kind, returnFirstOnly); |
315 | 0 | llvm::append_range(TaintedSymbols, TaintedCasts); |
316 | 0 | if (returnFirstOnly && !TaintedSymbols.empty()) |
317 | 0 | return TaintedSymbols; // return early if needed |
318 | 0 | } |
319 | 54.1k | } |
320 | 15.9k | return TaintedSymbols; |
321 | 18.5k | } |