/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This checker defines the attack surface for generic taint propagation. |
10 | | // |
11 | | // The taint information produced by it might be useful to other checkers. For |
12 | | // example, checkers should report errors which involve tainted data more |
13 | | // aggressively, even if the involved symbols are under constrained. |
14 | | // |
15 | | //===----------------------------------------------------------------------===// |
16 | | |
17 | | #include "Yaml.h" |
18 | | #include "clang/AST/Attr.h" |
19 | | #include "clang/Basic/Builtins.h" |
20 | | #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" |
21 | | #include "clang/StaticAnalyzer/Checkers/Taint.h" |
22 | | #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" |
23 | | #include "clang/StaticAnalyzer/Core/Checker.h" |
24 | | #include "clang/StaticAnalyzer/Core/CheckerManager.h" |
25 | | #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" |
26 | | #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" |
27 | | #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" |
28 | | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" |
29 | | #include "llvm/ADT/StringExtras.h" |
30 | | #include "llvm/Support/YAMLTraits.h" |
31 | | |
32 | | #include <limits> |
33 | | #include <memory> |
34 | | #include <optional> |
35 | | #include <utility> |
36 | | #include <vector> |
37 | | |
38 | | #define DEBUG_TYPE "taint-checker" |
39 | | |
40 | | using namespace clang; |
41 | | using namespace ento; |
42 | | using namespace taint; |
43 | | |
44 | | using llvm::ImmutableSet; |
45 | | |
46 | | namespace { |
47 | | |
48 | | class GenericTaintChecker; |
49 | | |
50 | | /// Check for CWE-134: Uncontrolled Format String. |
51 | | constexpr llvm::StringLiteral MsgUncontrolledFormatString = |
52 | | "Untrusted data is used as a format string " |
53 | | "(CWE-134: Uncontrolled Format String)"; |
54 | | |
55 | | /// Check for: |
56 | | /// CERT/STR02-C. "Sanitize data passed to complex subsystems" |
57 | | /// CWE-78, "Failure to Sanitize Data into an OS Command" |
58 | | constexpr llvm::StringLiteral MsgSanitizeSystemArgs = |
59 | | "Untrusted data is passed to a system call " |
60 | | "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; |
61 | | |
62 | | /// Check if tainted data is used as a buffer size in strn.. functions, |
63 | | /// and allocators. |
64 | | constexpr llvm::StringLiteral MsgTaintedBufferSize = |
65 | | "Untrusted data is used to specify the buffer size " |
66 | | "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " |
67 | | "for character data and the null terminator)"; |
68 | | |
69 | | /// Check if tainted data is used as a custom sink's parameter. |
70 | | constexpr llvm::StringLiteral MsgCustomSink = |
71 | | "Untrusted data is passed to a user-defined sink"; |
72 | | |
73 | | using ArgIdxTy = int; |
74 | | using ArgVecTy = llvm::SmallVector<ArgIdxTy, 2>; |
75 | | |
76 | | /// Denotes the return value. |
77 | | constexpr ArgIdxTy ReturnValueIndex{-1}; |
78 | | |
79 | 4.31k | static ArgIdxTy fromArgumentCount(unsigned Count) { |
80 | 4.31k | assert(Count <= |
81 | 4.31k | static_cast<std::size_t>(std::numeric_limits<ArgIdxTy>::max()) && |
82 | 4.31k | "ArgIdxTy is not large enough to represent the number of arguments."); |
83 | 4.31k | return Count; |
84 | 4.31k | } |
85 | | |
86 | | /// Check if the region the expression evaluates to is the standard input, |
87 | | /// and thus, is tainted. |
88 | | /// FIXME: Move this to Taint.cpp. |
89 | 6.02k | bool isStdin(SVal Val, const ASTContext &ACtx) { |
90 | | // FIXME: What if Val is NonParamVarRegion? |
91 | | |
92 | | // The region should be symbolic, we do not know it's value. |
93 | 6.02k | const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(Val.getAsRegion()); |
94 | 6.02k | if (!SymReg) |
95 | 5.35k | return false; |
96 | | |
97 | | // Get it's symbol and find the declaration region it's pointing to. |
98 | 670 | const auto *DeclReg = |
99 | 670 | dyn_cast_or_null<DeclRegion>(SymReg->getSymbol()->getOriginRegion()); |
100 | 670 | if (!DeclReg) |
101 | 185 | return false; |
102 | | |
103 | | // This region corresponds to a declaration, find out if it's a global/extern |
104 | | // variable named stdin with the proper type. |
105 | 485 | if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { |
106 | 485 | D = D->getCanonicalDecl(); |
107 | 485 | if (D->getName() == "stdin" && D->hasExternalStorage()66 && D->isExternC()66 ) { |
108 | 66 | const QualType FILETy = ACtx.getFILEType().getCanonicalType(); |
109 | 66 | const QualType Ty = D->getType().getCanonicalType(); |
110 | | |
111 | 66 | if (Ty->isPointerType()) |
112 | 66 | return Ty->getPointeeType() == FILETy; |
113 | 66 | } |
114 | 485 | } |
115 | 419 | return false; |
116 | 485 | } |
117 | | |
118 | 4.84k | SVal getPointeeOf(ProgramStateRef State, Loc LValue) { |
119 | 4.84k | const QualType ArgTy = LValue.getType(State->getStateManager().getContext()); |
120 | 4.84k | if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType()4.75k ) |
121 | 4.80k | return State->getSVal(LValue); |
122 | | |
123 | | // Do not dereference void pointers. Treat them as byte pointers instead. |
124 | | // FIXME: we might want to consider more than just the first byte. |
125 | 36 | return State->getSVal(LValue, State->getStateManager().getContext().CharTy); |
126 | 4.84k | } |
127 | | |
128 | | /// Given a pointer/reference argument, return the value it refers to. |
129 | 7.82k | std::optional<SVal> getPointeeOf(ProgramStateRef State, SVal Arg) { |
130 | 7.82k | if (auto LValue = Arg.getAs<Loc>()) |
131 | 4.84k | return getPointeeOf(State, *LValue); |
132 | 2.98k | return std::nullopt; |
133 | 7.82k | } |
134 | | |
135 | | /// Given a pointer, return the SVal of its pointee or if it is tainted, |
136 | | /// otherwise return the pointer's SVal if tainted. |
137 | | /// Also considers stdin as a taint source. |
138 | | std::optional<SVal> getTaintedPointeeOrPointer(ProgramStateRef State, |
139 | 6.92k | SVal Arg) { |
140 | 6.92k | if (auto Pointee = getPointeeOf(State, Arg)) |
141 | 3.94k | if (isTainted(State, *Pointee)) // FIXME: isTainted(...) ? Pointee : None; |
142 | 1.24k | return Pointee; |
143 | | |
144 | 5.67k | if (isTainted(State, Arg)) |
145 | 524 | return Arg; |
146 | 5.15k | return std::nullopt; |
147 | 5.67k | } |
148 | | |
149 | 582 | bool isTaintedOrPointsToTainted(ProgramStateRef State, SVal ExprSVal) { |
150 | 582 | return getTaintedPointeeOrPointer(State, ExprSVal).has_value(); |
151 | 582 | } |
152 | | |
153 | | /// Helps in printing taint diagnostics. |
154 | | /// Marks the incoming parameters of a function interesting (to be printed) |
155 | | /// when the return value, or the outgoing parameters are tainted. |
156 | | const NoteTag *taintOriginTrackerTag(CheckerContext &C, |
157 | | std::vector<SymbolRef> TaintedSymbols, |
158 | | std::vector<ArgIdxTy> TaintedArgs, |
159 | 1.47k | const LocationContext *CallLocation) { |
160 | 1.47k | return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols), |
161 | 1.47k | TaintedArgs = std::move(TaintedArgs), CallLocation]( |
162 | 1.98k | PathSensitiveBugReport &BR) -> std::string { |
163 | 1.98k | SmallString<256> Msg; |
164 | | // We give diagnostics only for taint related reports |
165 | 1.98k | if (!BR.isInteresting(CallLocation) || |
166 | 1.98k | BR.getBugType().getCategory() != categories::TaintedData309 ) { |
167 | 1.67k | return ""; |
168 | 1.67k | } |
169 | 309 | if (TaintedSymbols.empty()) |
170 | 239 | return "Taint originated here"; |
171 | | |
172 | 77 | for (auto Sym : TaintedSymbols)70 { |
173 | 77 | BR.markInteresting(Sym); |
174 | 77 | } |
175 | 70 | LLVM_DEBUG(for (auto Arg |
176 | 70 | : TaintedArgs) { |
177 | 70 | llvm::dbgs() << "Taint Propagated from argument " << Arg + 1 << "\n"; |
178 | 70 | }); |
179 | 70 | return ""; |
180 | 309 | }); |
181 | 1.47k | } |
182 | | |
183 | | /// Helps in printing taint diagnostics. |
184 | | /// Marks the function interesting (to be printed) |
185 | | /// when the return value, or the outgoing parameters are tainted. |
186 | | const NoteTag *taintPropagationExplainerTag( |
187 | | CheckerContext &C, std::vector<SymbolRef> TaintedSymbols, |
188 | 1.18k | std::vector<ArgIdxTy> TaintedArgs, const LocationContext *CallLocation) { |
189 | 1.18k | assert(TaintedSymbols.size() == TaintedArgs.size()); |
190 | 1.18k | return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols), |
191 | 1.18k | TaintedArgs = std::move(TaintedArgs), CallLocation]( |
192 | 1.85k | PathSensitiveBugReport &BR) -> std::string { |
193 | 1.85k | SmallString<256> Msg; |
194 | 1.85k | llvm::raw_svector_ostream Out(Msg); |
195 | | // We give diagnostics only for taint related reports |
196 | 1.85k | if (TaintedSymbols.empty() || |
197 | 1.85k | BR.getBugType().getCategory() != categories::TaintedData) { |
198 | 1.40k | return ""; |
199 | 1.40k | } |
200 | 444 | int nofTaintedArgs = 0; |
201 | 654 | for (auto [Idx, Sym] : llvm::enumerate(TaintedSymbols)) { |
202 | 654 | if (BR.isInteresting(Sym)) { |
203 | 313 | BR.markInteresting(CallLocation); |
204 | 313 | if (TaintedArgs[Idx] != ReturnValueIndex) { |
205 | 236 | LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to argument " |
206 | 236 | << TaintedArgs[Idx] + 1 << "\n"); |
207 | 236 | if (nofTaintedArgs == 0) |
208 | 235 | Out << "Taint propagated to the "; |
209 | 1 | else |
210 | 1 | Out << ", "; |
211 | 236 | Out << TaintedArgs[Idx] + 1 |
212 | 236 | << llvm::getOrdinalSuffix(TaintedArgs[Idx] + 1) << " argument"; |
213 | 236 | nofTaintedArgs++; |
214 | 236 | } else { |
215 | 77 | LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to return value.\n"); |
216 | 77 | Out << "Taint propagated to the return value"; |
217 | 77 | } |
218 | 313 | } |
219 | 654 | } |
220 | 444 | return std::string(Out.str()); |
221 | 1.85k | }); |
222 | 1.18k | } |
223 | | |
224 | | /// ArgSet is used to describe arguments relevant for taint detection or |
225 | | /// taint application. A discrete set of argument indexes and a variadic |
226 | | /// argument list signified by a starting index are supported. |
227 | | class ArgSet { |
228 | | public: |
229 | 7.85k | ArgSet() = default; |
230 | | ArgSet(ArgVecTy &&DiscreteArgs, |
231 | | std::optional<ArgIdxTy> VariadicIndex = std::nullopt) |
232 | 5.94k | : DiscreteArgs(std::move(DiscreteArgs)), |
233 | 5.94k | VariadicIndex(std::move(VariadicIndex)) {} |
234 | | |
235 | 16.7k | bool contains(ArgIdxTy ArgIdx) const { |
236 | 16.7k | if (llvm::is_contained(DiscreteArgs, ArgIdx)) |
237 | 2.51k | return true; |
238 | | |
239 | 14.2k | return VariadicIndex && ArgIdx >= *VariadicIndex1.55k ; |
240 | 16.7k | } |
241 | | |
242 | 1.59k | bool isEmpty() const { return DiscreteArgs.empty() && !VariadicIndex835 ; } |
243 | | |
244 | | private: |
245 | | ArgVecTy DiscreteArgs; |
246 | | std::optional<ArgIdxTy> VariadicIndex; |
247 | | }; |
248 | | |
249 | | /// A struct used to specify taint propagation rules for a function. |
250 | | /// |
251 | | /// If any of the possible taint source arguments is tainted, all of the |
252 | | /// destination arguments should also be tainted. If ReturnValueIndex is added |
253 | | /// to the dst list, the return value will be tainted. |
254 | | class GenericTaintRule { |
255 | | /// Arguments which are taints sinks and should be checked, and a report |
256 | | /// should be emitted if taint reaches these. |
257 | | ArgSet SinkArgs; |
258 | | /// Arguments which should be sanitized on function return. |
259 | | ArgSet FilterArgs; |
260 | | /// Arguments which can participate in taint propagation. If any of the |
261 | | /// arguments in PropSrcArgs is tainted, all arguments in PropDstArgs should |
262 | | /// be tainted. |
263 | | ArgSet PropSrcArgs; |
264 | | ArgSet PropDstArgs; |
265 | | |
266 | | /// A message that explains why the call is sensitive to taint. |
267 | | std::optional<StringRef> SinkMsg; |
268 | | |
269 | | GenericTaintRule() = default; |
270 | | |
271 | | GenericTaintRule(ArgSet &&Sink, ArgSet &&Filter, ArgSet &&Src, ArgSet &&Dst, |
272 | | std::optional<StringRef> SinkMsg = std::nullopt) |
273 | 3.45k | : SinkArgs(std::move(Sink)), FilterArgs(std::move(Filter)), |
274 | 3.45k | PropSrcArgs(std::move(Src)), PropDstArgs(std::move(Dst)), |
275 | 3.45k | SinkMsg(SinkMsg) {} |
276 | | |
277 | | public: |
278 | | /// Make a rule that reports a warning if taint reaches any of \p FilterArgs |
279 | | /// arguments. |
280 | | static GenericTaintRule Sink(ArgSet &&SinkArgs, |
281 | 450 | std::optional<StringRef> Msg = std::nullopt) { |
282 | 450 | return {std::move(SinkArgs), {}, {}, {}, Msg}; |
283 | 450 | } |
284 | | |
285 | | /// Make a rule that sanitizes all FilterArgs arguments. |
286 | 12 | static GenericTaintRule Filter(ArgSet &&FilterArgs) { |
287 | 12 | return {{}, std::move(FilterArgs), {}, {}}; |
288 | 12 | } |
289 | | |
290 | | /// Make a rule that unconditionally taints all Args. |
291 | | /// If Func is provided, it must also return true for taint to propagate. |
292 | 623 | static GenericTaintRule Source(ArgSet &&SourceArgs) { |
293 | 623 | return {{}, {}, {}, std::move(SourceArgs)}; |
294 | 623 | } |
295 | | |
296 | | /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted. |
297 | 2.23k | static GenericTaintRule Prop(ArgSet &&SrcArgs, ArgSet &&DstArgs) { |
298 | 2.23k | return {{}, {}, std::move(SrcArgs), std::move(DstArgs)}; |
299 | 2.23k | } |
300 | | |
301 | | /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted. |
302 | | static GenericTaintRule |
303 | | SinkProp(ArgSet &&SinkArgs, ArgSet &&SrcArgs, ArgSet &&DstArgs, |
304 | 130 | std::optional<StringRef> Msg = std::nullopt) { |
305 | 130 | return { |
306 | 130 | std::move(SinkArgs), {}, std::move(SrcArgs), std::move(DstArgs), Msg}; |
307 | 130 | } |
308 | | |
309 | | /// Process a function which could either be a taint source, a taint sink, a |
310 | | /// taint filter or a taint propagator. |
311 | | void process(const GenericTaintChecker &Checker, const CallEvent &Call, |
312 | | CheckerContext &C) const; |
313 | | |
314 | | /// Handles the resolution of indexes of type ArgIdxTy to Expr*-s. |
315 | 19.5k | static const Expr *GetArgExpr(ArgIdxTy ArgIdx, const CallEvent &Call) { |
316 | 19.5k | return ArgIdx == ReturnValueIndex ? Call.getOriginExpr()6.25k |
317 | 19.5k | : Call.getArgExpr(ArgIdx)13.3k ; |
318 | 19.5k | }; |
319 | | |
320 | | /// Functions for custom taintedness propagation. |
321 | | static bool UntrustedEnv(CheckerContext &C); |
322 | | }; |
323 | | |
324 | | using RuleLookupTy = CallDescriptionMap<GenericTaintRule>; |
325 | | |
326 | | /// Used to parse the configuration file. |
327 | | struct TaintConfiguration { |
328 | | using NameScopeArgs = std::tuple<std::string, std::string, ArgVecTy>; |
329 | | enum class VariadicType { None, Src, Dst }; |
330 | | |
331 | | struct Common { |
332 | | std::string Name; |
333 | | std::string Scope; |
334 | | }; |
335 | | |
336 | | struct Sink : Common { |
337 | | ArgVecTy SinkArgs; |
338 | | }; |
339 | | |
340 | | struct Filter : Common { |
341 | | ArgVecTy FilterArgs; |
342 | | }; |
343 | | |
344 | | struct Propagation : Common { |
345 | | ArgVecTy SrcArgs; |
346 | | ArgVecTy DstArgs; |
347 | | VariadicType VarType; |
348 | | ArgIdxTy VarIndex; |
349 | | }; |
350 | | |
351 | | std::vector<Propagation> Propagations; |
352 | | std::vector<Filter> Filters; |
353 | | std::vector<Sink> Sinks; |
354 | | |
355 | 5 | TaintConfiguration() = default; |
356 | | TaintConfiguration(const TaintConfiguration &) = default; |
357 | 4 | TaintConfiguration(TaintConfiguration &&) = default; |
358 | | TaintConfiguration &operator=(const TaintConfiguration &) = default; |
359 | | TaintConfiguration &operator=(TaintConfiguration &&) = default; |
360 | | }; |
361 | | |
362 | | struct GenericTaintRuleParser { |
363 | 26 | GenericTaintRuleParser(CheckerManager &Mgr) : Mgr(Mgr) {} |
364 | | /// Container type used to gather call identification objects grouped into |
365 | | /// pairs with their corresponding taint rules. It is temporary as it is used |
366 | | /// to finally initialize RuleLookupTy, which is considered to be immutable. |
367 | | using RulesContTy = std::vector<std::pair<CallDescription, GenericTaintRule>>; |
368 | | RulesContTy parseConfiguration(const std::string &Option, |
369 | | TaintConfiguration &&Config) const; |
370 | | |
371 | | private: |
372 | | using NamePartsTy = llvm::SmallVector<StringRef, 2>; |
373 | | |
374 | | /// Validate part of the configuration, which contains a list of argument |
375 | | /// indexes. |
376 | | void validateArgVector(const std::string &Option, const ArgVecTy &Args) const; |
377 | | |
378 | | template <typename Config> static NamePartsTy parseNameParts(const Config &C); |
379 | | |
380 | | // Takes the config and creates a CallDescription for it and associates a Rule |
381 | | // with that. |
382 | | template <typename Config> |
383 | | static void consumeRulesFromConfig(const Config &C, GenericTaintRule &&Rule, |
384 | | RulesContTy &Rules); |
385 | | |
386 | | void parseConfig(const std::string &Option, TaintConfiguration::Sink &&P, |
387 | | RulesContTy &Rules) const; |
388 | | void parseConfig(const std::string &Option, TaintConfiguration::Filter &&P, |
389 | | RulesContTy &Rules) const; |
390 | | void parseConfig(const std::string &Option, |
391 | | TaintConfiguration::Propagation &&P, |
392 | | RulesContTy &Rules) const; |
393 | | |
394 | | CheckerManager &Mgr; |
395 | | }; |
396 | | |
397 | | class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> { |
398 | | public: |
399 | | void checkPreCall(const CallEvent &Call, CheckerContext &C) const; |
400 | | void checkPostCall(const CallEvent &Call, CheckerContext &C) const; |
401 | | |
402 | | void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, |
403 | | const char *Sep) const override; |
404 | | |
405 | | /// Generate a report if the expression is tainted or points to tainted data. |
406 | | bool generateReportIfTainted(const Expr *E, StringRef Msg, |
407 | | CheckerContext &C) const; |
408 | | |
409 | | private: |
410 | | const BugType BT{this, "Use of Untrusted Data", categories::TaintedData}; |
411 | | |
412 | | bool checkUncontrolledFormatString(const CallEvent &Call, |
413 | | CheckerContext &C) const; |
414 | | |
415 | | void taintUnsafeSocketProtocol(const CallEvent &Call, |
416 | | CheckerContext &C) const; |
417 | | |
418 | | /// Default taint rules are initalized with the help of a CheckerContext to |
419 | | /// access the names of built-in functions like memcpy. |
420 | | void initTaintRules(CheckerContext &C) const; |
421 | | |
422 | | /// CallDescription currently cannot restrict matches to the global namespace |
423 | | /// only, which is why multiple CallDescriptionMaps are used, as we want to |
424 | | /// disambiguate global C functions from functions inside user-defined |
425 | | /// namespaces. |
426 | | // TODO: Remove separation to simplify matching logic once CallDescriptions |
427 | | // are more expressive. |
428 | | |
429 | | mutable std::optional<RuleLookupTy> StaticTaintRules; |
430 | | mutable std::optional<RuleLookupTy> DynamicTaintRules; |
431 | | }; |
432 | | } // end of anonymous namespace |
433 | | |
434 | | /// YAML serialization mapping. |
435 | | LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Sink) |
436 | | LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Filter) |
437 | | LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Propagation) |
438 | | |
439 | | namespace llvm { |
440 | | namespace yaml { |
441 | | template <> struct MappingTraits<TaintConfiguration> { |
442 | 5 | static void mapping(IO &IO, TaintConfiguration &Config) { |
443 | 5 | IO.mapOptional("Propagations", Config.Propagations); |
444 | 5 | IO.mapOptional("Filters", Config.Filters); |
445 | 5 | IO.mapOptional("Sinks", Config.Sinks); |
446 | 5 | } |
447 | | }; |
448 | | |
449 | | template <> struct MappingTraits<TaintConfiguration::Sink> { |
450 | 9 | static void mapping(IO &IO, TaintConfiguration::Sink &Sink) { |
451 | 9 | IO.mapRequired("Name", Sink.Name); |
452 | 9 | IO.mapOptional("Scope", Sink.Scope); |
453 | 9 | IO.mapRequired("Args", Sink.SinkArgs); |
454 | 9 | } |
455 | | }; |
456 | | |
457 | | template <> struct MappingTraits<TaintConfiguration::Filter> { |
458 | 12 | static void mapping(IO &IO, TaintConfiguration::Filter &Filter) { |
459 | 12 | IO.mapRequired("Name", Filter.Name); |
460 | 12 | IO.mapOptional("Scope", Filter.Scope); |
461 | 12 | IO.mapRequired("Args", Filter.FilterArgs); |
462 | 12 | } |
463 | | }; |
464 | | |
465 | | template <> struct MappingTraits<TaintConfiguration::Propagation> { |
466 | 26 | static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation) { |
467 | 26 | IO.mapRequired("Name", Propagation.Name); |
468 | 26 | IO.mapOptional("Scope", Propagation.Scope); |
469 | 26 | IO.mapOptional("SrcArgs", Propagation.SrcArgs); |
470 | 26 | IO.mapOptional("DstArgs", Propagation.DstArgs); |
471 | 26 | IO.mapOptional("VariadicType", Propagation.VarType); |
472 | 26 | IO.mapOptional("VariadicIndex", Propagation.VarIndex); |
473 | 26 | } |
474 | | }; |
475 | | |
476 | | template <> struct ScalarEnumerationTraits<TaintConfiguration::VariadicType> { |
477 | 9 | static void enumeration(IO &IO, TaintConfiguration::VariadicType &Value) { |
478 | 9 | IO.enumCase(Value, "None", TaintConfiguration::VariadicType::None); |
479 | 9 | IO.enumCase(Value, "Src", TaintConfiguration::VariadicType::Src); |
480 | 9 | IO.enumCase(Value, "Dst", TaintConfiguration::VariadicType::Dst); |
481 | 9 | } |
482 | | }; |
483 | | } // namespace yaml |
484 | | } // namespace llvm |
485 | | |
486 | | /// A set which is used to pass information from call pre-visit instruction |
487 | | /// to the call post-visit. The values are signed integers, which are either |
488 | | /// ReturnValueIndex, or indexes of the pointer/reference argument, which |
489 | | /// points to data, which should be tainted on return. |
490 | | REGISTER_MAP_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, const LocationContext *, |
491 | | ImmutableSet<ArgIdxTy>) |
492 | | REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ArgIdxFactory, ArgIdxTy) |
493 | | |
494 | | void GenericTaintRuleParser::validateArgVector(const std::string &Option, |
495 | 71 | const ArgVecTy &Args) const { |
496 | 71 | for (ArgIdxTy Arg : Args) { |
497 | 52 | if (Arg < ReturnValueIndex) { |
498 | 1 | Mgr.reportInvalidCheckerOptionValue( |
499 | 1 | Mgr.getChecker<GenericTaintChecker>(), Option, |
500 | 1 | "an argument number for propagation rules greater or equal to -1"); |
501 | 1 | } |
502 | 52 | } |
503 | 71 | } |
504 | | |
505 | | template <typename Config> |
506 | | GenericTaintRuleParser::NamePartsTy |
507 | 46 | GenericTaintRuleParser::parseNameParts(const Config &C) { |
508 | 46 | NamePartsTy NameParts; |
509 | 46 | if (!C.Scope.empty()) { |
510 | | // If the Scope argument contains multiple "::" parts, those are considered |
511 | | // namespace identifiers. |
512 | 21 | StringRef{C.Scope}.split(NameParts, "::", /*MaxSplit*/ -1, |
513 | 21 | /*KeepEmpty*/ false); |
514 | 21 | } |
515 | 46 | NameParts.emplace_back(C.Name); |
516 | 46 | return NameParts; |
517 | 46 | } GenericTaintChecker.cpp:llvm::SmallVector<llvm::StringRef, 2u> (anonymous namespace)::GenericTaintRuleParser::parseNameParts<(anonymous namespace)::TaintConfiguration::Filter>((anonymous namespace)::TaintConfiguration::Filter const&) Line | Count | Source | 507 | 12 | GenericTaintRuleParser::parseNameParts(const Config &C) { | 508 | 12 | NamePartsTy NameParts; | 509 | 12 | if (!C.Scope.empty()) { | 510 | | // If the Scope argument contains multiple "::" parts, those are considered | 511 | | // namespace identifiers. | 512 | 6 | StringRef{C.Scope}.split(NameParts, "::", /*MaxSplit*/ -1, | 513 | 6 | /*KeepEmpty*/ false); | 514 | 6 | } | 515 | 12 | NameParts.emplace_back(C.Name); | 516 | 12 | return NameParts; | 517 | 12 | } |
GenericTaintChecker.cpp:llvm::SmallVector<llvm::StringRef, 2u> (anonymous namespace)::GenericTaintRuleParser::parseNameParts<(anonymous namespace)::TaintConfiguration::Sink>((anonymous namespace)::TaintConfiguration::Sink const&) Line | Count | Source | 507 | 9 | GenericTaintRuleParser::parseNameParts(const Config &C) { | 508 | 9 | NamePartsTy NameParts; | 509 | 9 | if (!C.Scope.empty()) { | 510 | | // If the Scope argument contains multiple "::" parts, those are considered | 511 | | // namespace identifiers. | 512 | 6 | StringRef{C.Scope}.split(NameParts, "::", /*MaxSplit*/ -1, | 513 | 6 | /*KeepEmpty*/ false); | 514 | 6 | } | 515 | 9 | NameParts.emplace_back(C.Name); | 516 | 9 | return NameParts; | 517 | 9 | } |
GenericTaintChecker.cpp:llvm::SmallVector<llvm::StringRef, 2u> (anonymous namespace)::GenericTaintRuleParser::parseNameParts<(anonymous namespace)::TaintConfiguration::Propagation>((anonymous namespace)::TaintConfiguration::Propagation const&) Line | Count | Source | 507 | 25 | GenericTaintRuleParser::parseNameParts(const Config &C) { | 508 | 25 | NamePartsTy NameParts; | 509 | 25 | if (!C.Scope.empty()) { | 510 | | // If the Scope argument contains multiple "::" parts, those are considered | 511 | | // namespace identifiers. | 512 | 9 | StringRef{C.Scope}.split(NameParts, "::", /*MaxSplit*/ -1, | 513 | 9 | /*KeepEmpty*/ false); | 514 | 9 | } | 515 | 25 | NameParts.emplace_back(C.Name); | 516 | 25 | return NameParts; | 517 | 25 | } |
|
518 | | |
519 | | template <typename Config> |
520 | | void GenericTaintRuleParser::consumeRulesFromConfig(const Config &C, |
521 | | GenericTaintRule &&Rule, |
522 | 46 | RulesContTy &Rules) { |
523 | 46 | NamePartsTy NameParts = parseNameParts(C); |
524 | 46 | Rules.emplace_back(CallDescription(NameParts), std::move(Rule)); |
525 | 46 | } GenericTaintChecker.cpp:void (anonymous namespace)::GenericTaintRuleParser::consumeRulesFromConfig<(anonymous namespace)::TaintConfiguration::Filter>((anonymous namespace)::TaintConfiguration::Filter const&, (anonymous namespace)::GenericTaintRule&&, std::__1::vector<std::__1::pair<clang::ento::CallDescription, (anonymous namespace)::GenericTaintRule>, std::__1::allocator<std::__1::pair<clang::ento::CallDescription, (anonymous namespace)::GenericTaintRule> > >&) Line | Count | Source | 522 | 12 | RulesContTy &Rules) { | 523 | 12 | NamePartsTy NameParts = parseNameParts(C); | 524 | 12 | Rules.emplace_back(CallDescription(NameParts), std::move(Rule)); | 525 | 12 | } |
GenericTaintChecker.cpp:void (anonymous namespace)::GenericTaintRuleParser::consumeRulesFromConfig<(anonymous namespace)::TaintConfiguration::Sink>((anonymous namespace)::TaintConfiguration::Sink const&, (anonymous namespace)::GenericTaintRule&&, std::__1::vector<std::__1::pair<clang::ento::CallDescription, (anonymous namespace)::GenericTaintRule>, std::__1::allocator<std::__1::pair<clang::ento::CallDescription, (anonymous namespace)::GenericTaintRule> > >&) Line | Count | Source | 522 | 9 | RulesContTy &Rules) { | 523 | 9 | NamePartsTy NameParts = parseNameParts(C); | 524 | 9 | Rules.emplace_back(CallDescription(NameParts), std::move(Rule)); | 525 | 9 | } |
GenericTaintChecker.cpp:void (anonymous namespace)::GenericTaintRuleParser::consumeRulesFromConfig<(anonymous namespace)::TaintConfiguration::Propagation>((anonymous namespace)::TaintConfiguration::Propagation const&, (anonymous namespace)::GenericTaintRule&&, std::__1::vector<std::__1::pair<clang::ento::CallDescription, (anonymous namespace)::GenericTaintRule>, std::__1::allocator<std::__1::pair<clang::ento::CallDescription, (anonymous namespace)::GenericTaintRule> > >&) Line | Count | Source | 522 | 25 | RulesContTy &Rules) { | 523 | 25 | NamePartsTy NameParts = parseNameParts(C); | 524 | 25 | Rules.emplace_back(CallDescription(NameParts), std::move(Rule)); | 525 | 25 | } |
|
526 | | |
527 | | void GenericTaintRuleParser::parseConfig(const std::string &Option, |
528 | | TaintConfiguration::Sink &&S, |
529 | 9 | RulesContTy &Rules) const { |
530 | 9 | validateArgVector(Option, S.SinkArgs); |
531 | 9 | consumeRulesFromConfig(S, GenericTaintRule::Sink(std::move(S.SinkArgs)), |
532 | 9 | Rules); |
533 | 9 | } |
534 | | |
535 | | void GenericTaintRuleParser::parseConfig(const std::string &Option, |
536 | | TaintConfiguration::Filter &&S, |
537 | 12 | RulesContTy &Rules) const { |
538 | 12 | validateArgVector(Option, S.FilterArgs); |
539 | 12 | consumeRulesFromConfig(S, GenericTaintRule::Filter(std::move(S.FilterArgs)), |
540 | 12 | Rules); |
541 | 12 | } |
542 | | |
543 | | void GenericTaintRuleParser::parseConfig(const std::string &Option, |
544 | | TaintConfiguration::Propagation &&P, |
545 | 25 | RulesContTy &Rules) const { |
546 | 25 | validateArgVector(Option, P.SrcArgs); |
547 | 25 | validateArgVector(Option, P.DstArgs); |
548 | 25 | bool IsSrcVariadic = P.VarType == TaintConfiguration::VariadicType::Src; |
549 | 25 | bool IsDstVariadic = P.VarType == TaintConfiguration::VariadicType::Dst; |
550 | 25 | std::optional<ArgIdxTy> JustVarIndex = P.VarIndex; |
551 | | |
552 | 25 | ArgSet SrcDesc(std::move(P.SrcArgs), |
553 | 25 | IsSrcVariadic ? JustVarIndex3 : std::nullopt22 ); |
554 | 25 | ArgSet DstDesc(std::move(P.DstArgs), |
555 | 25 | IsDstVariadic ? JustVarIndex6 : std::nullopt19 ); |
556 | | |
557 | 25 | consumeRulesFromConfig( |
558 | 25 | P, GenericTaintRule::Prop(std::move(SrcDesc), std::move(DstDesc)), Rules); |
559 | 25 | } |
560 | | |
561 | | GenericTaintRuleParser::RulesContTy |
562 | | GenericTaintRuleParser::parseConfiguration(const std::string &Option, |
563 | 4 | TaintConfiguration &&Config) const { |
564 | | |
565 | 4 | RulesContTy Rules; |
566 | | |
567 | 4 | for (auto &F : Config.Filters) |
568 | 12 | parseConfig(Option, std::move(F), Rules); |
569 | | |
570 | 4 | for (auto &S : Config.Sinks) |
571 | 9 | parseConfig(Option, std::move(S), Rules); |
572 | | |
573 | 4 | for (auto &P : Config.Propagations) |
574 | 25 | parseConfig(Option, std::move(P), Rules); |
575 | | |
576 | 4 | return Rules; |
577 | 4 | } |
578 | | |
579 | 2.72k | void GenericTaintChecker::initTaintRules(CheckerContext &C) const { |
580 | | // Check for exact name match for functions without builtin substitutes. |
581 | | // Use qualified name, because these are C functions without namespace. |
582 | | |
583 | 2.72k | if (StaticTaintRules || DynamicTaintRules26 ) |
584 | 2.70k | return; |
585 | | |
586 | 26 | using RulesConstructionTy = |
587 | 26 | std::vector<std::pair<CallDescription, GenericTaintRule>>; |
588 | 26 | using TR = GenericTaintRule; |
589 | | |
590 | 26 | const Builtin::Context &BI = C.getASTContext().BuiltinInfo; |
591 | | |
592 | 26 | RulesConstructionTy GlobalCRules{ |
593 | | // Sources |
594 | 26 | {{{"fdopen"}}, TR::Source({{ReturnValueIndex}})}, |
595 | 26 | {{{"fopen"}}, TR::Source({{ReturnValueIndex}})}, |
596 | 26 | {{{"freopen"}}, TR::Source({{ReturnValueIndex}})}, |
597 | 26 | {{{"getch"}}, TR::Source({{ReturnValueIndex}})}, |
598 | 26 | {{{"getchar"}}, TR::Source({{ReturnValueIndex}})}, |
599 | 26 | {{{"getchar_unlocked"}}, TR::Source({{ReturnValueIndex}})}, |
600 | 26 | {{{"gets"}}, TR::Source({{0}, ReturnValueIndex})}, |
601 | 26 | {{{"gets_s"}}, TR::Source({{0}, ReturnValueIndex})}, |
602 | 26 | {{{"scanf"}}, TR::Source({{}, 1})}, |
603 | 26 | {{{"scanf_s"}}, TR::Source({{}, {1}})}, |
604 | 26 | {{{"wgetch"}}, TR::Source({{}, ReturnValueIndex})}, |
605 | | // Sometimes the line between taint sources and propagators is blurry. |
606 | | // _IO_getc is choosen to be a source, but could also be a propagator. |
607 | | // This way it is simpler, as modeling it as a propagator would require |
608 | | // to model the possible sources of _IO_FILE * values, which the _IO_getc |
609 | | // function takes as parameters. |
610 | 26 | {{{"_IO_getc"}}, TR::Source({{ReturnValueIndex}})}, |
611 | 26 | {{{"getcwd"}}, TR::Source({{0, ReturnValueIndex}})}, |
612 | 26 | {{{"getwd"}}, TR::Source({{0, ReturnValueIndex}})}, |
613 | 26 | {{{"readlink"}}, TR::Source({{1, ReturnValueIndex}})}, |
614 | 26 | {{{"readlinkat"}}, TR::Source({{2, ReturnValueIndex}})}, |
615 | 26 | {{{"get_current_dir_name"}}, TR::Source({{ReturnValueIndex}})}, |
616 | 26 | {{{"gethostname"}}, TR::Source({{0}})}, |
617 | 26 | {{{"getnameinfo"}}, TR::Source({{2, 4}})}, |
618 | 26 | {{{"getseuserbyname"}}, TR::Source({{1, 2}})}, |
619 | 26 | {{{"getgroups"}}, TR::Source({{1, ReturnValueIndex}})}, |
620 | 26 | {{{"getlogin"}}, TR::Source({{ReturnValueIndex}})}, |
621 | 26 | {{{"getlogin_r"}}, TR::Source({{0}})}, |
622 | | |
623 | | // Props |
624 | 26 | {{{"accept"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
625 | 26 | {{{"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
626 | 26 | {{{"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
627 | 26 | {{{"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
628 | 26 | {{{"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
629 | 26 | {{{"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
630 | 26 | {{{"fgets"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})}, |
631 | 26 | {{{"fgetws"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})}, |
632 | 26 | {{{"fscanf"}}, TR::Prop({{0}}, {{}, 2})}, |
633 | 26 | {{{"fscanf_s"}}, TR::Prop({{0}}, {{}, {2}})}, |
634 | 26 | {{{"sscanf"}}, TR::Prop({{0}}, {{}, 2})}, |
635 | | |
636 | 26 | {{{"getc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
637 | 26 | {{{"getc_unlocked"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
638 | 26 | {{{"getdelim"}}, TR::Prop({{3}}, {{0}})}, |
639 | 26 | {{{"getline"}}, TR::Prop({{2}}, {{0}})}, |
640 | 26 | {{{"getw"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
641 | 26 | {{{"pread"}}, TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})}, |
642 | 26 | {{{"read"}}, TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})}, |
643 | 26 | {{{"strchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
644 | 26 | {{{"strrchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
645 | 26 | {{{"tolower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
646 | 26 | {{{"toupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
647 | 26 | {{{"fread"}}, TR::Prop({{3}}, {{0, ReturnValueIndex}})}, |
648 | 26 | {{{"recv"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, |
649 | 26 | {{{"recvfrom"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, |
650 | | |
651 | 26 | {{{"ttyname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
652 | 26 | {{{"ttyname_r"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, |
653 | | |
654 | 26 | {{{"basename"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
655 | 26 | {{{"dirname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
656 | 26 | {{{"fnmatch"}}, TR::Prop({{1}}, {{ReturnValueIndex}})}, |
657 | 26 | {{{"memchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
658 | 26 | {{{"memrchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
659 | 26 | {{{"rawmemchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
660 | | |
661 | 26 | {{{"mbtowc"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, |
662 | 26 | {{{"wctomb"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, |
663 | 26 | {{{"wcwidth"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
664 | | |
665 | 26 | {{{"memcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, |
666 | 26 | {{{"memcpy"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, |
667 | 26 | {{{"memmove"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, |
668 | | // If memmem was called with a tainted needle and the search was |
669 | | // successful, that would mean that the value pointed by the return value |
670 | | // has the same content as the needle. If we choose to go by the policy of |
671 | | // content equivalence implies taintedness equivalence, that would mean |
672 | | // haystack should be considered a propagation source argument. |
673 | 26 | {{{"memmem"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
674 | | |
675 | | // The comment for memmem above also applies to strstr. |
676 | 26 | {{{"strstr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
677 | 26 | {{{"strcasestr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
678 | | |
679 | 26 | {{{"strchrnul"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
680 | | |
681 | 26 | {{{"index"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
682 | 26 | {{{"rindex"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
683 | | |
684 | | // FIXME: In case of arrays, only the first element of the array gets |
685 | | // tainted. |
686 | 26 | {{{"qsort"}}, TR::Prop({{0}}, {{0}})}, |
687 | 26 | {{{"qsort_r"}}, TR::Prop({{0}}, {{0}})}, |
688 | | |
689 | 26 | {{{"strcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, |
690 | 26 | {{{"strcasecmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, |
691 | 26 | {{{"strncmp"}}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})}, |
692 | 26 | {{{"strncasecmp"}}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})}, |
693 | 26 | {{{"strspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, |
694 | 26 | {{{"strcspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, |
695 | 26 | {{{"strpbrk"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
696 | 26 | {{{"strndup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
697 | 26 | {{{"strndupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
698 | | |
699 | | // strlen, wcslen, strnlen and alike intentionally don't propagate taint. |
700 | | // See the details here: https://github.com/llvm/llvm-project/pull/66086 |
701 | | |
702 | 26 | {{{"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, |
703 | 26 | {{{"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, |
704 | 26 | {{{"strtoul"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, |
705 | 26 | {{{"strtoull"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, |
706 | | |
707 | 26 | {{{"isalnum"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
708 | 26 | {{{"isalpha"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
709 | 26 | {{{"isascii"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
710 | 26 | {{{"isblank"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
711 | 26 | {{{"iscntrl"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
712 | 26 | {{{"isdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
713 | 26 | {{{"isgraph"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
714 | 26 | {{{"islower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
715 | 26 | {{{"isprint"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
716 | 26 | {{{"ispunct"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
717 | 26 | {{{"isspace"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
718 | 26 | {{{"isupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
719 | 26 | {{{"isxdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
720 | | |
721 | 26 | {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncat)}}, |
722 | 26 | TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})}, |
723 | 26 | {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrlcpy)}}, |
724 | 26 | TR::Prop({{1, 2}}, {{0}})}, |
725 | 26 | {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrlcat)}}, |
726 | 26 | TR::Prop({{1, 2}}, {{0}})}, |
727 | 26 | {{CDF_MaybeBuiltin, {{"snprintf"}}}, |
728 | 26 | TR::Prop({{1}, 3}, {{0, ReturnValueIndex}})}, |
729 | 26 | {{CDF_MaybeBuiltin, {{"sprintf"}}}, |
730 | 26 | TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})}, |
731 | 26 | {{CDF_MaybeBuiltin, {{"strcpy"}}}, |
732 | 26 | TR::Prop({{1}}, {{0, ReturnValueIndex}})}, |
733 | 26 | {{CDF_MaybeBuiltin, {{"stpcpy"}}}, |
734 | 26 | TR::Prop({{1}}, {{0, ReturnValueIndex}})}, |
735 | 26 | {{CDF_MaybeBuiltin, {{"strcat"}}}, |
736 | 26 | TR::Prop({{1}}, {{0, ReturnValueIndex}})}, |
737 | 26 | {{CDF_MaybeBuiltin, {{"wcsncat"}}}, |
738 | 26 | TR::Prop({{1}}, {{0, ReturnValueIndex}})}, |
739 | 26 | {{CDF_MaybeBuiltin, {{"strdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
740 | 26 | {{CDF_MaybeBuiltin, {{"strdupa"}}}, |
741 | 26 | TR::Prop({{0}}, {{ReturnValueIndex}})}, |
742 | 26 | {{CDF_MaybeBuiltin, {{"wcsdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
743 | | |
744 | | // Sinks |
745 | 26 | {{{"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, |
746 | 26 | {{{"popen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, |
747 | 26 | {{{"execl"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, |
748 | 26 | {{{"execle"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, |
749 | 26 | {{{"execlp"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, |
750 | 26 | {{{"execvp"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, |
751 | 26 | {{{"execvP"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, |
752 | 26 | {{{"execve"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, |
753 | 26 | {{{"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, |
754 | 26 | {{CDF_MaybeBuiltin, {{"malloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)}, |
755 | 26 | {{CDF_MaybeBuiltin, {{"calloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)}, |
756 | 26 | {{CDF_MaybeBuiltin, {{"alloca"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)}, |
757 | 26 | {{CDF_MaybeBuiltin, {{"memccpy"}}}, |
758 | 26 | TR::Sink({{3}}, MsgTaintedBufferSize)}, |
759 | 26 | {{CDF_MaybeBuiltin, {{"realloc"}}}, |
760 | 26 | TR::Sink({{1}}, MsgTaintedBufferSize)}, |
761 | 26 | {{{{"setproctitle"}}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}, |
762 | 26 | {{{{"setproctitle_fast"}}}, |
763 | 26 | TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}, |
764 | | |
765 | | // SinkProps |
766 | 26 | {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)}, |
767 | 26 | TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}}, |
768 | 26 | MsgTaintedBufferSize)}, |
769 | 26 | {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}}, |
770 | 26 | TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}}, |
771 | 26 | MsgTaintedBufferSize)}, |
772 | 26 | {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncpy)}}, |
773 | 26 | TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}}, |
774 | 26 | MsgTaintedBufferSize)}, |
775 | 26 | {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrndup)}}, |
776 | 26 | TR::SinkProp({{1}}, {{0, 1}}, {{ReturnValueIndex}}, |
777 | 26 | MsgTaintedBufferSize)}, |
778 | 26 | {{CDF_MaybeBuiltin, {{"bcopy"}}}, |
779 | 26 | TR::SinkProp({{2}}, {{0, 2}}, {{1}}, MsgTaintedBufferSize)}}; |
780 | | |
781 | | // `getenv` returns taint only in untrusted environments. |
782 | 26 | if (TR::UntrustedEnv(C)) { |
783 | | // void setproctitle_init(int argc, char *argv[], char *envp[]) |
784 | 25 | GlobalCRules.push_back( |
785 | 25 | {{{"setproctitle_init"}}, TR::Sink({{1, 2}}, MsgCustomSink)}); |
786 | 25 | GlobalCRules.push_back({{{"getenv"}}, TR::Source({{ReturnValueIndex}})}); |
787 | 25 | } |
788 | | |
789 | 26 | StaticTaintRules.emplace(std::make_move_iterator(GlobalCRules.begin()), |
790 | 26 | std::make_move_iterator(GlobalCRules.end())); |
791 | | |
792 | | // User-provided taint configuration. |
793 | 26 | CheckerManager *Mgr = C.getAnalysisManager().getCheckerManager(); |
794 | 26 | assert(Mgr); |
795 | 26 | GenericTaintRuleParser ConfigParser{*Mgr}; |
796 | 26 | std::string Option{"Config"}; |
797 | 26 | StringRef ConfigFile = |
798 | 26 | Mgr->getAnalyzerOptions().getCheckerStringOption(this, Option); |
799 | 26 | std::optional<TaintConfiguration> Config = |
800 | 26 | getConfiguration<TaintConfiguration>(*Mgr, this, Option, ConfigFile); |
801 | 26 | if (!Config) { |
802 | | // We don't have external taint config, no parsing required. |
803 | 22 | DynamicTaintRules = RuleLookupTy{}; |
804 | 22 | return; |
805 | 22 | } |
806 | | |
807 | 4 | GenericTaintRuleParser::RulesContTy Rules{ |
808 | 4 | ConfigParser.parseConfiguration(Option, std::move(*Config))}; |
809 | | |
810 | 4 | DynamicTaintRules.emplace(std::make_move_iterator(Rules.begin()), |
811 | 4 | std::make_move_iterator(Rules.end())); |
812 | 4 | } |
813 | | |
814 | | void GenericTaintChecker::checkPreCall(const CallEvent &Call, |
815 | 2.72k | CheckerContext &C) const { |
816 | 2.72k | initTaintRules(C); |
817 | | |
818 | | // FIXME: this should be much simpler. |
819 | 2.72k | if (const auto *Rule = |
820 | 2.72k | Call.isGlobalCFunction() ? StaticTaintRules->lookup(Call) : nullptr) |
821 | 1.54k | Rule->process(*this, Call, C); |
822 | 1.18k | else if (const auto *Rule = DynamicTaintRules->lookup(Call)) |
823 | 50 | Rule->process(*this, Call, C); |
824 | | |
825 | | // FIXME: These edge cases are to be eliminated from here eventually. |
826 | | // |
827 | | // Additional check that is not supported by CallDescription. |
828 | | // TODO: Make CallDescription be able to match attributes such as printf-like |
829 | | // arguments. |
830 | 2.72k | checkUncontrolledFormatString(Call, C); |
831 | | |
832 | | // TODO: Modeling sockets should be done in a specific checker. |
833 | | // Socket is a source, which taints the return value. |
834 | 2.72k | taintUnsafeSocketProtocol(Call, C); |
835 | 2.72k | } |
836 | | |
837 | | void GenericTaintChecker::checkPostCall(const CallEvent &Call, |
838 | 2.73k | CheckerContext &C) const { |
839 | | // Set the marked values as tainted. The return value only accessible from |
840 | | // checkPostStmt. |
841 | 2.73k | ProgramStateRef State = C.getState(); |
842 | 2.73k | const StackFrameContext *CurrentFrame = C.getStackFrame(); |
843 | | |
844 | | // Depending on what was tainted at pre-visit, we determined a set of |
845 | | // arguments which should be tainted after the function returns. These are |
846 | | // stored in the state as TaintArgsOnPostVisit set. |
847 | 2.73k | TaintArgsOnPostVisitTy TaintArgsMap = State->get<TaintArgsOnPostVisit>(); |
848 | | |
849 | 2.73k | const ImmutableSet<ArgIdxTy> *TaintArgs = TaintArgsMap.lookup(CurrentFrame); |
850 | 2.73k | if (!TaintArgs) |
851 | 1.55k | return; |
852 | 1.18k | assert(!TaintArgs->isEmpty()); |
853 | | |
854 | 1.18k | LLVM_DEBUG(for (ArgIdxTy I |
855 | 1.18k | : *TaintArgs) { |
856 | 1.18k | llvm::dbgs() << "PostCall<"; |
857 | 1.18k | Call.dump(llvm::dbgs()); |
858 | 1.18k | llvm::dbgs() << "> actually wants to taint arg index: " << I << '\n'; |
859 | 1.18k | }); |
860 | | |
861 | 1.18k | const NoteTag *InjectionTag = nullptr; |
862 | 1.18k | std::vector<SymbolRef> TaintedSymbols; |
863 | 1.18k | std::vector<ArgIdxTy> TaintedIndexes; |
864 | 1.63k | for (ArgIdxTy ArgNum : *TaintArgs) { |
865 | | // Special handling for the tainted return value. |
866 | 1.63k | if (ArgNum == ReturnValueIndex) { |
867 | 740 | State = addTaint(State, Call.getReturnValue()); |
868 | 740 | std::vector<SymbolRef> TaintedSyms = |
869 | 740 | getTaintedSymbols(State, Call.getReturnValue()); |
870 | 740 | if (!TaintedSyms.empty()) { |
871 | 739 | TaintedSymbols.push_back(TaintedSyms[0]); |
872 | 739 | TaintedIndexes.push_back(ArgNum); |
873 | 739 | } |
874 | 740 | continue; |
875 | 740 | } |
876 | | // The arguments are pointer arguments. The data they are pointing at is |
877 | | // tainted after the call. |
878 | 898 | if (auto V = getPointeeOf(State, Call.getArgSVal(ArgNum))) { |
879 | 888 | State = addTaint(State, *V); |
880 | 888 | std::vector<SymbolRef> TaintedSyms = getTaintedSymbols(State, *V); |
881 | 888 | if (!TaintedSyms.empty()) { |
882 | 787 | TaintedSymbols.push_back(TaintedSyms[0]); |
883 | 787 | TaintedIndexes.push_back(ArgNum); |
884 | 787 | } |
885 | 888 | } |
886 | 898 | } |
887 | | // Create a NoteTag callback, which prints to the user where the taintedness |
888 | | // was propagated to. |
889 | 1.18k | InjectionTag = taintPropagationExplainerTag(C, TaintedSymbols, TaintedIndexes, |
890 | 1.18k | Call.getCalleeStackFrame(0)); |
891 | | // Clear up the taint info from the state. |
892 | 1.18k | State = State->remove<TaintArgsOnPostVisit>(CurrentFrame); |
893 | 1.18k | C.addTransition(State, InjectionTag); |
894 | 1.18k | } |
895 | | |
896 | | void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, |
897 | 2 | const char *NL, const char *Sep) const { |
898 | 2 | printTaint(State, Out, NL, Sep); |
899 | 2 | } |
900 | | |
901 | | void GenericTaintRule::process(const GenericTaintChecker &Checker, |
902 | 1.59k | const CallEvent &Call, CheckerContext &C) const { |
903 | 1.59k | ProgramStateRef State = C.getState(); |
904 | 1.59k | const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs()); |
905 | | |
906 | | /// Iterate every call argument, and get their corresponding Expr and SVal. |
907 | 6.25k | const auto ForEachCallArg = [&C, &Call, CallNumArgs](auto &&Fun) { |
908 | 25.8k | for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I19.5k ) { |
909 | 19.5k | const Expr *E = GetArgExpr(I, Call); |
910 | 19.5k | Fun(I, E, C.getSVal(E)); |
911 | 19.5k | } |
912 | 6.25k | }; GenericTaintChecker.cpp:auto (anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_0::operator()<(anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_1>((anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_1&&) const Line | Count | Source | 907 | 1.59k | const auto ForEachCallArg = [&C, &Call, CallNumArgs](auto &&Fun) { | 908 | 6.61k | for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I5.02k ) { | 909 | 5.02k | const Expr *E = GetArgExpr(I, Call); | 910 | 5.02k | Fun(I, E, C.getSVal(E)); | 911 | 5.02k | } | 912 | 1.59k | }; |
GenericTaintChecker.cpp:auto (anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_0::operator()<(anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_2>((anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_2&&) const Line | Count | Source | 907 | 1.59k | const auto ForEachCallArg = [&C, &Call, CallNumArgs](auto &&Fun) { | 908 | 6.61k | for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I5.02k ) { | 909 | 5.02k | const Expr *E = GetArgExpr(I, Call); | 910 | 5.02k | Fun(I, E, C.getSVal(E)); | 911 | 5.02k | } | 912 | 1.59k | }; |
GenericTaintChecker.cpp:auto (anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_0::operator()<(anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_3>((anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_3&&) const Line | Count | Source | 907 | 1.59k | const auto ForEachCallArg = [&C, &Call, CallNumArgs](auto &&Fun) { | 908 | 6.61k | for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I5.02k ) { | 909 | 5.02k | const Expr *E = GetArgExpr(I, Call); | 910 | 5.02k | Fun(I, E, C.getSVal(E)); | 911 | 5.02k | } | 912 | 1.59k | }; |
GenericTaintChecker.cpp:auto (anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_0::operator()<(anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_4>((anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_4&&) const Line | Count | Source | 907 | 1.47k | const auto ForEachCallArg = [&C, &Call, CallNumArgs](auto &&Fun) { | 908 | 5.99k | for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I4.51k ) { | 909 | 4.51k | const Expr *E = GetArgExpr(I, Call); | 910 | 4.51k | Fun(I, E, C.getSVal(E)); | 911 | 4.51k | } | 912 | 1.47k | }; |
|
913 | | |
914 | | /// Check for taint sinks. |
915 | 5.02k | ForEachCallArg([this, &Checker, &C, &State](ArgIdxTy I, const Expr *E, SVal) { |
916 | | // Add taintedness to stdin parameters |
917 | 5.02k | if (isStdin(C.getSVal(E), C.getASTContext())) { |
918 | 33 | State = addTaint(State, C.getSVal(E)); |
919 | 33 | } |
920 | 5.02k | if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(State, C.getSVal(E))582 ) |
921 | 300 | Checker.generateReportIfTainted(E, SinkMsg.value_or(MsgCustomSink), C); |
922 | 5.02k | }); |
923 | | |
924 | | /// Check for taint filters. |
925 | 5.02k | ForEachCallArg([this, &State](ArgIdxTy I, const Expr *E, SVal S) { |
926 | 5.02k | if (FilterArgs.contains(I)) { |
927 | 7 | State = removeTaint(State, S); |
928 | 7 | if (auto P = getPointeeOf(State, S)) |
929 | 7 | State = removeTaint(State, *P); |
930 | 7 | } |
931 | 5.02k | }); |
932 | | |
933 | | /// Check for taint propagation sources. |
934 | | /// A rule will make the destination variables tainted if PropSrcArgs |
935 | | /// is empty (taints the destination |
936 | | /// arguments unconditionally), or if any of its signified |
937 | | /// args are tainted in context of the current CallEvent. |
938 | 1.59k | bool IsMatching = PropSrcArgs.isEmpty(); |
939 | 1.59k | std::vector<SymbolRef> TaintedSymbols; |
940 | 1.59k | std::vector<ArgIdxTy> TaintedIndexes; |
941 | 1.59k | ForEachCallArg([this, &C, &IsMatching, &State, &TaintedSymbols, |
942 | 5.02k | &TaintedIndexes](ArgIdxTy I, const Expr *E, SVal) { |
943 | 5.02k | std::optional<SVal> TaintedSVal = |
944 | 5.02k | getTaintedPointeeOrPointer(State, C.getSVal(E)); |
945 | 5.02k | IsMatching = |
946 | 5.02k | IsMatching || (2.16k PropSrcArgs.contains(I)2.16k && TaintedSVal.has_value()953 ); |
947 | | |
948 | | // We track back tainted arguments except for stdin |
949 | 5.02k | if (TaintedSVal && !isStdin(*TaintedSVal, C.getASTContext())999 ) { |
950 | 966 | std::vector<SymbolRef> TaintedArgSyms = |
951 | 966 | getTaintedSymbols(State, *TaintedSVal); |
952 | 966 | if (!TaintedArgSyms.empty()) { |
953 | 966 | llvm::append_range(TaintedSymbols, TaintedArgSyms); |
954 | 966 | TaintedIndexes.push_back(I); |
955 | 966 | } |
956 | 966 | } |
957 | 5.02k | }); |
958 | | |
959 | | // Early return for propagation rules which dont match. |
960 | | // Matching propagations, Sinks and Filters will pass this point. |
961 | 1.59k | if (!IsMatching) |
962 | 119 | return; |
963 | | |
964 | 4.51k | const auto WouldEscape = [](SVal V, QualType Ty) -> bool 1.47k { |
965 | 4.51k | if (!isa<Loc>(V)) |
966 | 2.32k | return false; |
967 | | |
968 | 2.19k | const bool IsNonConstRef = Ty->isReferenceType() && !Ty.isConstQualified()0 ; |
969 | 2.19k | const bool IsNonConstPtr = |
970 | 2.19k | Ty->isPointerType() && !Ty->getPointeeType().isConstQualified(); |
971 | | |
972 | 2.19k | return IsNonConstRef || IsNonConstPtr; |
973 | 4.51k | }; |
974 | | |
975 | | /// Propagate taint where it is necessary. |
976 | 1.47k | auto &F = State->getStateManager().get_context<ArgIdxFactory>(); |
977 | 1.47k | ImmutableSet<ArgIdxTy> Result = F.getEmptySet(); |
978 | 1.47k | ForEachCallArg( |
979 | 4.51k | [&](ArgIdxTy I, const Expr *E, SVal V) { |
980 | 4.51k | if (PropDstArgs.contains(I)) { |
981 | 1.52k | LLVM_DEBUG(llvm::dbgs() << "PreCall<"; Call.dump(llvm::dbgs()); |
982 | 1.52k | llvm::dbgs() |
983 | 1.52k | << "> prepares tainting arg index: " << I << '\n';); |
984 | 1.52k | Result = F.add(Result, I); |
985 | 1.52k | } |
986 | | |
987 | | // Taint property gets lost if the variable is passed as a |
988 | | // non-const pointer or reference to a function which is |
989 | | // not inlined. For matching rules we want to preserve the taintedness. |
990 | | // TODO: We should traverse all reachable memory regions via the |
991 | | // escaping parameter. Instead of doing that we simply mark only the |
992 | | // referred memory region as tainted. |
993 | 4.51k | if (WouldEscape(V, E->getType()) && getTaintedPointeeOrPointer(State, V)974 ) { |
994 | 157 | LLVM_DEBUG(if (!Result.contains(I)) { |
995 | 157 | llvm::dbgs() << "PreCall<"; |
996 | 157 | Call.dump(llvm::dbgs()); |
997 | 157 | llvm::dbgs() << "> prepares tainting arg index: " << I << '\n'; |
998 | 157 | }); |
999 | 157 | Result = F.add(Result, I); |
1000 | 157 | } |
1001 | 4.51k | }); |
1002 | | |
1003 | 1.47k | if (!Result.isEmpty()) |
1004 | 1.15k | State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result); |
1005 | 1.47k | const NoteTag *InjectionTag = taintOriginTrackerTag( |
1006 | 1.47k | C, std::move(TaintedSymbols), std::move(TaintedIndexes), |
1007 | 1.47k | Call.getCalleeStackFrame(0)); |
1008 | 1.47k | C.addTransition(State, InjectionTag); |
1009 | 1.47k | } |
1010 | | |
1011 | 26 | bool GenericTaintRule::UntrustedEnv(CheckerContext &C) { |
1012 | 26 | return !C.getAnalysisManager() |
1013 | 26 | .getAnalyzerOptions() |
1014 | 26 | .ShouldAssumeControlledEnvironment; |
1015 | 26 | } |
1016 | | |
1017 | | bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, |
1018 | 346 | CheckerContext &C) const { |
1019 | 346 | assert(E); |
1020 | 346 | std::optional<SVal> TaintedSVal = |
1021 | 346 | getTaintedPointeeOrPointer(C.getState(), C.getSVal(E)); |
1022 | | |
1023 | 346 | if (!TaintedSVal) |
1024 | 31 | return false; |
1025 | | |
1026 | | // Generate diagnostic. |
1027 | 315 | if (ExplodedNode *N = C.generateNonFatalErrorNode()) { |
1028 | 315 | auto report = std::make_unique<PathSensitiveBugReport>(BT, Msg, N); |
1029 | 315 | report->addRange(E->getSourceRange()); |
1030 | 387 | for (auto TaintedSym : getTaintedSymbols(C.getState(), *TaintedSVal)) { |
1031 | 387 | report->markInteresting(TaintedSym); |
1032 | 387 | } |
1033 | | |
1034 | 315 | C.emitReport(std::move(report)); |
1035 | 315 | return true; |
1036 | 315 | } |
1037 | 0 | return false; |
1038 | 315 | } |
1039 | | |
1040 | | /// TODO: remove checking for printf format attributes and socket whitelisting |
1041 | | /// from GenericTaintChecker, and that means the following functions: |
1042 | | /// getPrintfFormatArgumentNum, |
1043 | | /// GenericTaintChecker::checkUncontrolledFormatString, |
1044 | | /// GenericTaintChecker::taintUnsafeSocketProtocol |
1045 | | |
1046 | | static bool getPrintfFormatArgumentNum(const CallEvent &Call, |
1047 | | const CheckerContext &C, |
1048 | 2.72k | ArgIdxTy &ArgNum) { |
1049 | | // Find if the function contains a format string argument. |
1050 | | // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, |
1051 | | // vsnprintf, syslog, custom annotated functions. |
1052 | 2.72k | const Decl *CallDecl = Call.getDecl(); |
1053 | 2.72k | if (!CallDecl) |
1054 | 6 | return false; |
1055 | 2.72k | const FunctionDecl *FDecl = CallDecl->getAsFunction(); |
1056 | 2.72k | if (!FDecl) |
1057 | 1 | return false; |
1058 | | |
1059 | 2.71k | const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs()); |
1060 | | |
1061 | 2.71k | for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { |
1062 | 417 | ArgNum = Format->getFormatIdx() - 1; |
1063 | 417 | if ((Format->getType()->getName() == "printf") && CallNumArgs > ArgNum46 ) |
1064 | 46 | return true; |
1065 | 417 | } |
1066 | | |
1067 | 2.67k | return false; |
1068 | 2.71k | } |
1069 | | |
1070 | | bool GenericTaintChecker::checkUncontrolledFormatString( |
1071 | 2.72k | const CallEvent &Call, CheckerContext &C) const { |
1072 | | // Check if the function contains a format string argument. |
1073 | 2.72k | ArgIdxTy ArgNum = 0; |
1074 | 2.72k | if (!getPrintfFormatArgumentNum(Call, C, ArgNum)) |
1075 | 2.68k | return false; |
1076 | | |
1077 | | // If either the format string content or the pointer itself are tainted, |
1078 | | // warn. |
1079 | 46 | return generateReportIfTainted(Call.getArgExpr(ArgNum), |
1080 | 46 | MsgUncontrolledFormatString, C); |
1081 | 2.72k | } |
1082 | | |
1083 | | void GenericTaintChecker::taintUnsafeSocketProtocol(const CallEvent &Call, |
1084 | 2.72k | CheckerContext &C) const { |
1085 | 2.72k | if (Call.getNumArgs() < 1) |
1086 | 121 | return; |
1087 | 2.60k | const IdentifierInfo *ID = Call.getCalleeIdentifier(); |
1088 | 2.60k | if (!ID) |
1089 | 2 | return; |
1090 | 2.60k | if (!ID->getName().equals("socket")) |
1091 | 2.56k | return; |
1092 | | |
1093 | 35 | SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc(); |
1094 | 35 | StringRef DomName = C.getMacroNameOrSpelling(DomLoc); |
1095 | | // Allow internal communication protocols. |
1096 | 35 | bool SafeProtocol = DomName.equals("AF_SYSTEM") || |
1097 | 35 | DomName.equals("AF_LOCAL") || DomName.equals("AF_UNIX") || |
1098 | 35 | DomName.equals("AF_RESERVED_36")30 ; |
1099 | 35 | if (SafeProtocol) |
1100 | 5 | return; |
1101 | | |
1102 | 30 | ProgramStateRef State = C.getState(); |
1103 | 30 | auto &F = State->getStateManager().get_context<ArgIdxFactory>(); |
1104 | 30 | ImmutableSet<ArgIdxTy> Result = F.add(F.getEmptySet(), ReturnValueIndex); |
1105 | 30 | State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result); |
1106 | 30 | C.addTransition(State); |
1107 | 30 | } |
1108 | | |
1109 | | /// Checker registration |
1110 | 26 | void ento::registerGenericTaintChecker(CheckerManager &Mgr) { |
1111 | 26 | Mgr.registerChecker<GenericTaintChecker>(); |
1112 | 26 | } |
1113 | | |
1114 | 52 | bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) { |
1115 | 52 | return true; |
1116 | 52 | } |