Coverage Report

Created: 2023-09-21 18:56

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
Line
Count
Source (jump to first uncovered line)
1
//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This checker defines the attack surface for generic taint propagation.
10
//
11
// The taint information produced by it might be useful to other checkers. For
12
// example, checkers should report errors which involve tainted data more
13
// aggressively, even if the involved symbols are underconstrained.
14
//
15
//===----------------------------------------------------------------------===//
16
17
#include "Yaml.h"
18
#include "clang/AST/Attr.h"
19
#include "clang/Basic/Builtins.h"
20
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
21
#include "clang/StaticAnalyzer/Checkers/Taint.h"
22
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23
#include "clang/StaticAnalyzer/Core/Checker.h"
24
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
25
#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
26
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
27
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
28
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
29
#include "llvm/ADT/StringExtras.h"
30
#include "llvm/Support/YAMLTraits.h"
31
32
#include <limits>
33
#include <memory>
34
#include <optional>
35
#include <utility>
36
#include <vector>
37
38
#define DEBUG_TYPE "taint-checker"
39
40
using namespace clang;
41
using namespace ento;
42
using namespace taint;
43
44
using llvm::ImmutableSet;
45
46
namespace {
47
48
class GenericTaintChecker;
49
50
/// Check for CWE-134: Uncontrolled Format String.
51
constexpr llvm::StringLiteral MsgUncontrolledFormatString =
52
    "Untrusted data is used as a format string "
53
    "(CWE-134: Uncontrolled Format String)";
54
55
/// Check for:
56
/// CERT/STR02-C. "Sanitize data passed to complex subsystems"
57
/// CWE-78, "Failure to Sanitize Data into an OS Command"
58
constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
59
    "Untrusted data is passed to a system call "
60
    "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
61
62
/// Check if tainted data is used as a buffer size in strn.. functions,
63
/// and allocators.
64
constexpr llvm::StringLiteral MsgTaintedBufferSize =
65
    "Untrusted data is used to specify the buffer size "
66
    "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
67
    "for character data and the null terminator)";
68
69
/// Check if tainted data is used as a custom sink's parameter.
70
constexpr llvm::StringLiteral MsgCustomSink =
71
    "Untrusted data is passed to a user-defined sink";
72
73
using ArgIdxTy = int;
74
using ArgVecTy = llvm::SmallVector<ArgIdxTy, 2>;
75
76
/// Denotes the return value.
77
constexpr ArgIdxTy ReturnValueIndex{-1};
78
79
4.31k
static ArgIdxTy fromArgumentCount(unsigned Count) {
80
4.31k
  assert(Count <=
81
4.31k
             static_cast<std::size_t>(std::numeric_limits<ArgIdxTy>::max()) &&
82
4.31k
         "ArgIdxTy is not large enough to represent the number of arguments.");
83
4.31k
  return Count;
84
4.31k
}
85
86
/// Check if the region the expression evaluates to is the standard input,
87
/// and thus, is tainted.
88
/// FIXME: Move this to Taint.cpp.
89
6.02k
bool isStdin(SVal Val, const ASTContext &ACtx) {
90
  // FIXME: What if Val is NonParamVarRegion?
91
92
  // The region should be symbolic; we do not know its value.
93
6.02k
  const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(Val.getAsRegion());
94
6.02k
  if (!SymReg)
95
5.35k
    return false;
96
97
  // Get its symbol and find the declaration region it's pointing to.
98
670
  const auto *DeclReg =
99
670
      dyn_cast_or_null<DeclRegion>(SymReg->getSymbol()->getOriginRegion());
100
670
  if (!DeclReg)
101
185
    return false;
102
103
  // This region corresponds to a declaration, find out if it's a global/extern
104
  // variable named stdin with the proper type.
105
485
  if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
106
485
    D = D->getCanonicalDecl();
107
485
    if (D->getName() == "stdin" && 
D->hasExternalStorage()66
&&
D->isExternC()66
) {
108
66
      const QualType FILETy = ACtx.getFILEType().getCanonicalType();
109
66
      const QualType Ty = D->getType().getCanonicalType();
110
111
66
      if (Ty->isPointerType())
112
66
        return Ty->getPointeeType() == FILETy;
113
66
    }
114
485
  }
115
419
  return false;
116
485
}
117
118
4.84k
SVal getPointeeOf(ProgramStateRef State, Loc LValue) {
119
4.84k
  const QualType ArgTy = LValue.getType(State->getStateManager().getContext());
120
4.84k
  if (!ArgTy->isPointerType() || 
!ArgTy->getPointeeType()->isVoidType()4.75k
)
121
4.80k
    return State->getSVal(LValue);
122
123
  // Do not dereference void pointers. Treat them as byte pointers instead.
124
  // FIXME: we might want to consider more than just the first byte.
125
36
  return State->getSVal(LValue, State->getStateManager().getContext().CharTy);
126
4.84k
}
127
128
/// Given a pointer/reference argument, return the value it refers to.
129
7.82k
std::optional<SVal> getPointeeOf(ProgramStateRef State, SVal Arg) {
130
7.82k
  if (auto LValue = Arg.getAs<Loc>())
131
4.84k
    return getPointeeOf(State, *LValue);
132
2.98k
  return std::nullopt;
133
7.82k
}
134
135
/// Given a pointer, return the SVal of its pointee if the pointee is tainted;
136
/// otherwise return the pointer's own SVal if that is tainted.
137
/// Also considers stdin as a taint source.
138
std::optional<SVal> getTaintedPointeeOrPointer(ProgramStateRef State,
139
6.92k
                                               SVal Arg) {
140
6.92k
  if (auto Pointee = getPointeeOf(State, Arg))
141
3.94k
    if (isTainted(State, *Pointee)) // FIXME: isTainted(...) ? Pointee : None;
142
1.24k
      return Pointee;
143
144
5.67k
  if (isTainted(State, Arg))
145
524
    return Arg;
146
5.15k
  return std::nullopt;
147
5.67k
}
148
149
582
bool isTaintedOrPointsToTainted(ProgramStateRef State, SVal ExprSVal) {
150
582
  return getTaintedPointeeOrPointer(State, ExprSVal).has_value();
151
582
}
152
153
/// Helps in printing taint diagnostics.
154
/// Marks the incoming parameters of a function interesting (to be printed)
155
/// when the return value, or the outgoing parameters are tainted.
156
const NoteTag *taintOriginTrackerTag(CheckerContext &C,
157
                                     std::vector<SymbolRef> TaintedSymbols,
158
                                     std::vector<ArgIdxTy> TaintedArgs,
159
1.47k
                                     const LocationContext *CallLocation) {
160
1.47k
  return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),
161
1.47k
                       TaintedArgs = std::move(TaintedArgs), CallLocation](
162
1.98k
                          PathSensitiveBugReport &BR) -> std::string {
163
1.98k
    SmallString<256> Msg;
164
    // We give diagnostics only for taint related reports
165
1.98k
    if (!BR.isInteresting(CallLocation) ||
166
1.98k
        
BR.getBugType().getCategory() != categories::TaintedData309
) {
167
1.67k
      return "";
168
1.67k
    }
169
309
    if (TaintedSymbols.empty())
170
239
      return "Taint originated here";
171
172
77
    
for (auto Sym : TaintedSymbols)70
{
173
77
      BR.markInteresting(Sym);
174
77
    }
175
70
    LLVM_DEBUG(for (auto Arg
176
70
                    : TaintedArgs) {
177
70
      llvm::dbgs() << "Taint Propagated from argument " << Arg + 1 << "\n";
178
70
    });
179
70
    return "";
180
309
  });
181
1.47k
}
182
183
/// Helps in printing taint diagnostics.
184
/// Marks the function interesting (to be printed)
185
/// when the return value, or the outgoing parameters are tainted.
186
const NoteTag *taintPropagationExplainerTag(
187
    CheckerContext &C, std::vector<SymbolRef> TaintedSymbols,
188
1.18k
    std::vector<ArgIdxTy> TaintedArgs, const LocationContext *CallLocation) {
189
1.18k
  assert(TaintedSymbols.size() == TaintedArgs.size());
190
1.18k
  return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),
191
1.18k
                       TaintedArgs = std::move(TaintedArgs), CallLocation](
192
1.85k
                          PathSensitiveBugReport &BR) -> std::string {
193
1.85k
    SmallString<256> Msg;
194
1.85k
    llvm::raw_svector_ostream Out(Msg);
195
    // We give diagnostics only for taint related reports
196
1.85k
    if (TaintedSymbols.empty() ||
197
1.85k
        BR.getBugType().getCategory() != categories::TaintedData) {
198
1.40k
      return "";
199
1.40k
    }
200
444
    int nofTaintedArgs = 0;
201
654
    for (auto [Idx, Sym] : llvm::enumerate(TaintedSymbols)) {
202
654
      if (BR.isInteresting(Sym)) {
203
313
        BR.markInteresting(CallLocation);
204
313
        if (TaintedArgs[Idx] != ReturnValueIndex) {
205
236
          LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to argument "
206
236
                                  << TaintedArgs[Idx] + 1 << "\n");
207
236
          if (nofTaintedArgs == 0)
208
235
            Out << "Taint propagated to the ";
209
1
          else
210
1
            Out << ", ";
211
236
          Out << TaintedArgs[Idx] + 1
212
236
              << llvm::getOrdinalSuffix(TaintedArgs[Idx] + 1) << " argument";
213
236
          nofTaintedArgs++;
214
236
        } else {
215
77
          LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to return value.\n");
216
77
          Out << "Taint propagated to the return value";
217
77
        }
218
313
      }
219
654
    }
220
444
    return std::string(Out.str());
221
1.85k
  });
222
1.18k
}
223
224
/// ArgSet is used to describe arguments relevant for taint detection or
225
/// taint application. A discrete set of argument indexes and a variadic
226
/// argument list signified by a starting index are supported.
227
class ArgSet {
228
public:
229
7.85k
  ArgSet() = default;
230
  ArgSet(ArgVecTy &&DiscreteArgs,
231
         std::optional<ArgIdxTy> VariadicIndex = std::nullopt)
232
5.94k
      : DiscreteArgs(std::move(DiscreteArgs)),
233
5.94k
        VariadicIndex(std::move(VariadicIndex)) {}
234
235
16.7k
  bool contains(ArgIdxTy ArgIdx) const {
236
16.7k
    if (llvm::is_contained(DiscreteArgs, ArgIdx))
237
2.51k
      return true;
238
239
14.2k
    return VariadicIndex && 
ArgIdx >= *VariadicIndex1.55k
;
240
16.7k
  }
241
242
1.59k
  bool isEmpty() const { return DiscreteArgs.empty() && 
!VariadicIndex835
; }
243
244
private:
245
  ArgVecTy DiscreteArgs;
246
  std::optional<ArgIdxTy> VariadicIndex;
247
};
248
249
/// A struct used to specify taint propagation rules for a function.
250
///
251
/// If any of the possible taint source arguments is tainted, all of the
252
/// destination arguments should also be tainted. If ReturnValueIndex is added
253
/// to the dst list, the return value will be tainted.
254
class GenericTaintRule {
255
  /// Arguments which are taint sinks and should be checked, and a report
256
  /// should be emitted if taint reaches these.
257
  ArgSet SinkArgs;
258
  /// Arguments which should be sanitized on function return.
259
  ArgSet FilterArgs;
260
  /// Arguments which can participate in taint propagation. If any of the
261
  /// arguments in PropSrcArgs is tainted, all arguments in  PropDstArgs should
262
  /// be tainted.
263
  ArgSet PropSrcArgs;
264
  ArgSet PropDstArgs;
265
266
  /// A message that explains why the call is sensitive to taint.
267
  std::optional<StringRef> SinkMsg;
268
269
  GenericTaintRule() = default;
270
271
  GenericTaintRule(ArgSet &&Sink, ArgSet &&Filter, ArgSet &&Src, ArgSet &&Dst,
272
                   std::optional<StringRef> SinkMsg = std::nullopt)
273
3.45k
      : SinkArgs(std::move(Sink)), FilterArgs(std::move(Filter)),
274
3.45k
        PropSrcArgs(std::move(Src)), PropDstArgs(std::move(Dst)),
275
3.45k
        SinkMsg(SinkMsg) {}
276
277
public:
278
  /// Make a rule that reports a warning if taint reaches any of \p SinkArgs
279
  /// arguments.
280
  static GenericTaintRule Sink(ArgSet &&SinkArgs,
281
450
                               std::optional<StringRef> Msg = std::nullopt) {
282
450
    return {std::move(SinkArgs), {}, {}, {}, Msg};
283
450
  }
284
285
  /// Make a rule that sanitizes all FilterArgs arguments.
286
12
  static GenericTaintRule Filter(ArgSet &&FilterArgs) {
287
12
    return {{}, std::move(FilterArgs), {}, {}};
288
12
  }
289
290
  /// Make a rule that unconditionally taints all \p SourceArgs.
291
  /// The rule has no propagation sources; only destination arguments are set.
292
623
  static GenericTaintRule Source(ArgSet &&SourceArgs) {
293
623
    return {{}, {}, {}, std::move(SourceArgs)};
294
623
  }
295
296
  /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted.
297
2.23k
  static GenericTaintRule Prop(ArgSet &&SrcArgs, ArgSet &&DstArgs) {
298
2.23k
    return {{}, {}, std::move(SrcArgs), std::move(DstArgs)};
299
2.23k
  }
300
301
  /// Make a rule that checks \p SinkArgs as sinks and taints all \p DstArgs if any of \p SrcArgs is tainted.
302
  static GenericTaintRule
303
  SinkProp(ArgSet &&SinkArgs, ArgSet &&SrcArgs, ArgSet &&DstArgs,
304
130
           std::optional<StringRef> Msg = std::nullopt) {
305
130
    return {
306
130
        std::move(SinkArgs), {}, std::move(SrcArgs), std::move(DstArgs), Msg};
307
130
  }
308
309
  /// Process a function which could either be a taint source, a taint sink, a
310
  /// taint filter or a taint propagator.
311
  void process(const GenericTaintChecker &Checker, const CallEvent &Call,
312
               CheckerContext &C) const;
313
314
  /// Handles the resolution of indexes of type ArgIdxTy to Expr*-s.
315
19.5k
  static const Expr *GetArgExpr(ArgIdxTy ArgIdx, const CallEvent &Call) {
316
19.5k
    return ArgIdx == ReturnValueIndex ? 
Call.getOriginExpr()6.25k
317
19.5k
                                      : 
Call.getArgExpr(ArgIdx)13.3k
;
318
19.5k
  };
319
320
  /// Functions for custom taintedness propagation.
321
  static bool UntrustedEnv(CheckerContext &C);
322
};
323
324
using RuleLookupTy = CallDescriptionMap<GenericTaintRule>;
325
326
/// Used to parse the configuration file.
327
struct TaintConfiguration {
328
  using NameScopeArgs = std::tuple<std::string, std::string, ArgVecTy>;
329
  enum class VariadicType { None, Src, Dst };
330
331
  struct Common {
332
    std::string Name;
333
    std::string Scope;
334
  };
335
336
  struct Sink : Common {
337
    ArgVecTy SinkArgs;
338
  };
339
340
  struct Filter : Common {
341
    ArgVecTy FilterArgs;
342
  };
343
344
  struct Propagation : Common {
345
    ArgVecTy SrcArgs;
346
    ArgVecTy DstArgs;
347
    VariadicType VarType;
348
    ArgIdxTy VarIndex;
349
  };
350
351
  std::vector<Propagation> Propagations;
352
  std::vector<Filter> Filters;
353
  std::vector<Sink> Sinks;
354
355
5
  TaintConfiguration() = default;
356
  TaintConfiguration(const TaintConfiguration &) = default;
357
4
  TaintConfiguration(TaintConfiguration &&) = default;
358
  TaintConfiguration &operator=(const TaintConfiguration &) = default;
359
  TaintConfiguration &operator=(TaintConfiguration &&) = default;
360
};
361
362
struct GenericTaintRuleParser {
363
26
  GenericTaintRuleParser(CheckerManager &Mgr) : Mgr(Mgr) {}
364
  /// Container type used to gather call identification objects grouped into
365
  /// pairs with their corresponding taint rules. It is temporary as it is used
366
  /// to finally initialize RuleLookupTy, which is considered to be immutable.
367
  using RulesContTy = std::vector<std::pair<CallDescription, GenericTaintRule>>;
368
  RulesContTy parseConfiguration(const std::string &Option,
369
                                 TaintConfiguration &&Config) const;
370
371
private:
372
  using NamePartsTy = llvm::SmallVector<StringRef, 2>;
373
374
  /// Validate part of the configuration, which contains a list of argument
375
  /// indexes.
376
  void validateArgVector(const std::string &Option, const ArgVecTy &Args) const;
377
378
  template <typename Config> static NamePartsTy parseNameParts(const Config &C);
379
380
  // Takes the config and creates a CallDescription for it and associates a Rule
381
  // with that.
382
  template <typename Config>
383
  static void consumeRulesFromConfig(const Config &C, GenericTaintRule &&Rule,
384
                                     RulesContTy &Rules);
385
386
  void parseConfig(const std::string &Option, TaintConfiguration::Sink &&P,
387
                   RulesContTy &Rules) const;
388
  void parseConfig(const std::string &Option, TaintConfiguration::Filter &&P,
389
                   RulesContTy &Rules) const;
390
  void parseConfig(const std::string &Option,
391
                   TaintConfiguration::Propagation &&P,
392
                   RulesContTy &Rules) const;
393
394
  CheckerManager &Mgr;
395
};
396
397
class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
398
public:
399
  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
400
  void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
401
402
  void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
403
                  const char *Sep) const override;
404
405
  /// Generate a report if the expression is tainted or points to tainted data.
406
  bool generateReportIfTainted(const Expr *E, StringRef Msg,
407
                               CheckerContext &C) const;
408
409
private:
410
  const BugType BT{this, "Use of Untrusted Data", categories::TaintedData};
411
412
  bool checkUncontrolledFormatString(const CallEvent &Call,
413
                                     CheckerContext &C) const;
414
415
  void taintUnsafeSocketProtocol(const CallEvent &Call,
416
                                 CheckerContext &C) const;
417
418
  /// Default taint rules are initialized with the help of a CheckerContext to
419
  /// access the names of built-in functions like memcpy.
420
  void initTaintRules(CheckerContext &C) const;
421
422
  /// CallDescription currently cannot restrict matches to the global namespace
423
  /// only, which is why multiple CallDescriptionMaps are used, as we want to
424
  /// disambiguate global C functions from functions inside user-defined
425
  /// namespaces.
426
  // TODO: Remove separation to simplify matching logic once CallDescriptions
427
  // are more expressive.
428
429
  mutable std::optional<RuleLookupTy> StaticTaintRules;
430
  mutable std::optional<RuleLookupTy> DynamicTaintRules;
431
};
432
} // end of anonymous namespace
433
434
/// YAML serialization mapping.
435
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Sink)
436
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Filter)
437
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Propagation)
438
439
namespace llvm {
440
namespace yaml {
441
template <> struct MappingTraits<TaintConfiguration> {
442
5
  static void mapping(IO &IO, TaintConfiguration &Config) {
443
5
    IO.mapOptional("Propagations", Config.Propagations);
444
5
    IO.mapOptional("Filters", Config.Filters);
445
5
    IO.mapOptional("Sinks", Config.Sinks);
446
5
  }
447
};
448
449
template <> struct MappingTraits<TaintConfiguration::Sink> {
450
9
  static void mapping(IO &IO, TaintConfiguration::Sink &Sink) {
451
9
    IO.mapRequired("Name", Sink.Name);
452
9
    IO.mapOptional("Scope", Sink.Scope);
453
9
    IO.mapRequired("Args", Sink.SinkArgs);
454
9
  }
455
};
456
457
template <> struct MappingTraits<TaintConfiguration::Filter> {
458
12
  static void mapping(IO &IO, TaintConfiguration::Filter &Filter) {
459
12
    IO.mapRequired("Name", Filter.Name);
460
12
    IO.mapOptional("Scope", Filter.Scope);
461
12
    IO.mapRequired("Args", Filter.FilterArgs);
462
12
  }
463
};
464
465
template <> struct MappingTraits<TaintConfiguration::Propagation> {
466
26
  static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation) {
467
26
    IO.mapRequired("Name", Propagation.Name);
468
26
    IO.mapOptional("Scope", Propagation.Scope);
469
26
    IO.mapOptional("SrcArgs", Propagation.SrcArgs);
470
26
    IO.mapOptional("DstArgs", Propagation.DstArgs);
471
26
    IO.mapOptional("VariadicType", Propagation.VarType);
472
26
    IO.mapOptional("VariadicIndex", Propagation.VarIndex);
473
26
  }
474
};
475
476
template <> struct ScalarEnumerationTraits<TaintConfiguration::VariadicType> {
477
9
  static void enumeration(IO &IO, TaintConfiguration::VariadicType &Value) {
478
9
    IO.enumCase(Value, "None", TaintConfiguration::VariadicType::None);
479
9
    IO.enumCase(Value, "Src", TaintConfiguration::VariadicType::Src);
480
9
    IO.enumCase(Value, "Dst", TaintConfiguration::VariadicType::Dst);
481
9
  }
482
};
483
} // namespace yaml
484
} // namespace llvm
485
486
/// A set which is used to pass information from call pre-visit instruction
487
/// to the call post-visit. The values are signed integers, which are either
488
/// ReturnValueIndex, or indexes of the pointer/reference argument, which
489
/// points to data, which should be tainted on return.
490
REGISTER_MAP_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, const LocationContext *,
491
                               ImmutableSet<ArgIdxTy>)
492
REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ArgIdxFactory, ArgIdxTy)
493
494
void GenericTaintRuleParser::validateArgVector(const std::string &Option,
495
71
                                               const ArgVecTy &Args) const {
496
71
  for (ArgIdxTy Arg : Args) {
497
52
    if (Arg < ReturnValueIndex) {
498
1
      Mgr.reportInvalidCheckerOptionValue(
499
1
          Mgr.getChecker<GenericTaintChecker>(), Option,
500
1
          "an argument number for propagation rules greater or equal to -1");
501
1
    }
502
52
  }
503
71
}
504
505
template <typename Config>
506
GenericTaintRuleParser::NamePartsTy
507
46
GenericTaintRuleParser::parseNameParts(const Config &C) {
508
46
  NamePartsTy NameParts;
509
46
  if (!C.Scope.empty()) {
510
    // If the Scope argument contains multiple "::" parts, those are considered
511
    // namespace identifiers.
512
21
    StringRef{C.Scope}.split(NameParts, "::", /*MaxSplit*/ -1,
513
21
                             /*KeepEmpty*/ false);
514
21
  }
515
46
  NameParts.emplace_back(C.Name);
516
46
  return NameParts;
517
46
}
GenericTaintChecker.cpp:llvm::SmallVector<llvm::StringRef, 2u> (anonymous namespace)::GenericTaintRuleParser::parseNameParts<(anonymous namespace)::TaintConfiguration::Filter>((anonymous namespace)::TaintConfiguration::Filter const&)
Line
Count
Source
507
12
GenericTaintRuleParser::parseNameParts(const Config &C) {
508
12
  NamePartsTy NameParts;
509
12
  if (!C.Scope.empty()) {
510
    // If the Scope argument contains multiple "::" parts, those are considered
511
    // namespace identifiers.
512
6
    StringRef{C.Scope}.split(NameParts, "::", /*MaxSplit*/ -1,
513
6
                             /*KeepEmpty*/ false);
514
6
  }
515
12
  NameParts.emplace_back(C.Name);
516
12
  return NameParts;
517
12
}
GenericTaintChecker.cpp:llvm::SmallVector<llvm::StringRef, 2u> (anonymous namespace)::GenericTaintRuleParser::parseNameParts<(anonymous namespace)::TaintConfiguration::Sink>((anonymous namespace)::TaintConfiguration::Sink const&)
Line
Count
Source
507
9
GenericTaintRuleParser::parseNameParts(const Config &C) {
508
9
  NamePartsTy NameParts;
509
9
  if (!C.Scope.empty()) {
510
    // If the Scope argument contains multiple "::" parts, those are considered
511
    // namespace identifiers.
512
6
    StringRef{C.Scope}.split(NameParts, "::", /*MaxSplit*/ -1,
513
6
                             /*KeepEmpty*/ false);
514
6
  }
515
9
  NameParts.emplace_back(C.Name);
516
9
  return NameParts;
517
9
}
GenericTaintChecker.cpp:llvm::SmallVector<llvm::StringRef, 2u> (anonymous namespace)::GenericTaintRuleParser::parseNameParts<(anonymous namespace)::TaintConfiguration::Propagation>((anonymous namespace)::TaintConfiguration::Propagation const&)
Line
Count
Source
507
25
GenericTaintRuleParser::parseNameParts(const Config &C) {
508
25
  NamePartsTy NameParts;
509
25
  if (!C.Scope.empty()) {
510
    // If the Scope argument contains multiple "::" parts, those are considered
511
    // namespace identifiers.
512
9
    StringRef{C.Scope}.split(NameParts, "::", /*MaxSplit*/ -1,
513
9
                             /*KeepEmpty*/ false);
514
9
  }
515
25
  NameParts.emplace_back(C.Name);
516
25
  return NameParts;
517
25
}
518
519
template <typename Config>
520
void GenericTaintRuleParser::consumeRulesFromConfig(const Config &C,
521
                                                    GenericTaintRule &&Rule,
522
46
                                                    RulesContTy &Rules) {
523
46
  NamePartsTy NameParts = parseNameParts(C);
524
46
  Rules.emplace_back(CallDescription(NameParts), std::move(Rule));
525
46
}
GenericTaintChecker.cpp:void (anonymous namespace)::GenericTaintRuleParser::consumeRulesFromConfig<(anonymous namespace)::TaintConfiguration::Filter>((anonymous namespace)::TaintConfiguration::Filter const&, (anonymous namespace)::GenericTaintRule&&, std::__1::vector<std::__1::pair<clang::ento::CallDescription, (anonymous namespace)::GenericTaintRule>, std::__1::allocator<std::__1::pair<clang::ento::CallDescription, (anonymous namespace)::GenericTaintRule> > >&)
Line
Count
Source
522
12
                                                    RulesContTy &Rules) {
523
12
  NamePartsTy NameParts = parseNameParts(C);
524
12
  Rules.emplace_back(CallDescription(NameParts), std::move(Rule));
525
12
}
GenericTaintChecker.cpp:void (anonymous namespace)::GenericTaintRuleParser::consumeRulesFromConfig<(anonymous namespace)::TaintConfiguration::Sink>((anonymous namespace)::TaintConfiguration::Sink const&, (anonymous namespace)::GenericTaintRule&&, std::__1::vector<std::__1::pair<clang::ento::CallDescription, (anonymous namespace)::GenericTaintRule>, std::__1::allocator<std::__1::pair<clang::ento::CallDescription, (anonymous namespace)::GenericTaintRule> > >&)
Line
Count
Source
522
9
                                                    RulesContTy &Rules) {
523
9
  NamePartsTy NameParts = parseNameParts(C);
524
9
  Rules.emplace_back(CallDescription(NameParts), std::move(Rule));
525
9
}
GenericTaintChecker.cpp:void (anonymous namespace)::GenericTaintRuleParser::consumeRulesFromConfig<(anonymous namespace)::TaintConfiguration::Propagation>((anonymous namespace)::TaintConfiguration::Propagation const&, (anonymous namespace)::GenericTaintRule&&, std::__1::vector<std::__1::pair<clang::ento::CallDescription, (anonymous namespace)::GenericTaintRule>, std::__1::allocator<std::__1::pair<clang::ento::CallDescription, (anonymous namespace)::GenericTaintRule> > >&)
Line
Count
Source
522
25
                                                    RulesContTy &Rules) {
523
25
  NamePartsTy NameParts = parseNameParts(C);
524
25
  Rules.emplace_back(CallDescription(NameParts), std::move(Rule));
525
25
}
526
527
void GenericTaintRuleParser::parseConfig(const std::string &Option,
528
                                         TaintConfiguration::Sink &&S,
529
9
                                         RulesContTy &Rules) const {
530
9
  validateArgVector(Option, S.SinkArgs);
531
9
  consumeRulesFromConfig(S, GenericTaintRule::Sink(std::move(S.SinkArgs)),
532
9
                         Rules);
533
9
}
534
535
void GenericTaintRuleParser::parseConfig(const std::string &Option,
536
                                         TaintConfiguration::Filter &&S,
537
12
                                         RulesContTy &Rules) const {
538
12
  validateArgVector(Option, S.FilterArgs);
539
12
  consumeRulesFromConfig(S, GenericTaintRule::Filter(std::move(S.FilterArgs)),
540
12
                         Rules);
541
12
}
542
543
void GenericTaintRuleParser::parseConfig(const std::string &Option,
544
                                         TaintConfiguration::Propagation &&P,
545
25
                                         RulesContTy &Rules) const {
546
25
  validateArgVector(Option, P.SrcArgs);
547
25
  validateArgVector(Option, P.DstArgs);
548
25
  bool IsSrcVariadic = P.VarType == TaintConfiguration::VariadicType::Src;
549
25
  bool IsDstVariadic = P.VarType == TaintConfiguration::VariadicType::Dst;
550
25
  std::optional<ArgIdxTy> JustVarIndex = P.VarIndex;
551
552
25
  ArgSet SrcDesc(std::move(P.SrcArgs),
553
25
                 IsSrcVariadic ? 
JustVarIndex3
:
std::nullopt22
);
554
25
  ArgSet DstDesc(std::move(P.DstArgs),
555
25
                 IsDstVariadic ? 
JustVarIndex6
:
std::nullopt19
);
556
557
25
  consumeRulesFromConfig(
558
25
      P, GenericTaintRule::Prop(std::move(SrcDesc), std::move(DstDesc)), Rules);
559
25
}
560
561
GenericTaintRuleParser::RulesContTy
562
GenericTaintRuleParser::parseConfiguration(const std::string &Option,
563
4
                                           TaintConfiguration &&Config) const {
564
565
4
  RulesContTy Rules;
566
567
4
  for (auto &F : Config.Filters)
568
12
    parseConfig(Option, std::move(F), Rules);
569
570
4
  for (auto &S : Config.Sinks)
571
9
    parseConfig(Option, std::move(S), Rules);
572
573
4
  for (auto &P : Config.Propagations)
574
25
    parseConfig(Option, std::move(P), Rules);
575
576
4
  return Rules;
577
4
}
578
579
2.72k
/// Lazily builds the two rule tables used by this checker.
///
/// StaticTaintRules receives the built-in table of C library sources,
/// propagators, sinks and sink-propagators listed below. DynamicTaintRules
/// receives the rules parsed from the user-provided "Config" checker option,
/// or an empty table when no configuration is available. The function returns
/// immediately when either table has already been populated.
void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
  // Check for exact name match for functions without builtin substitutes.
  // Use qualified name, because these are C functions without namespace.

  if (StaticTaintRules || DynamicTaintRules)
    return;

  using RulesConstructionTy =
      std::vector<std::pair<CallDescription, GenericTaintRule>>;
  using TR = GenericTaintRule;

  const Builtin::Context &BI = C.getASTContext().BuiltinInfo;

  RulesConstructionTy GlobalCRules{
      // Sources
      {{{"fdopen"}}, TR::Source({{ReturnValueIndex}})},
      {{{"fopen"}}, TR::Source({{ReturnValueIndex}})},
      {{{"freopen"}}, TR::Source({{ReturnValueIndex}})},
      {{{"getch"}}, TR::Source({{ReturnValueIndex}})},
      {{{"getchar"}}, TR::Source({{ReturnValueIndex}})},
      {{{"getchar_unlocked"}}, TR::Source({{ReturnValueIndex}})},
      // The buffer argument and the return value are listed as discrete
      // indices, like the getcwd entry below. Passing ReturnValueIndex as the
      // second ArgSet constructor argument would set the variadic start index
      // instead (as the scanf entries do on purpose), which presumably selects
      // every argument of the call.
      {{{"gets"}}, TR::Source({{0, ReturnValueIndex}})},
      {{{"gets_s"}}, TR::Source({{0, ReturnValueIndex}})},
      {{{"scanf"}}, TR::Source({{}, 1})},
      {{{"scanf_s"}}, TR::Source({{}, {1}})},
      // Only the returned character is a source; see the note on gets above.
      {{{"wgetch"}}, TR::Source({{ReturnValueIndex}})},
      // Sometimes the line between taint sources and propagators is blurry.
      // _IO_getc is chosen to be a source, but could also be a propagator.
      // This way it is simpler, as modeling it as a propagator would require
      // to model the possible sources of _IO_FILE * values, which the _IO_getc
      // function takes as parameters.
      {{{"_IO_getc"}}, TR::Source({{ReturnValueIndex}})},
      {{{"getcwd"}}, TR::Source({{0, ReturnValueIndex}})},
      {{{"getwd"}}, TR::Source({{0, ReturnValueIndex}})},
      {{{"readlink"}}, TR::Source({{1, ReturnValueIndex}})},
      {{{"readlinkat"}}, TR::Source({{2, ReturnValueIndex}})},
      {{{"get_current_dir_name"}}, TR::Source({{ReturnValueIndex}})},
      {{{"gethostname"}}, TR::Source({{0}})},
      {{{"getnameinfo"}}, TR::Source({{2, 4}})},
      {{{"getseuserbyname"}}, TR::Source({{1, 2}})},
      {{{"getgroups"}}, TR::Source({{1, ReturnValueIndex}})},
      {{{"getlogin"}}, TR::Source({{ReturnValueIndex}})},
      {{{"getlogin_r"}}, TR::Source({{0}})},

      // Props
      {{{"accept"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"fgets"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})},
      {{{"fgetws"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})},
      {{{"fscanf"}}, TR::Prop({{0}}, {{}, 2})},
      {{{"fscanf_s"}}, TR::Prop({{0}}, {{}, {2}})},
      {{{"sscanf"}}, TR::Prop({{0}}, {{}, 2})},

      {{{"getc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"getc_unlocked"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"getdelim"}}, TR::Prop({{3}}, {{0}})},
      {{{"getline"}}, TR::Prop({{2}}, {{0}})},
      {{{"getw"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"pread"}}, TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})},
      {{{"read"}}, TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})},
      {{{"strchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"strrchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"tolower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"toupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"fread"}}, TR::Prop({{3}}, {{0, ReturnValueIndex}})},
      {{{"recv"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
      {{{"recvfrom"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},

      {{{"ttyname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"ttyname_r"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},

      {{{"basename"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"dirname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"fnmatch"}}, TR::Prop({{1}}, {{ReturnValueIndex}})},
      {{{"memchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"memrchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"rawmemchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},

      {{{"mbtowc"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
      {{{"wctomb"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
      {{{"wcwidth"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},

      {{{"memcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
      {{{"memcpy"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
      {{{"memmove"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
      // If memmem was called with a tainted needle and the search was
      // successful, that would mean that the value pointed by the return value
      // has the same content as the needle. If we choose to go by the policy of
      // content equivalence implies taintedness equivalence, that would mean
      // haystack should be considered a propagation source argument.
      {{{"memmem"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},

      // The comment for memmem above also applies to strstr.
      {{{"strstr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"strcasestr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},

      {{{"strchrnul"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},

      {{{"index"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"rindex"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},

      // FIXME: In case of arrays, only the first element of the array gets
      // tainted.
      {{{"qsort"}}, TR::Prop({{0}}, {{0}})},
      {{{"qsort_r"}}, TR::Prop({{0}}, {{0}})},

      {{{"strcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
      {{{"strcasecmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
      {{{"strncmp"}}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
      {{{"strncasecmp"}}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
      {{{"strspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
      {{{"strcspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
      {{{"strpbrk"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"strndup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"strndupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},

      // strlen, wcslen, strnlen and alike intentionally don't propagate taint.
      // See the details here: https://github.com/llvm/llvm-project/pull/66086

      {{{"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
      {{{"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
      {{{"strtoul"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
      {{{"strtoull"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},

      {{{"isalnum"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"isalpha"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"isascii"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"isblank"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"iscntrl"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"isdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"isgraph"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"islower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"isprint"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"ispunct"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"isspace"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"isupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{{"isxdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},

      {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncat)}},
       TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
      {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrlcpy)}},
       TR::Prop({{1, 2}}, {{0}})},
      {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrlcat)}},
       TR::Prop({{1, 2}}, {{0}})},
      {{CDF_MaybeBuiltin, {{"snprintf"}}},
       TR::Prop({{1}, 3}, {{0, ReturnValueIndex}})},
      {{CDF_MaybeBuiltin, {{"sprintf"}}},
       TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})},
      {{CDF_MaybeBuiltin, {{"strcpy"}}},
       TR::Prop({{1}}, {{0, ReturnValueIndex}})},
      {{CDF_MaybeBuiltin, {{"stpcpy"}}},
       TR::Prop({{1}}, {{0, ReturnValueIndex}})},
      {{CDF_MaybeBuiltin, {{"strcat"}}},
       TR::Prop({{1}}, {{0, ReturnValueIndex}})},
      {{CDF_MaybeBuiltin, {{"wcsncat"}}},
       TR::Prop({{1}}, {{0, ReturnValueIndex}})},
      {{CDF_MaybeBuiltin, {{"strdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDF_MaybeBuiltin, {{"strdupa"}}},
       TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDF_MaybeBuiltin, {{"wcsdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})},

      // Sinks
      {{{"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
      {{{"popen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
      {{{"execl"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
      {{{"execle"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
      {{{"execlp"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
      {{{"execvp"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
      {{{"execvP"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
      {{{"execve"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
      {{{"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
      {{CDF_MaybeBuiltin, {{"malloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
      {{CDF_MaybeBuiltin, {{"calloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
      {{CDF_MaybeBuiltin, {{"alloca"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
      {{CDF_MaybeBuiltin, {{"memccpy"}}},
       TR::Sink({{3}}, MsgTaintedBufferSize)},
      {{CDF_MaybeBuiltin, {{"realloc"}}},
       TR::Sink({{1}}, MsgTaintedBufferSize)},
      {{{{"setproctitle"}}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
      {{{{"setproctitle_fast"}}},
       TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},

      // SinkProps
      {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)},
       TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
                    MsgTaintedBufferSize)},
      {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}},
       TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
                    MsgTaintedBufferSize)},
      {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncpy)}},
       TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
                    MsgTaintedBufferSize)},
      {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrndup)}},
       TR::SinkProp({{1}}, {{0, 1}}, {{ReturnValueIndex}},
                    MsgTaintedBufferSize)},
      {{CDF_MaybeBuiltin, {{"bcopy"}}},
       TR::SinkProp({{2}}, {{0, 2}}, {{1}}, MsgTaintedBufferSize)}};

  // `getenv` returns taint only in untrusted environments.
  if (TR::UntrustedEnv(C)) {
    // void setproctitle_init(int argc, char *argv[], char *envp[])
    GlobalCRules.push_back(
        {{{"setproctitle_init"}}, TR::Sink({{1, 2}}, MsgCustomSink)});
    GlobalCRules.push_back({{{"getenv"}}, TR::Source({{ReturnValueIndex}})});
  }

  // Move the collected pairs into the lookup table; GlobalCRules is not used
  // afterwards.
  StaticTaintRules.emplace(std::make_move_iterator(GlobalCRules.begin()),
                           std::make_move_iterator(GlobalCRules.end()));

  // User-provided taint configuration.
  CheckerManager *Mgr = C.getAnalysisManager().getCheckerManager();
  assert(Mgr);
  GenericTaintRuleParser ConfigParser{*Mgr};
  std::string Option{"Config"};
  StringRef ConfigFile =
      Mgr->getAnalyzerOptions().getCheckerStringOption(this, Option);
  std::optional<TaintConfiguration> Config =
      getConfiguration<TaintConfiguration>(*Mgr, this, Option, ConfigFile);
  if (!Config) {
    // We don't have external taint config, no parsing required.
    DynamicTaintRules = RuleLookupTy{};
    return;
  }

  GenericTaintRuleParser::RulesContTy Rules{
      ConfigParser.parseConfiguration(Option, std::move(*Config))};

  DynamicTaintRules.emplace(std::make_move_iterator(Rules.begin()),
                            std::make_move_iterator(Rules.end()));
}
813
814
void GenericTaintChecker::checkPreCall(const CallEvent &Call,
815
2.72k
                                       CheckerContext &C) const {
816
2.72k
  initTaintRules(C);
817
818
  // FIXME: this should be much simpler.
819
2.72k
  if (const auto *Rule =
820
2.72k
          Call.isGlobalCFunction() ? StaticTaintRules->lookup(Call) : nullptr)
821
1.54k
    Rule->process(*this, Call, C);
822
1.18k
  else if (const auto *Rule = DynamicTaintRules->lookup(Call))
823
50
    Rule->process(*this, Call, C);
824
825
  // FIXME: These edge cases are to be eliminated from here eventually.
826
  //
827
  // Additional check that is not supported by CallDescription.
828
  // TODO: Make CallDescription be able to match attributes such as printf-like
829
  // arguments.
830
2.72k
  checkUncontrolledFormatString(Call, C);
831
832
  // TODO: Modeling sockets should be done in a specific checker.
833
  // Socket is a source, which taints the return value.
834
2.72k
  taintUnsafeSocketProtocol(Call, C);
835
2.72k
}
836
837
void GenericTaintChecker::checkPostCall(const CallEvent &Call,
838
2.73k
                                        CheckerContext &C) const {
839
  // Set the marked values as tainted. The return value only accessible from
840
  // checkPostStmt.
841
2.73k
  ProgramStateRef State = C.getState();
842
2.73k
  const StackFrameContext *CurrentFrame = C.getStackFrame();
843
844
  // Depending on what was tainted at pre-visit, we determined a set of
845
  // arguments which should be tainted after the function returns. These are
846
  // stored in the state as TaintArgsOnPostVisit set.
847
2.73k
  TaintArgsOnPostVisitTy TaintArgsMap = State->get<TaintArgsOnPostVisit>();
848
849
2.73k
  const ImmutableSet<ArgIdxTy> *TaintArgs = TaintArgsMap.lookup(CurrentFrame);
850
2.73k
  if (!TaintArgs)
851
1.55k
    return;
852
1.18k
  assert(!TaintArgs->isEmpty());
853
854
1.18k
  LLVM_DEBUG(for (ArgIdxTy I
855
1.18k
                  : *TaintArgs) {
856
1.18k
    llvm::dbgs() << "PostCall<";
857
1.18k
    Call.dump(llvm::dbgs());
858
1.18k
    llvm::dbgs() << "> actually wants to taint arg index: " << I << '\n';
859
1.18k
  });
860
861
1.18k
  const NoteTag *InjectionTag = nullptr;
862
1.18k
  std::vector<SymbolRef> TaintedSymbols;
863
1.18k
  std::vector<ArgIdxTy> TaintedIndexes;
864
1.63k
  for (ArgIdxTy ArgNum : *TaintArgs) {
865
    // Special handling for the tainted return value.
866
1.63k
    if (ArgNum == ReturnValueIndex) {
867
740
      State = addTaint(State, Call.getReturnValue());
868
740
      std::vector<SymbolRef> TaintedSyms =
869
740
          getTaintedSymbols(State, Call.getReturnValue());
870
740
      if (!TaintedSyms.empty()) {
871
739
        TaintedSymbols.push_back(TaintedSyms[0]);
872
739
        TaintedIndexes.push_back(ArgNum);
873
739
      }
874
740
      continue;
875
740
    }
876
    // The arguments are pointer arguments. The data they are pointing at is
877
    // tainted after the call.
878
898
    if (auto V = getPointeeOf(State, Call.getArgSVal(ArgNum))) {
879
888
      State = addTaint(State, *V);
880
888
      std::vector<SymbolRef> TaintedSyms = getTaintedSymbols(State, *V);
881
888
      if (!TaintedSyms.empty()) {
882
787
        TaintedSymbols.push_back(TaintedSyms[0]);
883
787
        TaintedIndexes.push_back(ArgNum);
884
787
      }
885
888
    }
886
898
  }
887
  // Create a NoteTag callback, which prints to the user where the taintedness
888
  // was propagated to.
889
1.18k
  InjectionTag = taintPropagationExplainerTag(C, TaintedSymbols, TaintedIndexes,
890
1.18k
                                              Call.getCalleeStackFrame(0));
891
  // Clear up the taint info from the state.
892
1.18k
  State = State->remove<TaintArgsOnPostVisit>(CurrentFrame);
893
1.18k
  C.addTransition(State, InjectionTag);
894
1.18k
}
895
896
/// Writes the taint information of \p State to \p Out by forwarding to
/// printTaint, passing \p NL and \p Sep through unchanged.
void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
                                     const char *NL, const char *Sep) const {
  printTaint(State, Out, NL, Sep);
}
900
901
void GenericTaintRule::process(const GenericTaintChecker &Checker,
902
1.59k
                               const CallEvent &Call, CheckerContext &C) const {
903
1.59k
  ProgramStateRef State = C.getState();
904
1.59k
  const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs());
905
906
  /// Iterate every call argument, and get their corresponding Expr and SVal.
907
6.25k
  const auto ForEachCallArg = [&C, &Call, CallNumArgs](auto &&Fun) {
908
25.8k
    for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; 
++I19.5k
) {
909
19.5k
      const Expr *E = GetArgExpr(I, Call);
910
19.5k
      Fun(I, E, C.getSVal(E));
911
19.5k
    }
912
6.25k
  };
GenericTaintChecker.cpp:auto (anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_0::operator()<(anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_1>((anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_1&&) const
Line
Count
Source
907
1.59k
  const auto ForEachCallArg = [&C, &Call, CallNumArgs](auto &&Fun) {
908
6.61k
    for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; 
++I5.02k
) {
909
5.02k
      const Expr *E = GetArgExpr(I, Call);
910
5.02k
      Fun(I, E, C.getSVal(E));
911
5.02k
    }
912
1.59k
  };
GenericTaintChecker.cpp:auto (anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_0::operator()<(anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_2>((anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_2&&) const
Line
Count
Source
907
1.59k
  const auto ForEachCallArg = [&C, &Call, CallNumArgs](auto &&Fun) {
908
6.61k
    for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; 
++I5.02k
) {
909
5.02k
      const Expr *E = GetArgExpr(I, Call);
910
5.02k
      Fun(I, E, C.getSVal(E));
911
5.02k
    }
912
1.59k
  };
GenericTaintChecker.cpp:auto (anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_0::operator()<(anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_3>((anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_3&&) const
Line
Count
Source
907
1.59k
  const auto ForEachCallArg = [&C, &Call, CallNumArgs](auto &&Fun) {
908
6.61k
    for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; 
++I5.02k
) {
909
5.02k
      const Expr *E = GetArgExpr(I, Call);
910
5.02k
      Fun(I, E, C.getSVal(E));
911
5.02k
    }
912
1.59k
  };
GenericTaintChecker.cpp:auto (anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_0::operator()<(anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_4>((anonymous namespace)::GenericTaintRule::process((anonymous namespace)::GenericTaintChecker const&, clang::ento::CallEvent const&, clang::ento::CheckerContext&) const::$_4&&) const
Line
Count
Source
907
1.47k
  const auto ForEachCallArg = [&C, &Call, CallNumArgs](auto &&Fun) {
908
5.99k
    for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; 
++I4.51k
) {
909
4.51k
      const Expr *E = GetArgExpr(I, Call);
910
4.51k
      Fun(I, E, C.getSVal(E));
911
4.51k
    }
912
1.47k
  };
913
914
  /// Check for taint sinks.
915
5.02k
  ForEachCallArg([this, &Checker, &C, &State](ArgIdxTy I, const Expr *E, SVal) {
916
    // Add taintedness to stdin parameters
917
5.02k
    if (isStdin(C.getSVal(E), C.getASTContext())) {
918
33
      State = addTaint(State, C.getSVal(E));
919
33
    }
920
5.02k
    if (SinkArgs.contains(I) && 
isTaintedOrPointsToTainted(State, C.getSVal(E))582
)
921
300
      Checker.generateReportIfTainted(E, SinkMsg.value_or(MsgCustomSink), C);
922
5.02k
  });
923
924
  /// Check for taint filters.
925
5.02k
  ForEachCallArg([this, &State](ArgIdxTy I, const Expr *E, SVal S) {
926
5.02k
    if (FilterArgs.contains(I)) {
927
7
      State = removeTaint(State, S);
928
7
      if (auto P = getPointeeOf(State, S))
929
7
        State = removeTaint(State, *P);
930
7
    }
931
5.02k
  });
932
933
  /// Check for taint propagation sources.
934
  /// A rule will make the destination variables tainted if PropSrcArgs
935
  /// is empty (taints the destination
936
  /// arguments unconditionally), or if any of its signified
937
  /// args are tainted in context of the current CallEvent.
938
1.59k
  bool IsMatching = PropSrcArgs.isEmpty();
939
1.59k
  std::vector<SymbolRef> TaintedSymbols;
940
1.59k
  std::vector<ArgIdxTy> TaintedIndexes;
941
1.59k
  ForEachCallArg([this, &C, &IsMatching, &State, &TaintedSymbols,
942
5.02k
                  &TaintedIndexes](ArgIdxTy I, const Expr *E, SVal) {
943
5.02k
    std::optional<SVal> TaintedSVal =
944
5.02k
        getTaintedPointeeOrPointer(State, C.getSVal(E));
945
5.02k
    IsMatching =
946
5.02k
        IsMatching || 
(2.16k
PropSrcArgs.contains(I)2.16k
&&
TaintedSVal.has_value()953
);
947
948
    // We track back tainted arguments except for stdin
949
5.02k
    if (TaintedSVal && 
!isStdin(*TaintedSVal, C.getASTContext())999
) {
950
966
      std::vector<SymbolRef> TaintedArgSyms =
951
966
          getTaintedSymbols(State, *TaintedSVal);
952
966
      if (!TaintedArgSyms.empty()) {
953
966
        llvm::append_range(TaintedSymbols, TaintedArgSyms);
954
966
        TaintedIndexes.push_back(I);
955
966
      }
956
966
    }
957
5.02k
  });
958
959
  // Early return for propagation rules which dont match.
960
  // Matching propagations, Sinks and Filters will pass this point.
961
1.59k
  if (!IsMatching)
962
119
    return;
963
964
4.51k
  
const auto WouldEscape = [](SVal V, QualType Ty) -> bool 1.47k
{
965
4.51k
    if (!isa<Loc>(V))
966
2.32k
      return false;
967
968
2.19k
    const bool IsNonConstRef = Ty->isReferenceType() && 
!Ty.isConstQualified()0
;
969
2.19k
    const bool IsNonConstPtr =
970
2.19k
        Ty->isPointerType() && !Ty->getPointeeType().isConstQualified();
971
972
2.19k
    return IsNonConstRef || IsNonConstPtr;
973
4.51k
  };
974
975
  /// Propagate taint where it is necessary.
976
1.47k
  auto &F = State->getStateManager().get_context<ArgIdxFactory>();
977
1.47k
  ImmutableSet<ArgIdxTy> Result = F.getEmptySet();
978
1.47k
  ForEachCallArg(
979
4.51k
      [&](ArgIdxTy I, const Expr *E, SVal V) {
980
4.51k
        if (PropDstArgs.contains(I)) {
981
1.52k
          LLVM_DEBUG(llvm::dbgs() << "PreCall<"; Call.dump(llvm::dbgs());
982
1.52k
                     llvm::dbgs()
983
1.52k
                     << "> prepares tainting arg index: " << I << '\n';);
984
1.52k
          Result = F.add(Result, I);
985
1.52k
        }
986
987
        // Taint property gets lost if the variable is passed as a
988
        // non-const pointer or reference to a function which is
989
        // not inlined. For matching rules we want to preserve the taintedness.
990
        // TODO: We should traverse all reachable memory regions via the
991
        // escaping parameter. Instead of doing that we simply mark only the
992
        // referred memory region as tainted.
993
4.51k
        if (WouldEscape(V, E->getType()) && 
getTaintedPointeeOrPointer(State, V)974
) {
994
157
          LLVM_DEBUG(if (!Result.contains(I)) {
995
157
            llvm::dbgs() << "PreCall<";
996
157
            Call.dump(llvm::dbgs());
997
157
            llvm::dbgs() << "> prepares tainting arg index: " << I << '\n';
998
157
          });
999
157
          Result = F.add(Result, I);
1000
157
        }
1001
4.51k
      });
1002
1003
1.47k
  if (!Result.isEmpty())
1004
1.15k
    State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result);
1005
1.47k
  const NoteTag *InjectionTag = taintOriginTrackerTag(
1006
1.47k
      C, std::move(TaintedSymbols), std::move(TaintedIndexes),
1007
1.47k
      Call.getCalleeStackFrame(0));
1008
1.47k
  C.addTransition(State, InjectionTag);
1009
1.47k
}
1010
1011
26
/// Returns true unless the analyzer option ShouldAssumeControlledEnvironment
/// is set.
bool GenericTaintRule::UntrustedEnv(CheckerContext &C) {
  const auto &Opts = C.getAnalysisManager().getAnalyzerOptions();
  return !Opts.ShouldAssumeControlledEnvironment;
}
1016
1017
bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
1018
346
                                                  CheckerContext &C) const {
1019
346
  assert(E);
1020
346
  std::optional<SVal> TaintedSVal =
1021
346
      getTaintedPointeeOrPointer(C.getState(), C.getSVal(E));
1022
1023
346
  if (!TaintedSVal)
1024
31
    return false;
1025
1026
  // Generate diagnostic.
1027
315
  if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
1028
315
    auto report = std::make_unique<PathSensitiveBugReport>(BT, Msg, N);
1029
315
    report->addRange(E->getSourceRange());
1030
387
    for (auto TaintedSym : getTaintedSymbols(C.getState(), *TaintedSVal)) {
1031
387
      report->markInteresting(TaintedSym);
1032
387
    }
1033
1034
315
    C.emitReport(std::move(report));
1035
315
    return true;
1036
315
  }
1037
0
  return false;
1038
315
}
1039
1040
/// TODO: remove checking for printf format attributes and socket whitelisting
1041
/// from GenericTaintChecker, and that means the following functions:
1042
/// getPrintfFormatArgumentNum,
1043
/// GenericTaintChecker::checkUncontrolledFormatString,
1044
/// GenericTaintChecker::taintUnsafeSocketProtocol
1045
1046
static bool getPrintfFormatArgumentNum(const CallEvent &Call,
1047
                                       const CheckerContext &C,
1048
2.72k
                                       ArgIdxTy &ArgNum) {
1049
  // Find if the function contains a format string argument.
1050
  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
1051
  // vsnprintf, syslog, custom annotated functions.
1052
2.72k
  const Decl *CallDecl = Call.getDecl();
1053
2.72k
  if (!CallDecl)
1054
6
    return false;
1055
2.72k
  const FunctionDecl *FDecl = CallDecl->getAsFunction();
1056
2.72k
  if (!FDecl)
1057
1
    return false;
1058
1059
2.71k
  const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs());
1060
1061
2.71k
  for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
1062
417
    ArgNum = Format->getFormatIdx() - 1;
1063
417
    if ((Format->getType()->getName() == "printf") && 
CallNumArgs > ArgNum46
)
1064
46
      return true;
1065
417
  }
1066
1067
2.67k
  return false;
1068
2.71k
}
1069
1070
bool GenericTaintChecker::checkUncontrolledFormatString(
1071
2.72k
    const CallEvent &Call, CheckerContext &C) const {
1072
  // Check if the function contains a format string argument.
1073
2.72k
  ArgIdxTy ArgNum = 0;
1074
2.72k
  if (!getPrintfFormatArgumentNum(Call, C, ArgNum))
1075
2.68k
    return false;
1076
1077
  // If either the format string content or the pointer itself are tainted,
1078
  // warn.
1079
46
  return generateReportIfTainted(Call.getArgExpr(ArgNum),
1080
46
                                 MsgUncontrolledFormatString, C);
1081
2.72k
}
1082
1083
void GenericTaintChecker::taintUnsafeSocketProtocol(const CallEvent &Call,
1084
2.72k
                                                    CheckerContext &C) const {
1085
2.72k
  if (Call.getNumArgs() < 1)
1086
121
    return;
1087
2.60k
  const IdentifierInfo *ID = Call.getCalleeIdentifier();
1088
2.60k
  if (!ID)
1089
2
    return;
1090
2.60k
  if (!ID->getName().equals("socket"))
1091
2.56k
    return;
1092
1093
35
  SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc();
1094
35
  StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
1095
  // Allow internal communication protocols.
1096
35
  bool SafeProtocol = DomName.equals("AF_SYSTEM") ||
1097
35
                      DomName.equals("AF_LOCAL") || DomName.equals("AF_UNIX") ||
1098
35
                      
DomName.equals("AF_RESERVED_36")30
;
1099
35
  if (SafeProtocol)
1100
5
    return;
1101
1102
30
  ProgramStateRef State = C.getState();
1103
30
  auto &F = State->getStateManager().get_context<ArgIdxFactory>();
1104
30
  ImmutableSet<ArgIdxTy> Result = F.add(F.getEmptySet(), ReturnValueIndex);
1105
30
  State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result);
1106
30
  C.addTransition(State);
1107
30
}
1108
1109
/// Checker registration
1110
26
/// Registers GenericTaintChecker with the given checker manager.
void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
  Mgr.registerChecker<GenericTaintChecker>();
}
1113
1114
52
/// Registration predicate for GenericTaintChecker: always true, the manager
/// argument is not consulted.
bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) {
  return true;
}