Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
Line
Count
Source (jump to first uncovered line)
1
//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This checker defines the attack surface for generic taint propagation.
10
//
11
// The taint information produced by it might be useful to other checkers. For
12
// example, checkers should report errors which involve tainted data more
13
// aggressively, even if the involved symbols are under constrained.
14
//
15
//===----------------------------------------------------------------------===//
16
17
#include "Taint.h"
18
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
19
#include "clang/AST/Attr.h"
20
#include "clang/Basic/Builtins.h"
21
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
22
#include "clang/StaticAnalyzer/Core/Checker.h"
23
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
24
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
25
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
26
#include <climits>
27
#include <initializer_list>
28
#include <utility>
29
30
using namespace clang;
31
using namespace ento;
32
using namespace taint;
33
34
namespace {
35
class GenericTaintChecker
36
    : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
37
public:
38
0
  static void *getTag() {
39
0
    static int Tag;
40
0
    return &Tag;
41
0
  }
42
43
  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
44
45
  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
46
47
  void printState(raw_ostream &Out, ProgramStateRef State,
48
                  const char *NL, const char *Sep) const override;
49
50
private:
51
  static const unsigned InvalidArgIndex = UINT_MAX;
52
  /// Denotes the return value.
53
  static const unsigned ReturnValueIndex = UINT_MAX - 1;
54
55
  mutable std::unique_ptr<BugType> BT;
56
47
  void initBugType() const {
57
47
    if (!BT)
58
3
      BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
59
47
  }
60
61
  /// Catch taint related bugs. Check if tainted data is passed to a
62
  /// system call etc.
63
  bool checkPre(const CallExpr *CE, CheckerContext &C) const;
64
65
  /// Add taint sources on a pre-visit.
66
  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
67
68
  /// Propagate taint generated at pre-visit.
69
  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
70
71
  /// Check if the region the expression evaluates to is the standard input,
72
  /// and thus, is tainted.
73
  static bool isStdin(const Expr *E, CheckerContext &C);
74
75
  /// Given a pointer argument, return the value it points to.
76
  static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
77
78
  /// Check for CWE-134: Uncontrolled Format String.
79
  static const char MsgUncontrolledFormatString[];
80
  bool checkUncontrolledFormatString(const CallExpr *CE,
81
                                     CheckerContext &C) const;
82
83
  /// Check for:
84
  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
85
  /// CWE-78, "Failure to Sanitize Data into an OS Command"
86
  static const char MsgSanitizeSystemArgs[];
87
  bool checkSystemCall(const CallExpr *CE, StringRef Name,
88
                       CheckerContext &C) const;
89
90
  /// Check if tainted data is used as a buffer size in strn.. functions,
91
  /// and allocators.
92
  static const char MsgTaintedBufferSize[];
93
  bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
94
                              CheckerContext &C) const;
95
96
  /// Generate a report if the expression is tainted or points to tainted data.
97
  bool generateReportIfTainted(const Expr *E, const char Msg[],
98
                               CheckerContext &C) const;
99
100
  using ArgVector = SmallVector<unsigned, 2>;
101
102
  /// A struct used to specify taint propagation rules for a function.
103
  ///
104
  /// If any of the possible taint source arguments is tainted, all of the
105
  /// destination arguments should also be tainted. Use InvalidArgIndex in the
106
  /// src list to specify that all of the arguments can introduce taint. Use
107
  /// InvalidArgIndex in the dst arguments to signify that all the non-const
108
  /// pointer and reference arguments might be tainted on return. If
109
  /// ReturnValueIndex is added to the dst list, the return value will be
110
  /// tainted.
111
  struct TaintPropagationRule {
112
    enum class VariadicType { None, Src, Dst };
113
114
    using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
115
                                         CheckerContext &C);
116
117
    /// List of arguments which can be taint sources and should be checked.
118
    ArgVector SrcArgs;
119
    /// List of arguments which should be tainted on function return.
120
    ArgVector DstArgs;
121
    /// Index for the first variadic parameter if it exists.
122
    unsigned VariadicIndex;
123
    /// Show when a function has variadic parameters. If it has, it marks all
124
    /// of them as source or destination.
125
    VariadicType VarType;
126
    /// Special function for tainted source determination. If defined, it can
127
    /// override the default behavior.
128
    PropagationFuncType PropagationFunc;
129
130
    TaintPropagationRule()
131
        : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
132
1.29k
          PropagationFunc(nullptr) {}
133
134
    TaintPropagationRule(std::initializer_list<unsigned> &&Src,
135
                         std::initializer_list<unsigned> &&Dst,
136
                         VariadicType Var = VariadicType::None,
137
                         unsigned VarIndex = InvalidArgIndex,
138
                         PropagationFuncType Func = nullptr)
139
        : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
140
21.6k
          VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
141
142
    /// Get the propagation rule for a given function.
143
    static TaintPropagationRule
144
    getTaintPropagationRule(const FunctionDecl *FDecl, StringRef Name,
145
                            CheckerContext &C);
146
147
0
    void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
148
0
    void addDstArg(unsigned A) { DstArgs.push_back(A); }
149
150
2.07k
    bool isNull() const {
151
2.07k
      return SrcArgs.empty() && 
DstArgs.empty()1.90k
&&
152
2.07k
             
VariadicType::None == VarType1.86k
;
153
2.07k
    }
154
155
0
    bool isDestinationArgument(unsigned ArgNum) const {
156
0
      return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
157
0
    }
158
159
    static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
160
169
                                           CheckerContext &C) {
161
169
      if (isTainted(State, E, C.getLocationContext()) || 
isStdin(E, C)150
)
162
38
        return true;
163
131
164
131
      if (!E->getType().getTypePtr()->isPointerType())
165
45
        return false;
166
86
167
86
      Optional<SVal> V = getPointedToSVal(C, E);
168
86
      return (V && isTainted(State, *V));
169
86
    }
170
171
    /// Pre-process a function which propagates taint according to the
172
    /// taint rule.
173
    ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
174
175
    // Functions for custom taintedness propagation.
176
    static bool postSocket(bool IsTainted, const CallExpr *CE,
177
                           CheckerContext &C);
178
  };
179
};
180
181
const unsigned GenericTaintChecker::ReturnValueIndex;
182
const unsigned GenericTaintChecker::InvalidArgIndex;
183
184
const char GenericTaintChecker::MsgUncontrolledFormatString[] =
185
    "Untrusted data is used as a format string "
186
    "(CWE-134: Uncontrolled Format String)";
187
188
const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
189
    "Untrusted data is passed to a system call "
190
    "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
191
192
const char GenericTaintChecker::MsgTaintedBufferSize[] =
193
    "Untrusted data is used to specify the buffer size "
194
    "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
195
    "for character data and the null terminator)";
196
197
} // end of anonymous namespace
198
199
/// A set which is used to pass information from call pre-visit instruction
200
/// to the call post-visit. The values are unsigned integers, which are either
201
/// ReturnValueIndex, or indexes of the pointer/reference argument, which
202
/// points to data, which should be tainted on return.
203
REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
204
205
GenericTaintChecker::TaintPropagationRule
206
GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
207
744
    const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) {
208
744
  // TODO: Currently, we might lose precision here: we always mark a return
209
744
  // value as tainted even if it's just a pointer, pointing to tainted data.
210
744
211
744
  // Check for exact name match for functions without builtin substitutes.
212
744
  TaintPropagationRule Rule =
213
744
      llvm::StringSwitch<TaintPropagationRule>(Name)
214
744
          // Source functions
215
744
          // TODO: Add support for vfscanf & family.
216
744
          .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex}))
217
744
          .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex}))
218
744
          .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
219
744
          .Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
220
744
          .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
221
744
          .Case("getchar_unlocked", TaintPropagationRule({}, {ReturnValueIndex}))
222
744
          .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
223
744
          .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
224
744
          .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
225
744
          .Case("socket",
226
744
                TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None,
227
744
                                     InvalidArgIndex,
228
744
                                     &TaintPropagationRule::postSocket))
229
744
          .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex}))
230
744
          // Propagating functions
231
744
          .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex}))
232
744
          .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex}))
233
744
          .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex}))
234
744
          .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex}))
235
744
          .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
236
744
          .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
237
744
          .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2))
238
744
          .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex}))
239
744
          .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex}))
240
744
          .Case("getdelim", TaintPropagationRule({3}, {0}))
241
744
          .Case("getline", TaintPropagationRule({2}, {0}))
242
744
          .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex}))
243
744
          .Case("pread",
244
744
                TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex}))
245
744
          .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
246
744
          .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
247
744
          .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
248
744
          .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
249
744
          .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
250
744
          .Default(TaintPropagationRule());
251
744
252
744
  if (!Rule.isNull())
253
112
    return Rule;
254
632
255
632
  // Check if it's one of the memory setting/copying functions.
256
632
  // This check is specialized but faster than calling isCLibraryFunction.
257
632
  unsigned BId = 0;
258
632
  if ((BId = FDecl->getMemoryFunctionKind()))
259
247
    switch (BId) {
260
247
    case Builtin::BImemcpy:
261
45
    case Builtin::BImemmove:
262
45
    case Builtin::BIstrncpy:
263
45
    case Builtin::BIstrncat:
264
45
      return TaintPropagationRule({1, 2}, {0, ReturnValueIndex});
265
45
    case Builtin::BIstrlcpy:
266
0
    case Builtin::BIstrlcat:
267
0
      return TaintPropagationRule({1, 2}, {0});
268
2
    case Builtin::BIstrndup:
269
2
      return TaintPropagationRule({0, 1}, {ReturnValueIndex});
270
0
271
200
    default:
272
200
      break;
273
585
    };
274
585
275
585
  // Process all other functions which could be defined as builtins.
276
585
  if (Rule.isNull()) {
277
585
    if (C.isCLibraryFunction(FDecl, "snprintf"))
278
5
      return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src,
279
5
                                  3);
280
580
    else if (C.isCLibraryFunction(FDecl, "sprintf"))
281
2
      return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src,
282
2
                                  2);
283
578
    else if (C.isCLibraryFunction(FDecl, "strcpy") ||
284
578
             
C.isCLibraryFunction(FDecl, "stpcpy")568
||
285
578
             
C.isCLibraryFunction(FDecl, "strcat")559
)
286
31
      return TaintPropagationRule({1}, {0, ReturnValueIndex});
287
547
    else if (C.isCLibraryFunction(FDecl, "bcopy"))
288
0
      return TaintPropagationRule({0, 2}, {1});
289
547
    else if (C.isCLibraryFunction(FDecl, "strdup") ||
290
547
             
C.isCLibraryFunction(FDecl, "strdupa")546
)
291
1
      return TaintPropagationRule({0}, {ReturnValueIndex});
292
546
    else if (C.isCLibraryFunction(FDecl, "wcsdup"))
293
0
      return TaintPropagationRule({0}, {ReturnValueIndex});
294
546
  }
295
546
296
546
  // Skipping the following functions, since they might be used for cleansing
297
546
  // or smart memory copy:
298
546
  // - memccpy - copying until hitting a special character.
299
546
300
546
  return TaintPropagationRule();
301
546
}
302
303
void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
304
802
                                       CheckerContext &C) const {
305
802
  // Check for taintedness related errors first: system call, uncontrolled
306
802
  // format string, tainted buffer size.
307
802
  if (checkPre(CE, C))
308
47
    return;
309
755
310
755
  // Marks the function's arguments and/or return value tainted if it is present in
311
755
  // the list.
312
755
  addSourcesPre(CE, C);
313
755
}
314
315
void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
316
765
                                        CheckerContext &C) const {
317
765
  // Set the marked values as tainted. The return value is only accessible from
318
765
  // checkPostStmt.
319
765
  propagateFromPre(CE, C);
320
765
}
321
322
void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
323
2
                                     const char *NL, const char *Sep) const {
324
2
  printTaint(State, Out, NL, Sep);
325
2
}
326
327
void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
328
755
                                        CheckerContext &C) const {
329
755
  ProgramStateRef State = nullptr;
330
755
  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
331
755
  if (!FDecl || 
FDecl->getKind() != Decl::Function754
)
332
11
    return;
333
744
334
744
  StringRef Name = C.getCalleeName(FDecl);
335
744
  if (Name.empty())
336
0
    return;
337
744
338
744
  // First, try generating a propagation rule for this function.
339
744
  TaintPropagationRule Rule =
340
744
      TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
341
744
  if (!Rule.isNull()) {
342
198
    State = Rule.process(CE, C);
343
198
    if (!State)
344
0
      return;
345
198
    C.addTransition(State);
346
198
    return;
347
198
  }
348
546
349
546
  if (!State)
350
546
    return;
351
0
  C.addTransition(State);
352
0
}
353
354
bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
355
765
                                           CheckerContext &C) const {
356
765
  ProgramStateRef State = C.getState();
357
765
358
765
  // Depending on what was tainted at pre-visit, we determined a set of
359
765
  // arguments which should be tainted after the function returns. These are
360
765
  // stored in the state as TaintArgsOnPostVisit set.
361
765
  TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
362
765
  if (TaintArgs.isEmpty())
363
639
    return false;
364
126
365
167
  
for (unsigned ArgNum : TaintArgs)126
{
366
167
    // Special handling for the tainted return value.
367
167
    if (ArgNum == ReturnValueIndex) {
368
60
      State = addTaint(State, CE, C.getLocationContext());
369
60
      continue;
370
60
    }
371
107
372
107
    // The arguments are pointer arguments. The data they are pointing at is
373
107
    // tainted after the call.
374
107
    if (CE->getNumArgs() < (ArgNum + 1))
375
0
      return false;
376
107
    const Expr *Arg = CE->getArg(ArgNum);
377
107
    Optional<SVal> V = getPointedToSVal(C, Arg);
378
107
    if (V)
379
107
      State = addTaint(State, *V);
380
107
  }
381
126
382
126
  // Clear up the taint info from the state.
383
126
  State = State->remove<TaintArgsOnPostVisit>();
384
126
385
126
  if (State != C.getState()) {
386
126
    C.addTransition(State);
387
126
    return true;
388
126
  }
389
0
  return false;
390
0
}
391
392
bool GenericTaintChecker::checkPre(const CallExpr *CE,
393
802
                                   CheckerContext &C) const {
394
802
395
802
  if (checkUncontrolledFormatString(CE, C))
396
16
    return true;
397
786
398
786
  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
399
786
  if (!FDecl || 
FDecl->getKind() != Decl::Function785
)
400
11
    return false;
401
775
402
775
  StringRef Name = C.getCalleeName(FDecl);
403
775
  if (Name.empty())
404
0
    return false;
405
775
406
775
  if (checkSystemCall(CE, Name, C))
407
15
    return true;
408
760
409
760
  if (checkTaintedBufferSize(CE, FDecl, C))
410
16
    return true;
411
744
412
744
  return false;
413
744
}
414
415
Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
416
288
                                                     const Expr *Arg) {
417
288
  ProgramStateRef State = C.getState();
418
288
  SVal AddrVal = C.getSVal(Arg->IgnoreParens());
419
288
  if (AddrVal.isUnknownOrUndef())
420
0
    return None;
421
288
422
288
  Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
423
288
  if (!AddrLoc)
424
50
    return None;
425
238
426
238
  QualType ArgTy = Arg->getType().getCanonicalType();
427
238
  if (!ArgTy->isPointerType())
428
0
    return None;
429
238
430
238
  QualType ValTy = ArgTy->getPointeeType();
431
238
432
238
  // Do not dereference void pointers. Treat them as byte pointers instead.
433
238
  // FIXME: we might want to consider more than just the first byte.
434
238
  if (ValTy->isVoidType())
435
23
    ValTy = C.getASTContext().CharTy;
436
238
437
238
  return State->getSVal(*AddrLoc, ValTy);
438
238
}
439
440
ProgramStateRef
441
GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
442
198
                                                   CheckerContext &C) const {
443
198
  ProgramStateRef State = C.getState();
444
198
445
198
  // Check for taint in arguments.
446
198
  bool IsTainted = true;
447
198
  for (unsigned ArgNum : SrcArgs) {
448
169
    if (ArgNum >= CE->getNumArgs())
449
3
      return State;
450
166
    if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
451
57
      break;
452
166
  }
453
198
454
198
  // Check for taint in variadic arguments.
455
198
  
if (195
!IsTainted195
&&
VariadicType::Src == VarType67
) {
456
3
    // Check if any of the arguments is tainted
457
4
    for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); 
++i1
) {
458
3
      if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
459
2
        break;
460
3
    }
461
3
  }
462
195
463
195
  if (PropagationFunc)
464
12
    IsTainted = PropagationFunc(IsTainted, CE, C);
465
195
466
195
  if (!IsTainted)
467
67
    return State;
468
128
469
128
  // Mark the arguments which should be tainted after the function returns.
470
128
  for (unsigned ArgNum : DstArgs) {
471
100
    // Should mark the return value?
472
100
    if (ArgNum == ReturnValueIndex) {
473
60
      State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
474
60
      continue;
475
60
    }
476
40
477
40
    // Mark the given argument.
478
40
    assert(ArgNum < CE->getNumArgs());
479
40
    State = State->add<TaintArgsOnPostVisit>(ArgNum);
480
40
  }
481
128
482
128
  // Mark all variadic arguments tainted if present.
483
128
  if (VariadicType::Dst == VarType) {
484
64
    // For all pointer and references that were passed in:
485
64
    //   If they are not pointing to const data, mark data as tainted.
486
64
    //   TODO: So far we are just going one level down; ideally we'd need to
487
64
    //         recurse here.
488
133
    for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); 
++i69
) {
489
69
      const Expr *Arg = CE->getArg(i);
490
69
      // Process pointer argument.
491
69
      const Type *ArgTy = Arg->getType().getTypePtr();
492
69
      QualType PType = ArgTy->getPointeeType();
493
69
      if ((!PType.isNull() && 
!PType.isConstQualified()67
) ||
494
69
          
(2
ArgTy->isReferenceType()2
&&
!Arg->getType().isConstQualified()0
))
495
67
        State = State->add<TaintArgsOnPostVisit>(i);
496
69
    }
497
64
  }
498
128
499
128
  return State;
500
128
}
501
502
// If argument 0 (protocol domain) is network, the return value should be tainted.
503
bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/,
504
                                                           const CallExpr *CE,
505
12
                                                           CheckerContext &C) {
506
12
  SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
507
12
  StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
508
12
  // White list the internal communication protocols.
509
12
  if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
510
12
      DomName.equals("AF_UNIX") || 
DomName.equals("AF_RESERVED_36")10
)
511
2
    return false;
512
10
513
10
  return true;
514
10
}
515
516
150
bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
517
150
  ProgramStateRef State = C.getState();
518
150
  SVal Val = C.getSVal(E);
519
150
520
150
  // stdin is a pointer, so it would be a region.
521
150
  const MemRegion *MemReg = Val.getAsRegion();
522
150
523
150
  // The region should be symbolic; we do not know its value.
524
150
  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
525
150
  if (!SymReg)
526
93
    return false;
527
57
528
57
  // Get its symbol and find the declaration region it's pointing to.
529
57
  const SymbolRegionValue *Sm =
530
57
      dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
531
57
  if (!Sm)
532
0
    return false;
533
57
  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
534
57
  if (!DeclReg)
535
0
    return false;
536
57
537
57
  // This region corresponds to a declaration, find out if it's a global/extern
538
57
  // variable named stdin with the proper type.
539
57
  if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
540
57
    D = D->getCanonicalDecl();
541
57
    if ((D->getName().find("stdin") != StringRef::npos) && 
D->isExternC()19
) {
542
19
      const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
543
19
      if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
544
19
                       C.getASTContext().getFILEType().getCanonicalType())
545
19
        return true;
546
38
    }
547
57
  }
548
38
  return false;
549
38
}
550
551
static bool getPrintfFormatArgumentNum(const CallExpr *CE,
552
                                       const CheckerContext &C,
553
802
                                       unsigned int &ArgNum) {
554
802
  // Find if the function contains a format string argument.
555
802
  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
556
802
  // vsnprintf, syslog, custom annotated functions.
557
802
  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
558
802
  if (!FDecl)
559
1
    return false;
560
801
  for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
561
82
    ArgNum = Format->getFormatIdx() - 1;
562
82
    if ((Format->getType()->getName() == "printf") && 
CE->getNumArgs() > ArgNum16
)
563
16
      return true;
564
82
  }
565
801
566
801
  // Or if a function is named setproctitle (this is a heuristic).
567
801
  
if (785
C.getCalleeName(CE).find("setproctitle") != StringRef::npos785
) {
568
12
    ArgNum = 0;
569
12
    return true;
570
12
  }
571
773
572
773
  return false;
573
773
}
574
575
bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
576
                                                  const char Msg[],
577
95
                                                  CheckerContext &C) const {
578
95
  assert(E);
579
95
580
95
  // Check for taint.
581
95
  ProgramStateRef State = C.getState();
582
95
  Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
583
95
  SVal TaintedSVal;
584
95
  if (PointedToSVal && 
isTainted(State, *PointedToSVal)45
)
585
31
    TaintedSVal = *PointedToSVal;
586
64
  else if (isTainted(State, E, C.getLocationContext()))
587
16
    TaintedSVal = C.getSVal(E);
588
48
  else
589
48
    return false;
590
47
591
47
  // Generate diagnostic.
592
47
  if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
593
47
    initBugType();
594
47
    auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
595
47
    report->addRange(E->getSourceRange());
596
47
    report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
597
47
    C.emitReport(std::move(report));
598
47
    return true;
599
47
  }
600
0
  return false;
601
0
}
602
603
bool GenericTaintChecker::checkUncontrolledFormatString(
604
802
    const CallExpr *CE, CheckerContext &C) const {
605
802
  // Check if the function contains a format string argument.
606
802
  unsigned int ArgNum = 0;
607
802
  if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
608
774
    return false;
609
28
610
28
  // If either the format string content or the pointer itself are tainted,
611
28
  // warn.
612
28
  return generateReportIfTainted(CE->getArg(ArgNum),
613
28
                                 MsgUncontrolledFormatString, C);
614
28
}
615
616
bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name,
617
775
                                          CheckerContext &C) const {
618
775
  // TODO: It might make sense to run this check on demand. In some cases,
619
775
  // we should check if the environment has been cleansed here. We also might
620
775
  // need to know if the user was reset before these calls (seteuid).
621
775
  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
622
775
                        .Case("system", 0)
623
775
                        .Case("popen", 0)
624
775
                        .Case("execl", 0)
625
775
                        .Case("execle", 0)
626
775
                        .Case("execlp", 0)
627
775
                        .Case("execv", 0)
628
775
                        .Case("execvp", 0)
629
775
                        .Case("execvP", 0)
630
775
                        .Case("execve", 0)
631
775
                        .Case("dlopen", 0)
632
775
                        .Default(UINT_MAX);
633
775
634
775
  if (ArgNum == UINT_MAX || 
CE->getNumArgs() < (ArgNum + 1)18
)
635
758
    return false;
636
17
637
17
  return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
638
17
}
639
640
// TODO: Should this check be a part of the CString checker?
641
// If yes, should taint be a global setting?
642
bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
643
                                                 const FunctionDecl *FDecl,
644
760
                                                 CheckerContext &C) const {
645
760
  // If the function has a buffer size argument, set ArgNum.
646
760
  unsigned ArgNum = InvalidArgIndex;
647
760
  unsigned BId = 0;
648
760
  if ((BId = FDecl->getMemoryFunctionKind()))
649
255
    switch (BId) {
650
255
    case Builtin::BImemcpy:
651
32
    case Builtin::BImemmove:
652
32
    case Builtin::BIstrncpy:
653
32
      ArgNum = 2;
654
32
      break;
655
32
    case Builtin::BIstrndup:
656
2
      ArgNum = 1;
657
2
      break;
658
221
    default:
659
221
      break;
660
760
    };
661
760
662
760
  if (ArgNum == InvalidArgIndex) {
663
726
    if (C.isCLibraryFunction(FDecl, "malloc") ||
664
726
        
C.isCLibraryFunction(FDecl, "calloc")712
||
665
726
        
C.isCLibraryFunction(FDecl, "alloca")710
)
666
16
      ArgNum = 0;
667
710
    else if (C.isCLibraryFunction(FDecl, "memccpy"))
668
1
      ArgNum = 3;
669
709
    else if (C.isCLibraryFunction(FDecl, "realloc"))
670
0
      ArgNum = 1;
671
709
    else if (C.isCLibraryFunction(FDecl, "bcopy"))
672
2
      ArgNum = 2;
673
726
  }
674
760
675
760
  return ArgNum != InvalidArgIndex && 
CE->getNumArgs() > ArgNum53
&&
676
760
         
generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C)50
;
677
760
}
678
679
14
void ento::registerGenericTaintChecker(CheckerManager &mgr) {
680
14
  mgr.registerChecker<GenericTaintChecker>();
681
14
}
682
683
14
bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {
684
14
  return true;
685
14
}