Coverage Report

Created: 2023-05-31 04:38

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Analysis/UnsafeBufferUsage.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
10
#include "clang/AST/Decl.h"
11
#include "clang/AST/RecursiveASTVisitor.h"
12
#include "clang/ASTMatchers/ASTMatchFinder.h"
13
#include "clang/Lex/Lexer.h"
14
#include "clang/Lex/Preprocessor.h"
15
#include "llvm/ADT/SmallVector.h"
16
#include <memory>
17
#include <optional>
18
#include <sstream>
19
#include <queue>
20
21
using namespace llvm;
22
using namespace clang;
23
using namespace ast_matchers;
24
25
namespace clang::ast_matchers {
26
// A `RecursiveASTVisitor` that traverses all descendants of a given node "n"
27
// except for those belonging to a different callable of "n".
28
class MatchDescendantVisitor
29
    : public RecursiveASTVisitor<MatchDescendantVisitor> {
30
public:
31
  typedef RecursiveASTVisitor<MatchDescendantVisitor> VisitorBase;
32
33
  // Creates an AST visitor that matches `Matcher` on all
34
  // descendants of a given node "n" except for the ones
35
  // belonging to a different callable of "n".
36
  MatchDescendantVisitor(const internal::DynTypedMatcher *Matcher,
37
                         internal::ASTMatchFinder *Finder,
38
                         internal::BoundNodesTreeBuilder *Builder,
39
                         internal::ASTMatchFinder::BindKind Bind,
40
                         const bool ignoreUnevaluatedContext)
41
      : Matcher(Matcher), Finder(Finder), Builder(Builder), Bind(Bind),
42
484
        Matches(false), ignoreUnevaluatedContext(ignoreUnevaluatedContext) {}
43
44
  // Returns true if a match is found in a subtree of `DynNode`, which belongs
45
  // to the same callable of `DynNode`.
46
484
  bool findMatch(const DynTypedNode &DynNode) {
47
484
    Matches = false;
48
484
    if (const Stmt *StmtNode = DynNode.get<Stmt>()) {
49
484
      TraverseStmt(const_cast<Stmt *>(StmtNode));
50
484
      *Builder = ResultBindings;
51
484
      return Matches;
52
484
    }
53
0
    return false;
54
484
  }
55
56
  // The following are overriding methods from the base visitor class.
57
  // They are public only to allow CRTP to work. They are *not *part
58
  // of the public API of this class.
59
60
  // For the matchers so far used in safe buffers, we only need to match
61
  // `Stmt`s.  To override more as needed.
62
63
1.22k
  bool TraverseDecl(Decl *Node) {
64
1.22k
    if (!Node)
65
0
      return true;
66
1.22k
    if (!match(*Node))
67
0
      return false;
68
    // To skip callables:
69
1.22k
    if (isa<FunctionDecl, BlockDecl, ObjCMethodDecl>(Node))
70
23
      return true;
71
    // Traverse descendants
72
1.20k
    return VisitorBase::TraverseDecl(Node);
73
1.22k
  }
74
75
16
  bool TraverseGenericSelectionExpr(GenericSelectionExpr *Node) {
76
    // These are unevaluated, except the result expression.
77
16
    if(ignoreUnevaluatedContext)
78
8
      return TraverseStmt(Node->getResultExpr());
79
8
    return VisitorBase::TraverseGenericSelectionExpr(Node);
80
16
  }
81
82
16
  bool TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *Node) {
83
    // Unevaluated context.
84
16
    if(ignoreUnevaluatedContext)
85
8
      return true;
86
8
    return VisitorBase::TraverseUnaryExprOrTypeTraitExpr(Node);
87
16
  }
88
89
12
  bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc Node) {
90
    // Unevaluated context.
91
12
    if(ignoreUnevaluatedContext)
92
6
      return true;
93
6
    return VisitorBase::TraverseTypeOfExprTypeLoc(Node);
94
12
  }
95
96
10
  bool TraverseDecltypeTypeLoc(DecltypeTypeLoc Node) {
97
    // Unevaluated context.
98
10
    if(ignoreUnevaluatedContext)
99
4
      return true;
100
6
    return VisitorBase::TraverseDecltypeTypeLoc(Node);
101
10
  }
102
103
4
  bool TraverseCXXNoexceptExpr(CXXNoexceptExpr *Node) {
104
    // Unevaluated context.
105
4
    if(ignoreUnevaluatedContext)
106
2
      return true;
107
2
    return VisitorBase::TraverseCXXNoexceptExpr(Node);
108
4
  }
109
110
14
  bool TraverseCXXTypeidExpr(CXXTypeidExpr *Node) {
111
    // Unevaluated context.
112
14
    if(ignoreUnevaluatedContext)
113
7
      return true;
114
7
    return VisitorBase::TraverseCXXTypeidExpr(Node);
115
14
  }
116
117
12.4k
  bool TraverseStmt(Stmt *Node, DataRecursionQueue *Queue = nullptr) {
118
12.4k
    if (!Node)
119
449
      return true;
120
11.9k
    if (!match(*Node))
121
0
      return false;
122
    // To skip callables:
123
11.9k
    if (isa<LambdaExpr>(Node))
124
12
      return true;
125
11.9k
    return VisitorBase::TraverseStmt(Node);
126
11.9k
  }
127
128
0
  bool shouldVisitTemplateInstantiations() const { return true; }
129
1.22k
  bool shouldVisitImplicitCode() const {
130
    // TODO: let's ignore implicit code for now
131
1.22k
    return false;
132
1.22k
  }
133
134
private:
135
  // Sets 'Matched' to true if 'Matcher' matches 'Node'
136
  //
137
  // Returns 'true' if traversal should continue after this function
138
  // returns, i.e. if no match is found or 'Bind' is 'BK_All'.
139
13.1k
  template <typename T> bool match(const T &Node) {
140
13.1k
    internal::BoundNodesTreeBuilder RecursiveBuilder(*Builder);
141
142
13.1k
    if (Matcher->matches(DynTypedNode::create(Node), Finder,
143
13.1k
                         &RecursiveBuilder)) {
144
2.05k
      ResultBindings.addMatch(RecursiveBuilder);
145
2.05k
      Matches = true;
146
2.05k
      if (Bind != internal::ASTMatchFinder::BK_All)
147
0
        return false; // Abort as soon as a match is found.
148
2.05k
    }
149
13.1k
    return true;
150
13.1k
  }
bool clang::ast_matchers::MatchDescendantVisitor::match<clang::Stmt>(clang::Stmt const&)
Line
Count
Source
139
11.9k
  template <typename T> bool match(const T &Node) {
140
11.9k
    internal::BoundNodesTreeBuilder RecursiveBuilder(*Builder);
141
142
11.9k
    if (Matcher->matches(DynTypedNode::create(Node), Finder,
143
11.9k
                         &RecursiveBuilder)) {
144
2.05k
      ResultBindings.addMatch(RecursiveBuilder);
145
2.05k
      Matches = true;
146
2.05k
      if (Bind != internal::ASTMatchFinder::BK_All)
147
0
        return false; // Abort as soon as a match is found.
148
2.05k
    }
149
11.9k
    return true;
150
11.9k
  }
bool clang::ast_matchers::MatchDescendantVisitor::match<clang::Decl>(clang::Decl const&)
Line
Count
Source
139
1.22k
  template <typename T> bool match(const T &Node) {
140
1.22k
    internal::BoundNodesTreeBuilder RecursiveBuilder(*Builder);
141
142
1.22k
    if (Matcher->matches(DynTypedNode::create(Node), Finder,
143
1.22k
                         &RecursiveBuilder)) {
144
0
      ResultBindings.addMatch(RecursiveBuilder);
145
0
      Matches = true;
146
0
      if (Bind != internal::ASTMatchFinder::BK_All)
147
0
        return false; // Abort as soon as a match is found.
148
0
    }
149
1.22k
    return true;
150
1.22k
  }
151
152
  const internal::DynTypedMatcher *const Matcher;
153
  internal::ASTMatchFinder *const Finder;
154
  internal::BoundNodesTreeBuilder *const Builder;
155
  internal::BoundNodesTreeBuilder ResultBindings;
156
  const internal::ASTMatchFinder::BindKind Bind;
157
  bool Matches;
158
  bool ignoreUnevaluatedContext;
159
};
160
161
// Because we're dealing with raw pointers, let's define what we mean by that.
162
5.97k
static auto hasPointerType() {
163
5.97k
    return hasType(hasCanonicalType(pointerType()));
164
5.97k
}
165
166
858
static auto hasArrayType() {
167
858
    return hasType(hasCanonicalType(arrayType()));
168
858
}
169
170
297
AST_MATCHER_P(Stmt, forEachDescendantEvaluatedStmt, internal::Matcher<Stmt>, innerMatcher) {
171
297
  const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher);
172
173
297
  MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, true);
174
297
  return Visitor.findMatch(DynTypedNode::create(Node));
175
297
}
176
177
187
AST_MATCHER_P(Stmt, forEachDescendantStmt, internal::Matcher<Stmt>, innerMatcher) {
178
187
  const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher);
179
180
187
  MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, false);
181
187
  return Visitor.findMatch(DynTypedNode::create(Node));
182
187
}
183
184
// Matches a `Stmt` node iff the node is in a safe-buffer opt-out region
185
AST_MATCHER_P(Stmt, notInSafeBufferOptOut, const UnsafeBufferUsageHandler *,
186
678
              Handler) {
187
678
  return !Handler->isSafeBufferOptOut(Node.getBeginLoc());
188
678
}
189
190
2.48k
AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher<Expr>, innerMatcher) {
191
2.48k
  return innerMatcher.matches(*Node.getSubExpr(), Finder, Builder);
192
2.48k
}
193
194
// Matches a `UnaryOperator` whose operator is pre-increment:
195
37
AST_MATCHER(UnaryOperator, isPreInc) {
196
37
  return Node.getOpcode() == UnaryOperator::Opcode::UO_PreInc;
197
37
}  
198
199
// Returns a matcher that matches any expression 'e' such that `innerMatcher`
200
// matches 'e' and 'e' is in an Unspecified Lvalue Context.
201
561
static auto isInUnspecifiedLvalueContext(internal::Matcher<Expr> innerMatcher) {
202
  // clang-format off
203
561
  return
204
561
    expr(anyOf(
205
561
      implicitCastExpr(
206
561
        hasCastKind(CastKind::CK_LValueToRValue),
207
561
        castSubExpr(innerMatcher)),
208
561
      binaryOperator(
209
561
        hasAnyOperatorName("="),
210
561
        hasLHS(innerMatcher)
211
561
      )
212
561
    ));
213
// clang-format off
214
561
}
215
216
217
// Returns a matcher that matches any expression `e` such that `InnerMatcher`
218
// matches `e` and `e` is in an Unspecified Pointer Context (UPC).
219
static internal::Matcher<Stmt>
220
561
isInUnspecifiedPointerContext(internal::Matcher<Stmt> InnerMatcher) {
221
  // A UPC can be
222
  // 1. an argument of a function call (except the callee has [[unsafe_...]]
223
  //    attribute), or
224
  // 2. the operand of a pointer-to-(integer or bool) cast operation; or
225
  // 3. the operand of a comparator operation; or
226
  // 4. the operand of a pointer subtraction operation
227
  //    (i.e., computing the distance between two pointers); or ...
228
229
561
  auto CallArgMatcher =
230
561
      callExpr(forEachArgumentWithParam(InnerMatcher,
231
561
                  hasPointerType() /* array also decays to pointer type*/),
232
561
          unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage)))));
233
234
561
  auto CastOperandMatcher =
235
561
      castExpr(anyOf(hasCastKind(CastKind::CK_PointerToIntegral),
236
561
         hasCastKind(CastKind::CK_PointerToBoolean)),
237
561
         castSubExpr(allOf(hasPointerType(), InnerMatcher)));
238
239
561
  auto CompOperandMatcher =
240
561
      binaryOperator(hasAnyOperatorName("!=", "==", "<", "<=", ">", ">="),
241
561
                     eachOf(hasLHS(allOf(hasPointerType(), InnerMatcher)),
242
561
                            hasRHS(allOf(hasPointerType(), InnerMatcher))));
243
244
  // A matcher that matches pointer subtractions:
245
561
  auto PtrSubtractionMatcher =
246
561
      binaryOperator(hasOperatorName("-"),
247
         // Note that here we need both LHS and RHS to be
248
         // pointer. Then the inner matcher can match any of
249
         // them:
250
561
         allOf(hasLHS(hasPointerType()),
251
561
         hasRHS(hasPointerType())),
252
561
         eachOf(hasLHS(InnerMatcher),
253
561
          hasRHS(InnerMatcher)));
254
255
561
  return stmt(anyOf(CallArgMatcher, CastOperandMatcher, CompOperandMatcher,
256
561
        PtrSubtractionMatcher));
257
  // FIXME: any more cases? (UPC excludes the RHS of an assignment.  For now we
258
  // don't have to check that.)
259
561
}
260
261
// Returns a matcher that matches any expression 'e' such that `innerMatcher`
262
// matches 'e' and 'e' is in an unspecified untyped context (i.e the expression
263
// 'e' isn't evaluated to an RValue). For example, consider the following code:
264
//    int *p = new int[4];
265
//    int *q = new int[4];
266
//    if ((p = q)) {}
267
//    p = q;
268
// The expression `p = q` in the conditional of the `if` statement
269
// `if ((p = q))` is evaluated as an RValue, whereas the expression `p = q;`
270
// in the assignment statement is in an untyped context.
271
static internal::Matcher<Stmt>
272
187
isInUnspecifiedUntypedContext(internal::Matcher<Stmt> InnerMatcher) {
273
  // An unspecified context can be
274
  // 1. A compound statement,
275
  // 2. The body of an if statement
276
  // 3. Body of a loop
277
187
  auto CompStmt = compoundStmt(forEach(InnerMatcher));
278
187
  auto IfStmtThen = ifStmt(hasThen(InnerMatcher));
279
187
  auto IfStmtElse = ifStmt(hasElse(InnerMatcher));
280
  // FIXME: Handle loop bodies.
281
187
  return stmt(anyOf(CompStmt, IfStmtThen, IfStmtElse));
282
187
}
283
} // namespace clang::ast_matchers
284
285
namespace {
286
// Because the analysis revolves around variables and their types, we'll need to
287
// track uses of variables (aka DeclRefExprs).
288
using DeclUseList = SmallVector<const DeclRefExpr *, 1>;
289
290
// Convenience typedef.
291
using FixItList = SmallVector<FixItHint, 4>;
292
293
// Defined below.
294
class Strategy;
295
} // namespace
296
297
namespace {
298
/// Gadget is an individual operation in the code that may be of interest to
299
/// this analysis. Each (non-abstract) subclass corresponds to a specific
300
/// rigid AST structure that constitutes an operation on a pointer-type object.
301
/// Discovery of a gadget in the code corresponds to claiming that we understand
302
/// what this part of code is doing well enough to potentially improve it.
303
/// Gadgets can be warning (immediately deserving a warning) or fixable (not
304
/// always deserving a warning per se, but requires our attention to identify
305
/// it warrants a fixit).
306
class Gadget {
307
public:
308
  enum class Kind {
309
#define GADGET(x) x,
310
#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
311
  };
312
313
  /// Common type of ASTMatchers used for discovering gadgets.
314
  /// Useful for implementing the static matcher() methods
315
  /// that are expected from all non-abstract subclasses.
316
  using Matcher = decltype(stmt());
317
318
1.02k
  Gadget(Kind K) : K(K) {}
319
320
0
  Kind getKind() const { return K; }
321
322
  virtual bool isWarningGadget() const = 0;
323
  virtual const Stmt *getBaseStmt() const = 0;
324
325
  /// Returns the list of pointer-type variables on which this gadget performs
326
  /// its operation. Typically, there's only one variable. This isn't a list
327
  /// of all DeclRefExprs in the gadget's AST!
328
  virtual DeclUseList getClaimedVarUseSites() const = 0;
329
330
1.02k
  virtual ~Gadget() = default;
331
332
private:
333
  Kind K;
334
};
335
336
337
/// Warning gadgets correspond to unsafe code patterns that warrants
338
/// an immediate warning.
339
class WarningGadget : public Gadget {
340
public:
341
639
  WarningGadget(Kind K) : Gadget(K) {}
342
343
0
  static bool classof(const Gadget *G) { return G->isWarningGadget(); }
344
0
  bool isWarningGadget() const final { return true; }
345
};
346
347
/// Fixable gadgets correspond to code patterns that aren't always unsafe but need to be
348
/// properly recognized in order to emit fixes. For example, if a raw pointer-type
349
/// variable is replaced by a safe C++ container, every use of such variable must be
350
/// carefully considered and possibly updated.
351
class FixableGadget : public Gadget {
352
public:
353
384
  FixableGadget(Kind K) : Gadget(K) {}
354
355
0
  static bool classof(const Gadget *G) { return !G->isWarningGadget(); }
356
0
  bool isWarningGadget() const final { return false; }
357
358
  /// Returns a fixit that would fix the current gadget according to
359
  /// the current strategy. Returns std::nullopt if the fix cannot be produced;
360
  /// returns an empty list if no fixes are necessary.
361
0
  virtual std::optional<FixItList> getFixits(const Strategy &) const {
362
0
    return std::nullopt;
363
0
  }
364
  
365
  /// Returns a list of two elements where the first element is the LHS of a pointer assignment
366
  /// statement and the second element is the RHS. This two-element list represents the fact that
367
  /// the LHS buffer gets its bounds information from the RHS buffer. This information will be used
368
  /// later to group all those variables whose types must be modified together to prevent type
369
  /// mismatches.
370
  virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
371
240
  getStrategyImplications() const {
372
240
    return std::nullopt;
373
240
  }
374
};
375
376
using FixableGadgetList = std::vector<std::unique_ptr<FixableGadget>>;
377
using WarningGadgetList = std::vector<std::unique_ptr<WarningGadget>>;
378
379
/// An increment of a pointer-type value is unsafe as it may run the pointer
380
/// out of bounds.
381
class IncrementGadget : public WarningGadget {
382
  static constexpr const char *const OpTag = "op";
383
  const UnaryOperator *Op;
384
385
public:
386
  IncrementGadget(const MatchFinder::MatchResult &Result)
387
      : WarningGadget(Kind::Increment),
388
34
        Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {}
389
390
0
  static bool classof(const Gadget *G) {
391
0
    return G->getKind() == Kind::Increment;
392
0
  }
393
394
297
  static Matcher matcher() {
395
297
    return stmt(unaryOperator(
396
297
      hasOperatorName("++"),
397
297
      hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
398
297
    ).bind(OpTag));
399
297
  }
400
401
34
  const UnaryOperator *getBaseStmt() const override { return Op; }
402
403
32
  DeclUseList getClaimedVarUseSites() const override {
404
32
    SmallVector<const DeclRefExpr *, 2> Uses;
405
32
    if (const auto *DRE =
406
32
            dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) {
407
31
      Uses.push_back(DRE);
408
31
    }
409
410
32
    return std::move(Uses);
411
32
  }
412
};
413
414
/// A decrement of a pointer-type value is unsafe as it may run the pointer
415
/// out of bounds.
416
class DecrementGadget : public WarningGadget {
417
  static constexpr const char *const OpTag = "op";
418
  const UnaryOperator *Op;
419
420
public:
421
  DecrementGadget(const MatchFinder::MatchResult &Result)
422
      : WarningGadget(Kind::Decrement),
423
4
        Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {}
424
425
0
  static bool classof(const Gadget *G) {
426
0
    return G->getKind() == Kind::Decrement;
427
0
  }
428
429
297
  static Matcher matcher() {
430
297
    return stmt(unaryOperator(
431
297
      hasOperatorName("--"),
432
297
      hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
433
297
    ).bind(OpTag));
434
297
  }
435
436
4
  const UnaryOperator *getBaseStmt() const override { return Op; }
437
438
2
  DeclUseList getClaimedVarUseSites() const override {
439
2
    if (const auto *DRE =
440
2
            dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) {
441
2
      return {DRE};
442
2
    }
443
444
0
    return {};
445
2
  }
446
};
447
448
/// Array subscript expressions on raw pointers as if they're arrays. Unsafe as
449
/// it doesn't have any bounds checks for the array.
450
class ArraySubscriptGadget : public WarningGadget {
451
  static constexpr const char *const ArraySubscrTag = "ArraySubscript";
452
  const ArraySubscriptExpr *ASE;
453
454
public:
455
  ArraySubscriptGadget(const MatchFinder::MatchResult &Result)
456
      : WarningGadget(Kind::ArraySubscript),
457
491
        ASE(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ArraySubscrTag)) {}
458
459
0
  static bool classof(const Gadget *G) {
460
0
    return G->getKind() == Kind::ArraySubscript;
461
0
  }
462
463
297
  static Matcher matcher() {
464
    // FIXME: What if the index is integer literal 0? Should this be
465
    // a safe gadget in this case?
466
      // clang-format off
467
297
      return stmt(arraySubscriptExpr(
468
297
            hasBase(ignoringParenImpCasts(
469
297
              anyOf(hasPointerType(), hasArrayType()))),
470
297
            unless(hasIndex(integerLiteral(equals(0)))))
471
297
            .bind(ArraySubscrTag));
472
    // clang-format on
473
297
  }
474
475
491
  const ArraySubscriptExpr *getBaseStmt() const override { return ASE; }
476
477
261
  DeclUseList getClaimedVarUseSites() const override {
478
261
    if (const auto *DRE =
479
261
            dyn_cast<DeclRefExpr>(ASE->getBase()->IgnoreParenImpCasts())) {
480
214
      return {DRE};
481
214
    }
482
483
47
    return {};
484
261
  }
485
};
486
487
/// A pointer arithmetic expression of one of the forms:
488
///  \code
489
///  ptr + n | n + ptr | ptr - n | ptr += n | ptr -= n
490
///  \endcode
491
class PointerArithmeticGadget : public WarningGadget {
492
  static constexpr const char *const PointerArithmeticTag = "ptrAdd";
493
  static constexpr const char *const PointerArithmeticPointerTag = "ptrAddPtr";
494
  const BinaryOperator *PA; // pointer arithmetic expression
495
  const Expr *Ptr;          // the pointer expression in `PA`
496
497
public:
498
  PointerArithmeticGadget(const MatchFinder::MatchResult &Result)
499
      : WarningGadget(Kind::PointerArithmetic),
500
        PA(Result.Nodes.getNodeAs<BinaryOperator>(PointerArithmeticTag)),
501
87
        Ptr(Result.Nodes.getNodeAs<Expr>(PointerArithmeticPointerTag)) {}
502
503
0
  static bool classof(const Gadget *G) {
504
0
    return G->getKind() == Kind::PointerArithmetic;
505
0
  }
506
507
297
  static Matcher matcher() {
508
297
    auto HasIntegerType = anyOf(hasType(isInteger()), hasType(enumType()));
509
297
    auto PtrAtRight =
510
297
        allOf(hasOperatorName("+"),
511
297
              hasRHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)),
512
297
              hasLHS(HasIntegerType));
513
297
    auto PtrAtLeft =
514
297
        allOf(anyOf(hasOperatorName("+"), hasOperatorName("-"),
515
297
                    hasOperatorName("+="), hasOperatorName("-=")),
516
297
              hasLHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)),
517
297
              hasRHS(HasIntegerType));
518
519
297
    return stmt(binaryOperator(anyOf(PtrAtLeft, PtrAtRight))
520
297
                    .bind(PointerArithmeticTag));
521
297
  }
522
523
87
  const Stmt *getBaseStmt() const override { return PA; }
524
525
73
  DeclUseList getClaimedVarUseSites() const override {
526
73
    if (const auto *DRE = dyn_cast<DeclRefExpr>(Ptr->IgnoreParenImpCasts())) {
527
61
      return {DRE};
528
61
    }
529
530
12
    return {};
531
73
  }
532
  // FIXME: pointer adding zero should be fine
533
  // FIXME: this gadge will need a fix-it
534
};
535
536
/// A pointer assignment expression of the form:
537
///  \code
538
///  p = q;
539
///  \endcode
540
class PointerAssignmentGadget : public FixableGadget {
541
private:
542
  static constexpr const char *const PointerAssignmentTag = "ptrAssign";
543
  static constexpr const char *const PointerAssignLHSTag = "ptrLHS";
544
  static constexpr const char *const PointerAssignRHSTag = "ptrRHS";
545
  const BinaryOperator *PA;    // pointer arithmetic expression
546
  const DeclRefExpr * PtrLHS;         // the LHS pointer expression in `PA`
547
  const DeclRefExpr * PtrRHS;         // the RHS pointer expression in `PA`
548
549
public:
550
  PointerAssignmentGadget(const MatchFinder::MatchResult &Result)
551
      : FixableGadget(Kind::PointerAssignment),
552
    PA(Result.Nodes.getNodeAs<BinaryOperator>(PointerAssignmentTag)),
553
    PtrLHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignLHSTag)),
554
80
    PtrRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignRHSTag)) {}
555
556
0
  static bool classof(const Gadget *G) {
557
0
    return G->getKind() == Kind::PointerAssignment;
558
0
  }
559
560
187
  static Matcher matcher() {
561
187
    auto PtrAssignExpr = binaryOperator(allOf(hasOperatorName("="),
562
187
      hasRHS(ignoringParenImpCasts(declRefExpr(hasPointerType(),
563
187
                                               to(varDecl())).
564
187
                                   bind(PointerAssignRHSTag))),
565
187
                                   hasLHS(declRefExpr(hasPointerType(),
566
187
                                                      to(varDecl())).
567
187
                                          bind(PointerAssignLHSTag))));
568
    
569
    //FIXME: Handle declarations at assignments
570
187
    return stmt(isInUnspecifiedUntypedContext(PtrAssignExpr));
571
187
  }
572
  
573
  virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
574
575
0
  virtual const Stmt *getBaseStmt() const override { return PA; }
576
577
160
  virtual DeclUseList getClaimedVarUseSites() const override {
578
160
    return DeclUseList{PtrLHS, PtrRHS};
579
160
  }
580
581
  virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
582
147
  getStrategyImplications() const override {
583
147
    return std::make_pair(cast<VarDecl>(PtrLHS->getDecl()),
584
147
                          cast<VarDecl>(PtrRHS->getDecl()));
585
147
  }
586
};
587
588
/// A call of a function or method that performs unchecked buffer operations
589
/// over one of its pointer parameters.
590
class UnsafeBufferUsageAttrGadget : public WarningGadget {
591
  constexpr static const char *const OpTag = "call_expr";
592
  const CallExpr *Op;
593
594
public:
595
  UnsafeBufferUsageAttrGadget(const MatchFinder::MatchResult &Result)
596
      : WarningGadget(Kind::UnsafeBufferUsageAttr),
597
23
        Op(Result.Nodes.getNodeAs<CallExpr>(OpTag)) {}
598
599
0
  static bool classof(const Gadget *G) {
600
0
    return G->getKind() == Kind::UnsafeBufferUsageAttr;
601
0
  }
602
603
297
  static Matcher matcher() {
604
297
    return stmt(callExpr(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage))))
605
297
                    .bind(OpTag));
606
297
  }
607
23
  const Stmt *getBaseStmt() const override { return Op; }
608
609
19
  DeclUseList getClaimedVarUseSites() const override { return {}; }
610
};
611
612
// Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
613
// Context (see `isInUnspecifiedLvalueContext`).
614
// Note here `[]` is the built-in subscript operator.
615
class ULCArraySubscriptGadget : public FixableGadget {
616
private:
617
  static constexpr const char *const ULCArraySubscriptTag =
618
      "ArraySubscriptUnderULC";
619
  const ArraySubscriptExpr *Node;
620
621
public:
622
  ULCArraySubscriptGadget(const MatchFinder::MatchResult &Result)
623
      : FixableGadget(Kind::ULCArraySubscript),
624
195
        Node(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ULCArraySubscriptTag)) {
625
195
    assert(Node != nullptr && "Expecting a non-null matching result");
626
195
  }
627
628
0
  static bool classof(const Gadget *G) {
629
0
    return G->getKind() == Kind::ULCArraySubscript;
630
0
  }
631
632
187
  static Matcher matcher() {
633
187
    auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType());
634
187
    auto BaseIsArrayOrPtrDRE =
635
187
        hasBase(ignoringParenImpCasts(declRefExpr(ArrayOrPtr)));
636
187
    auto Target =
637
187
        arraySubscriptExpr(BaseIsArrayOrPtrDRE).bind(ULCArraySubscriptTag);
638
639
187
    return expr(isInUnspecifiedLvalueContext(Target));
640
187
  }
641
642
  virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
643
644
0
  virtual const Stmt *getBaseStmt() const override { return Node; }
645
646
390
  virtual DeclUseList getClaimedVarUseSites() const override {
647
390
    if (const auto *DRE =
648
390
            dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts())) {
649
390
      return {DRE};
650
390
    }
651
0
    return {};
652
390
  }
653
};
654
655
// Fixable gadget to handle stand alone pointers of the form `UPC(DRE)` in the
656
// unspecified pointer context (isInUnspecifiedPointerContext). The gadget emits
657
// fixit of the form `UPC(DRE.data())`.
658
class UPCStandalonePointerGadget : public FixableGadget {
659
private:
660
  static constexpr const char *const DeclRefExprTag = "StandalonePointer";
661
  const DeclRefExpr *Node;
662
663
public:
664
  UPCStandalonePointerGadget(const MatchFinder::MatchResult &Result)
665
      : FixableGadget(Kind::UPCStandalonePointer),
666
38
        Node(Result.Nodes.getNodeAs<DeclRefExpr>(DeclRefExprTag)) {
667
38
    assert(Node != nullptr && "Expecting a non-null matching result");
668
38
  }
669
670
0
  static bool classof(const Gadget *G) {
671
0
    return G->getKind() == Kind::UPCStandalonePointer;
672
0
  }
673
674
187
  static Matcher matcher() {
675
187
    auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType());
676
187
    auto target = expr(
677
187
        ignoringParenImpCasts(declRefExpr(allOf(ArrayOrPtr, to(varDecl()))).bind(DeclRefExprTag)));
678
187
    return stmt(isInUnspecifiedPointerContext(target));
679
187
  }
680
681
  virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
682
683
0
  virtual const Stmt *getBaseStmt() const override { return Node; }
684
685
76
  virtual DeclUseList getClaimedVarUseSites() const override {
686
76
    return {Node};
687
76
  }
688
};
689
690
class PointerDereferenceGadget : public FixableGadget {
691
  static constexpr const char *const BaseDeclRefExprTag = "BaseDRE";
692
  static constexpr const char *const OperatorTag = "op";
693
694
  const DeclRefExpr *BaseDeclRefExpr = nullptr;
695
  const UnaryOperator *Op = nullptr;
696
697
public:
698
  PointerDereferenceGadget(const MatchFinder::MatchResult &Result)
699
      : FixableGadget(Kind::PointerDereference),
700
        BaseDeclRefExpr(
701
            Result.Nodes.getNodeAs<DeclRefExpr>(BaseDeclRefExprTag)),
702
18
        Op(Result.Nodes.getNodeAs<UnaryOperator>(OperatorTag)) {}
703
704
0
  static bool classof(const Gadget *G) {
705
0
    return G->getKind() == Kind::PointerDereference;
706
0
  }
707
708
187
  static Matcher matcher() {
709
187
    auto Target =
710
187
        unaryOperator(
711
187
            hasOperatorName("*"),
712
187
            has(expr(ignoringParenImpCasts(
713
187
                declRefExpr(to(varDecl())).bind(BaseDeclRefExprTag)))))
714
187
            .bind(OperatorTag);
715
716
187
    return expr(isInUnspecifiedLvalueContext(Target));
717
187
  }
718
719
36
  DeclUseList getClaimedVarUseSites() const override {
720
36
    return {BaseDeclRefExpr};
721
36
  }
722
723
0
  virtual const Stmt *getBaseStmt() const final { return Op; }
724
725
  virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
726
};
727
728
// Represents expressions of the form `&DRE[any]` in the Unspecified Pointer
729
// Context (see `isInUnspecifiedPointerContext`).
730
// Note here `[]` is the built-in subscript operator.
731
class UPCAddressofArraySubscriptGadget : public FixableGadget {
732
private:
733
  static constexpr const char *const UPCAddressofArraySubscriptTag =
734
      "AddressofArraySubscriptUnderUPC";
735
  const UnaryOperator *Node; // the `&DRE[any]` node
736
737
public:
738
  UPCAddressofArraySubscriptGadget(const MatchFinder::MatchResult &Result)
739
      : FixableGadget(Kind::ULCArraySubscript),
740
        Node(Result.Nodes.getNodeAs<UnaryOperator>(
741
18
            UPCAddressofArraySubscriptTag)) {
742
18
    assert(Node != nullptr && "Expecting a non-null matching result");
743
18
  }
744
745
0
  static bool classof(const Gadget *G) {
746
0
    return G->getKind() == Kind::UPCAddressofArraySubscript;
747
0
  }
748
749
187
  static Matcher matcher() {
750
187
    return expr(isInUnspecifiedPointerContext(expr(ignoringImpCasts(
751
187
        unaryOperator(hasOperatorName("&"),
752
187
                      hasUnaryOperand(arraySubscriptExpr(
753
187
                          hasBase(ignoringParenImpCasts(declRefExpr())))))
754
187
            .bind(UPCAddressofArraySubscriptTag)))));
755
187
  }
756
757
  virtual std::optional<FixItList> getFixits(const Strategy &) const override;
758
759
0
  virtual const Stmt *getBaseStmt() const override { return Node; }
760
761
53
  virtual DeclUseList getClaimedVarUseSites() const override {
762
53
    const auto *ArraySubst = cast<ArraySubscriptExpr>(Node->getSubExpr());
763
53
    const auto *DRE =
764
53
        cast<DeclRefExpr>(ArraySubst->getBase()->IgnoreImpCasts());
765
53
    return {DRE};
766
53
  }
767
};
768
} // namespace
769
770
namespace {
771
// An auxiliary tracking facility for the fixit analysis. It helps connect
772
// declarations to its and make sure we've covered all uses with our analysis
773
// before we try to fix the declaration.
774
class DeclUseTracker {
775
  using UseSetTy = SmallSet<const DeclRefExpr *, 16>;
776
  using DefMapTy = DenseMap<const VarDecl *, const DeclStmt *>;
777
778
  // Allocate on the heap for easier move.
779
  std::unique_ptr<UseSetTy> Uses{std::make_unique<UseSetTy>()};
780
  DefMapTy Defs{};
781
782
public:
783
297
  DeclUseTracker() = default;
784
  DeclUseTracker(const DeclUseTracker &) = delete; // Let's avoid copies.
785
  DeclUseTracker &operator=(const DeclUseTracker &) = delete;
786
297
  DeclUseTracker(DeclUseTracker &&) = default;
787
  DeclUseTracker &operator=(DeclUseTracker &&) = default;
788
789
  // Start tracking a freshly discovered DRE.
790
690
  void discoverUse(const DeclRefExpr *DRE) { Uses->insert(DRE); }
791
792
  // Stop tracking the DRE as it's been fully figured out.
793
464
  void claimUse(const DeclRefExpr *DRE) {
794
464
    assert(Uses->count(DRE) &&
795
464
           "DRE not found or claimed by multiple matchers!");
796
464
    Uses->erase(DRE);
797
464
  }
798
799
  // A variable is unclaimed if at least one use is unclaimed.
800
230
  bool hasUnclaimedUses(const VarDecl *VD) const {
801
    // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs?
802
230
    return any_of(*Uses, [VD](const DeclRefExpr *DRE) {
803
123
      return DRE->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl();
804
123
    });
805
230
  }
806
807
391
  void discoverDecl(const DeclStmt *DS) {
808
393
    for (const Decl *D : DS->decls()) {
809
393
      if (const auto *VD = dyn_cast<VarDecl>(D)) {
810
        // FIXME: Assertion temporarily disabled due to a bug in
811
        // ASTMatcher internal behavior in presence of GNU
812
        // statement-expressions. We need to properly investigate this
813
        // because it can screw up our algorithm in other ways.
814
        // assert(Defs.count(VD) == 0 && "Definition already discovered!");
815
390
        Defs[VD] = DS;
816
390
      }
817
393
    }
818
391
  }
819
820
193
  const DeclStmt *lookupDecl(const VarDecl *VD) const {
821
193
    auto It = Defs.find(VD);
822
193
    assert(It != Defs.end() && "Definition never discovered!");
823
193
    return It->second;
824
193
  }
825
};
826
} // namespace
827
828
namespace {
829
// Strategy is a map from variables to the way we plan to emit fixes for
830
// these variables. It is figured out gradually by trying different fixes
831
// for different variables depending on gadgets in which these variables
832
// participate.
833
class Strategy {
834
public:
835
  enum class Kind {
836
    Wontfix,  // We don't plan to emit a fixit for this variable.
837
    Span,     // We recommend replacing the variable with std::span.
838
    Iterator, // We recommend replacing the variable with std::span::iterator.
839
    Array,    // We recommend replacing the variable with std::array.
840
    Vector    // We recommend replacing the variable with std::vector.
841
  };
842
843
private:
844
  using MapTy = llvm::DenseMap<const VarDecl *, Kind>;
845
846
  MapTy Map;
847
848
public:
849
187
  Strategy() = default;
850
  Strategy(const Strategy &) = delete; // Let's avoid copies.
851
  Strategy &operator=(const Strategy &) = delete;
852
  Strategy(Strategy &&) = default;
853
  Strategy &operator=(Strategy &&) = default;
854
855
197
  void set(const VarDecl *VD, Kind K) { Map[VD] = K; }
856
857
1.08k
  Kind lookup(const VarDecl *VD) const {
858
1.08k
    auto I = Map.find(VD);
859
1.08k
    if (I == Map.end())
860
11
      return Kind::Wontfix;
861
862
1.07k
    return I->second;
863
1.08k
  }
864
};
865
} // namespace
866
867
868
// Representing a pointer type expression of the form `++Ptr` in an Unspecified
869
// Pointer Context (UPC):
870
class UPCPreIncrementGadget : public FixableGadget {
871
private:
872
  static constexpr const char *const UPCPreIncrementTag =
873
    "PointerPreIncrementUnderUPC";
874
  const UnaryOperator *Node; // the `++Ptr` node
875
876
public:
877
  UPCPreIncrementGadget(const MatchFinder::MatchResult &Result)
878
    : FixableGadget(Kind::UPCPreIncrement),
879
6
      Node(Result.Nodes.getNodeAs<UnaryOperator>(UPCPreIncrementTag)) {
880
6
    assert(Node != nullptr && "Expecting a non-null matching result");
881
6
  }
882
883
0
  static bool classof(const Gadget *G) {
884
0
    return G->getKind() == Kind::UPCPreIncrement;
885
0
  }
886
887
187
  static Matcher matcher() {
888
    // Note here we match `++Ptr` for any expression `Ptr` of pointer type.
889
    // Although currently we can only provide fix-its when `Ptr` is a DRE, we
890
    // can have the matcher be general, so long as `getClaimedVarUseSites` does
891
    // things right.
892
187
    return stmt(isInUnspecifiedPointerContext(expr(ignoringImpCasts(
893
187
                    unaryOperator(isPreInc(),
894
187
                      hasUnaryOperand(declRefExpr())
895
187
                      ).bind(UPCPreIncrementTag)))));
896
187
  }
897
898
  virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
899
900
6
  virtual const Stmt *getBaseStmt() const override { return Node; }
901
902
18
  virtual DeclUseList getClaimedVarUseSites() const override {
903
18
    return {dyn_cast<DeclRefExpr>(Node->getSubExpr())};
904
18
  }
905
};
906
907
// Representing a fixable expression of the form `*(ptr + 123)` or `*(123 +
908
// ptr)`:
909
class DerefSimplePtrArithFixableGadget : public FixableGadget {
910
  static constexpr const char *const BaseDeclRefExprTag = "BaseDRE";
911
  static constexpr const char *const DerefOpTag = "DerefOp";
912
  static constexpr const char *const AddOpTag = "AddOp";
913
  static constexpr const char *const OffsetTag = "Offset";
914
915
  const DeclRefExpr *BaseDeclRefExpr = nullptr;
916
  const UnaryOperator *DerefOp = nullptr;
917
  const BinaryOperator *AddOp = nullptr;
918
  const IntegerLiteral *Offset = nullptr;
919
920
public:
921
  DerefSimplePtrArithFixableGadget(const MatchFinder::MatchResult &Result)
922
      : FixableGadget(Kind::DerefSimplePtrArithFixable),
923
        BaseDeclRefExpr(
924
            Result.Nodes.getNodeAs<DeclRefExpr>(BaseDeclRefExprTag)),
925
        DerefOp(Result.Nodes.getNodeAs<UnaryOperator>(DerefOpTag)),
926
        AddOp(Result.Nodes.getNodeAs<BinaryOperator>(AddOpTag)),
927
29
        Offset(Result.Nodes.getNodeAs<IntegerLiteral>(OffsetTag)) {}
928
929
187
  static Matcher matcher() {
930
    // clang-format off
931
187
    auto ThePtr = expr(hasPointerType(),
932
187
                       ignoringImpCasts(declRefExpr(to(varDecl())).bind(BaseDeclRefExprTag)));
933
187
    auto PlusOverPtrAndInteger = expr(anyOf(
934
187
          binaryOperator(hasOperatorName("+"), hasLHS(ThePtr),
935
187
                         hasRHS(integerLiteral().bind(OffsetTag)))
936
187
                         .bind(AddOpTag),
937
187
          binaryOperator(hasOperatorName("+"), hasRHS(ThePtr),
938
187
                         hasLHS(integerLiteral().bind(OffsetTag)))
939
187
                         .bind(AddOpTag)));
940
187
    return isInUnspecifiedLvalueContext(unaryOperator(
941
187
        hasOperatorName("*"),
942
187
        hasUnaryOperand(ignoringParens(PlusOverPtrAndInteger)))
943
187
        .bind(DerefOpTag));
944
    // clang-format on
945
187
  }
946
947
  virtual std::optional<FixItList> getFixits(const Strategy &s) const final;
948
949
  // TODO remove this method from FixableGadget interface
950
0
  virtual const Stmt *getBaseStmt() const final { return nullptr; }
951
952
58
  virtual DeclUseList getClaimedVarUseSites() const final {
953
58
    return {BaseDeclRefExpr};
954
58
  }
955
};
956
957
/// Scan the function and return a list of gadgets found with provided kits.
958
static std::tuple<FixableGadgetList, WarningGadgetList, DeclUseTracker>
959
findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler,
960
297
            bool EmitSuggestions) {
961
962
297
  struct GadgetFinderCallback : MatchFinder::MatchCallback {
963
297
    FixableGadgetList FixableGadgets;
964
297
    WarningGadgetList WarningGadgets;
965
297
    DeclUseTracker Tracker;
966
967
2.10k
    void run(const MatchFinder::MatchResult &Result) override {
968
      // In debug mode, assert that we've found exactly one gadget.
969
      // This helps us avoid conflicts in .bind() tags.
970
#if NDEBUG
971
#define NEXT return
972
#else
973
2.10k
      [[maybe_unused]] int numFound = 0;
974
2.10k
#define NEXT ++numFound
975
2.10k
#endif
976
977
2.10k
      if (const auto *DRE = Result.Nodes.getNodeAs<DeclRefExpr>("any_dre")) {
978
690
        Tracker.discoverUse(DRE);
979
690
        NEXT;
980
690
      }
981
982
2.10k
      if (const auto *DS = Result.Nodes.getNodeAs<DeclStmt>("any_ds")) {
983
391
        Tracker.discoverDecl(DS);
984
391
        NEXT;
985
391
      }
986
987
      // Figure out which matcher we've found, and call the appropriate
988
      // subclass constructor.
989
      // FIXME: Can we do this more logarithmically?
990
2.10k
#define FIXABLE_GADGET(name)                                                   \
991
14.7k
  if (Result.Nodes.getNodeAs<Stmt>(#name)) {                                   \
992
384
    FixableGadgets.push_back(std::make_unique<name##Gadget>(Result));          \
993
384
    NEXT;                                                                      \
994
384
  }
995
2.10k
#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
996
2.10k
#define WARNING_GADGET(name)                                                   \
997
10.5k
  if (Result.Nodes.getNodeAs<Stmt>(#name)) {                                   \
998
639
    WarningGadgets.push_back(std::make_unique<name##Gadget>(Result));          \
999
639
    NEXT;                                                                      \
1000
639
  }
1001
2.10k
#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1002
1003
2.10k
      assert(numFound >= 1 && "Gadgets not found in match result!");
1004
2.10k
      assert(numFound <= 1 && "Conflicting bind tags in gadgets!");
1005
2.10k
    }
1006
297
  };
1007
1008
297
  MatchFinder M;
1009
297
  GadgetFinderCallback CB;
1010
1011
  // clang-format off
1012
297
  M.addMatcher(
1013
297
      stmt(
1014
297
        forEachDescendantEvaluatedStmt(stmt(anyOf(
1015
          // Add Gadget::matcher() for every gadget in the registry.
1016
297
#define WARNING_GADGET(x)                                                      \
1017
1.48k
          allOf(x ## Gadget::matcher().bind(#x),                               \
1018
1.48k
                notInSafeBufferOptOut(&Handler)),
1019
297
#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1020
            // Avoid a hanging comma.
1021
297
            unless(stmt())
1022
297
        )))
1023
297
    ),
1024
297
    &CB
1025
297
  );
1026
  // clang-format on
1027
1028
297
  if (EmitSuggestions) {
1029
    // clang-format off
1030
187
    M.addMatcher(
1031
187
        stmt(
1032
187
          forEachDescendantStmt(stmt(eachOf(
1033
187
#define FIXABLE_GADGET(x)                                                      \
1034
1.30k
            x ## Gadget::matcher().bind(#x),
1035
187
#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1036
            // In parallel, match all DeclRefExprs so that to find out
1037
            // whether there are any uncovered by gadgets.
1038
187
            declRefExpr(anyOf(hasPointerType(), hasArrayType()),
1039
187
                        to(varDecl())).bind("any_dre"),
1040
            // Also match DeclStmts because we'll need them when fixing
1041
            // their underlying VarDecls that otherwise don't have
1042
            // any backreferences to DeclStmts.
1043
187
            declStmt().bind("any_ds")
1044
187
          )))
1045
187
      ),
1046
187
      &CB
1047
187
    );
1048
    // clang-format on
1049
187
  }
1050
1051
297
  M.match(*D->getBody(), D->getASTContext());
1052
1053
  // Gadgets "claim" variables they're responsible for. Once this loop finishes,
1054
  // the tracker will only track DREs that weren't claimed by any gadgets,
1055
  // i.e. not understood by the analysis.
1056
384
  for (const auto &G : CB.FixableGadgets) {
1057
464
    for (const auto *DRE : G->getClaimedVarUseSites()) {
1058
464
      CB.Tracker.claimUse(DRE);
1059
464
    }
1060
384
  }
1061
1062
297
  return {std::move(CB.FixableGadgets), std::move(CB.WarningGadgets),
1063
297
          std::move(CB.Tracker)};
1064
297
}
1065
1066
// Compares AST nodes by source locations.
1067
template <typename NodeTy> struct CompareNode {
1068
1.01k
  bool operator()(const NodeTy *N1, const NodeTy *N2) const {
1069
1.01k
    return N1->getBeginLoc().getRawEncoding() <
1070
1.01k
           N2->getBeginLoc().getRawEncoding();
1071
1.01k
  }
1072
};
1073
1074
struct WarningGadgetSets {
1075
  std::map<const VarDecl *, std::set<const WarningGadget *>,
1076
           // To keep keys sorted by their locations in the map so that the
1077
           // order is deterministic:
1078
           CompareNode<VarDecl>>
1079
      byVar;
1080
  // These Gadgets are not related to pointer variables (e. g. temporaries).
1081
  llvm::SmallVector<const WarningGadget *, 16> noVar;
1082
};
1083
1084
static WarningGadgetSets
1085
187
groupWarningGadgetsByVar(const WarningGadgetList &AllUnsafeOperations) {
1086
187
  WarningGadgetSets result;
1087
  // If some gadgets cover more than one
1088
  // variable, they'll appear more than once in the map.
1089
387
  for (auto &G : AllUnsafeOperations) {
1090
387
    DeclUseList ClaimedVarUseSites = G->getClaimedVarUseSites();
1091
1092
387
    bool AssociatedWithVarDecl = false;
1093
387
    for (const DeclRefExpr *DRE : ClaimedVarUseSites) {
1094
308
      if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
1095
307
        result.byVar[VD].insert(G.get());
1096
307
        AssociatedWithVarDecl = true;
1097
307
      }
1098
308
    }
1099
1100
387
    if (!AssociatedWithVarDecl) {
1101
80
      result.noVar.push_back(G.get());
1102
80
      continue;
1103
80
    }
1104
387
  }
1105
187
  return result;
1106
187
}
1107
1108
struct FixableGadgetSets {
1109
  std::map<const VarDecl *, std::set<const FixableGadget *>> byVar;
1110
};
1111
1112
static FixableGadgetSets
1113
187
groupFixablesByVar(FixableGadgetList &&AllFixableOperations) {
1114
187
  FixableGadgetSets FixablesForUnsafeVars;
1115
384
  for (auto &F : AllFixableOperations) {
1116
384
    DeclUseList DREs = F->getClaimedVarUseSites();
1117
1118
464
    for (const DeclRefExpr *DRE : DREs) {
1119
464
      if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
1120
464
        FixablesForUnsafeVars.byVar[VD].insert(F.get());
1121
464
      }
1122
464
    }
1123
384
  }
1124
187
  return FixablesForUnsafeVars;
1125
187
}
1126
1127
bool clang::internal::anyConflict(const SmallVectorImpl<FixItHint> &FixIts,
1128
179
                                  const SourceManager &SM) {
1129
  // A simple interval overlap detection algorithm.  Sorts all ranges by their
1130
  // begin location then finds the first overlap in one pass.
1131
179
  std::vector<const FixItHint *> All; // a copy of `FixIts`
1132
1133
179
  for (const FixItHint &H : FixIts)
1134
638
    All.push_back(&H);
1135
179
  std::sort(All.begin(), All.end(),
1136
1.11k
            [&SM](const FixItHint *H1, const FixItHint *H2) {
1137
1.11k
              return SM.isBeforeInTranslationUnit(H1->RemoveRange.getBegin(),
1138
1.11k
                                                  H2->RemoveRange.getBegin());
1139
1.11k
            });
1140
1141
179
  const FixItHint *CurrHint = nullptr;
1142
1143
632
  for (const FixItHint *Hint : All) {
1144
632
    if (!CurrHint ||
1145
632
        SM.isBeforeInTranslationUnit(CurrHint->RemoveRange.getEnd(),
1146
628
                                     Hint->RemoveRange.getBegin())) {
1147
      // Either to initialize `CurrHint` or `CurrHint` does not
1148
      // overlap with `Hint`:
1149
628
      CurrHint = Hint;
1150
628
    } else
1151
      // In case `Hint` overlaps the `CurrHint`, we found at least one
1152
      // conflict:
1153
4
      return true;
1154
632
  }
1155
175
  return false;
1156
179
}
1157
1158
std::optional<FixItList>
1159
238
PointerAssignmentGadget::getFixits(const Strategy &S) const {
1160
238
  const auto *LeftVD = cast<VarDecl>(PtrLHS->getDecl());
1161
238
  const auto *RightVD = cast<VarDecl>(PtrRHS->getDecl());
1162
238
  switch (S.lookup(LeftVD)) {
1163
235
    case Strategy::Kind::Span:
1164
235
      if (S.lookup(RightVD) == Strategy::Kind::Span)
1165
227
        return FixItList{};
1166
8
      return std::nullopt;
1167
3
    case Strategy::Kind::Wontfix:
1168
3
      return std::nullopt;
1169
0
    case Strategy::Kind::Iterator:
1170
0
    case Strategy::Kind::Array:
1171
0
    case Strategy::Kind::Vector:
1172
0
      llvm_unreachable("unsupported strategies for FixableGadgets");
1173
238
  }
1174
0
  return std::nullopt;
1175
238
}
1176
1177
1178
std::optional<FixItList>
1179
152
ULCArraySubscriptGadget::getFixits(const Strategy &S) const {
1180
152
  if (const auto *DRE =
1181
152
          dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts()))
1182
152
    if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
1183
152
      switch (S.lookup(VD)) {
1184
152
      case Strategy::Kind::Span: {
1185
        // If the index has a negative constant value, we give up as no valid
1186
        // fix-it can be generated:
1187
152
        const ASTContext &Ctx = // FIXME: we need ASTContext to be passed in!
1188
152
            VD->getASTContext();
1189
152
        if (auto ConstVal = Node->getIdx()->getIntegerConstantExpr(Ctx)) {
1190
149
          if (ConstVal->isNegative())
1191
1
            return std::nullopt;
1192
149
        } else 
if (3
!Node->getIdx()->getType()->isUnsignedIntegerType()3
)
1193
1
          return std::nullopt;
1194
        // no-op is a good fix-it, otherwise
1195
150
        return FixItList{};
1196
152
      }
1197
0
      case Strategy::Kind::Wontfix:
1198
0
      case Strategy::Kind::Iterator:
1199
0
      case Strategy::Kind::Array:
1200
0
      case Strategy::Kind::Vector:
1201
0
        llvm_unreachable("unsupported strategies for FixableGadgets");
1202
152
      }
1203
152
    }
1204
0
  return std::nullopt;
1205
152
}
1206
1207
static std::optional<FixItList> // forward declaration
1208
fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node);
1209
1210
std::optional<FixItList>
1211
17
UPCAddressofArraySubscriptGadget::getFixits(const Strategy &S) const {
1212
17
  auto DREs = getClaimedVarUseSites();
1213
17
  const auto *VD = cast<VarDecl>(DREs.front()->getDecl());
1214
1215
17
  switch (S.lookup(VD)) {
1216
17
  case Strategy::Kind::Span:
1217
17
    return fixUPCAddressofArraySubscriptWithSpan(Node);
1218
0
  case Strategy::Kind::Wontfix:
1219
0
  case Strategy::Kind::Iterator:
1220
0
  case Strategy::Kind::Array:
1221
0
  case Strategy::Kind::Vector:
1222
0
    llvm_unreachable("unsupported strategies for FixableGadgets");
1223
17
  }
1224
0
  return std::nullopt; // something went wrong, no fix-it
1225
17
}
1226
1227
// Return the text representation of the given `APInt Val`:
1228
2
static std::string getAPIntText(APInt Val) {
1229
2
  SmallVector<char> Txt;
1230
2
  Val.toString(Txt, 10, true);
1231
  // APInt::toString does not add '\0' to the end of the string for us:
1232
2
  Txt.push_back('\0');
1233
2
  return Txt.data();
1234
2
}
1235
1236
// Return the source location of the last character of the AST `Node`.
1237
template <typename NodeTy>
1238
static std::optional<SourceLocation>
1239
getEndCharLoc(const NodeTy *Node, const SourceManager &SM,
1240
49
              const LangOptions &LangOpts) {
1241
49
  unsigned TkLen = Lexer::MeasureTokenLength(Node->getEndLoc(), SM, LangOpts);
1242
49
  SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1);
1243
1244
49
  if (Loc.isValid())
1245
49
    return Loc;
1246
1247
0
  return std::nullopt;
1248
49
}
UnsafeBufferUsage.cpp:std::__1::optional<clang::SourceLocation> getEndCharLoc<clang::DeclRefExpr>(clang::DeclRefExpr const*, clang::SourceManager const&, clang::LangOptions const&)
Line
Count
Source
1240
8
              const LangOptions &LangOpts) {
1241
8
  unsigned TkLen = Lexer::MeasureTokenLength(Node->getEndLoc(), SM, LangOpts);
1242
8
  SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1);
1243
1244
8
  if (Loc.isValid())
1245
8
    return Loc;
1246
1247
0
  return std::nullopt;
1248
8
}
UnsafeBufferUsage.cpp:std::__1::optional<clang::SourceLocation> getEndCharLoc<clang::Expr>(clang::Expr const*, clang::SourceManager const&, clang::LangOptions const&)
Line
Count
Source
1240
3
              const LangOptions &LangOpts) {
1241
3
  unsigned TkLen = Lexer::MeasureTokenLength(Node->getEndLoc(), SM, LangOpts);
1242
3
  SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1);
1243
1244
3
  if (Loc.isValid())
1245
3
    return Loc;
1246
1247
0
  return std::nullopt;
1248
3
}
UnsafeBufferUsage.cpp:std::__1::optional<clang::SourceLocation> getEndCharLoc<clang::VarDecl>(clang::VarDecl const*, clang::SourceManager const&, clang::LangOptions const&)
Line
Count
Source
1240
32
              const LangOptions &LangOpts) {
1241
32
  unsigned TkLen = Lexer::MeasureTokenLength(Node->getEndLoc(), SM, LangOpts);
1242
32
  SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1);
1243
1244
32
  if (Loc.isValid())
1245
32
    return Loc;
1246
1247
0
  return std::nullopt;
1248
32
}
UnsafeBufferUsage.cpp:std::__1::optional<clang::SourceLocation> getEndCharLoc<clang::Stmt>(clang::Stmt const*, clang::SourceManager const&, clang::LangOptions const&)
Line
Count
Source
1240
6
              const LangOptions &LangOpts) {
1241
6
  unsigned TkLen = Lexer::MeasureTokenLength(Node->getEndLoc(), SM, LangOpts);
1242
6
  SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1);
1243
1244
6
  if (Loc.isValid())
1245
6
    return Loc;
1246
1247
0
  return std::nullopt;
1248
6
}
1249
1250
// Return the source location just past the last character of the AST `Node`.
1251
template <typename NodeTy>
1252
static std::optional<SourceLocation> getPastLoc(const NodeTy *Node,
1253
                                                const SourceManager &SM,
1254
439
                                                const LangOptions &LangOpts) {
1255
439
  SourceLocation Loc =
1256
439
      Lexer::getLocForEndOfToken(Node->getEndLoc(), 0, SM, LangOpts);
1257
1258
439
  if (Loc.isValid())
1259
437
    return Loc;
1260
1261
2
  return std::nullopt;
1262
439
}
UnsafeBufferUsage.cpp:std::__1::optional<clang::SourceLocation> getPastLoc<clang::DeclRefExpr>(clang::DeclRefExpr const*, clang::SourceManager const&, clang::LangOptions const&)
Line
Count
Source
1254
31
                                                const LangOptions &LangOpts) {
1255
31
  SourceLocation Loc =
1256
31
      Lexer::getLocForEndOfToken(Node->getEndLoc(), 0, SM, LangOpts);
1257
1258
31
  if (Loc.isValid())
1259
31
    return Loc;
1260
1261
0
  return std::nullopt;
1262
31
}
UnsafeBufferUsage.cpp:std::__1::optional<clang::SourceLocation> getPastLoc<clang::Expr>(clang::Expr const*, clang::SourceManager const&, clang::LangOptions const&)
Line
Count
Source
1254
350
                                                const LangOptions &LangOpts) {
1255
350
  SourceLocation Loc =
1256
350
      Lexer::getLocForEndOfToken(Node->getEndLoc(), 0, SM, LangOpts);
1257
1258
350
  if (Loc.isValid())
1259
348
    return Loc;
1260
1261
2
  return std::nullopt;
1262
350
}
UnsafeBufferUsage.cpp:std::__1::optional<clang::SourceLocation> getPastLoc<clang::BinaryOperator>(clang::BinaryOperator const*, clang::SourceManager const&, clang::LangOptions const&)
Line
Count
Source
1254
29
                                                const LangOptions &LangOpts) {
1255
29
  SourceLocation Loc =
1256
29
      Lexer::getLocForEndOfToken(Node->getEndLoc(), 0, SM, LangOpts);
1257
1258
29
  if (Loc.isValid())
1259
29
    return Loc;
1260
1261
0
  return std::nullopt;
1262
29
}
UnsafeBufferUsage.cpp:std::__1::optional<clang::SourceLocation> getPastLoc<clang::UnaryOperator>(clang::UnaryOperator const*, clang::SourceManager const&, clang::LangOptions const&)
Line
Count
Source
1254
29
                                                const LangOptions &LangOpts) {
1255
29
  SourceLocation Loc =
1256
29
      Lexer::getLocForEndOfToken(Node->getEndLoc(), 0, SM, LangOpts);
1257
1258
29
  if (Loc.isValid())
1259
29
    return Loc;
1260
1261
0
  return std::nullopt;
1262
29
}
1263
1264
// Return text representation of an `Expr`.
1265
static std::optional<StringRef> getExprText(const Expr *E,
1266
                                            const SourceManager &SM,
1267
165
                                            const LangOptions &LangOpts) {
1268
165
  std::optional<SourceLocation> LastCharLoc = getPastLoc(E, SM, LangOpts);
1269
1270
165
  if (LastCharLoc)
1271
165
    return Lexer::getSourceText(
1272
165
        CharSourceRange::getCharRange(E->getBeginLoc(), *LastCharLoc), SM,
1273
165
        LangOpts);
1274
1275
0
  return std::nullopt;
1276
165
}
1277
1278
std::optional<FixItList>
1279
29
DerefSimplePtrArithFixableGadget::getFixits(const Strategy &s) const {
1280
29
  const VarDecl *VD = dyn_cast<VarDecl>(BaseDeclRefExpr->getDecl());
1281
1282
29
  if (VD && s.lookup(VD) == Strategy::Kind::Span) {
1283
29
    ASTContext &Ctx = VD->getASTContext();
1284
    // std::span can't represent elements before its begin()
1285
29
    if (auto ConstVal = Offset->getIntegerConstantExpr(Ctx))
1286
29
      if (ConstVal->isNegative())
1287
0
        return std::nullopt;
1288
1289
    // note that the expr may (oddly) has multiple layers of parens
1290
    // example:
1291
    //   *((..(pointer + 123)..))
1292
    // goal:
1293
    //   pointer[123]
1294
    // Fix-It:
1295
    //   remove '*('
1296
    //   replace ' + ' with '['
1297
    //   replace ')' with ']'
1298
1299
    // example:
1300
    //   *((..(123 + pointer)..))
1301
    // goal:
1302
    //   123[pointer]
1303
    // Fix-It:
1304
    //   remove '*('
1305
    //   replace ' + ' with '['
1306
    //   replace ')' with ']'
1307
1308
29
    const Expr *LHS = AddOp->getLHS(), *RHS = AddOp->getRHS();
1309
29
    const SourceManager &SM = Ctx.getSourceManager();
1310
29
    const LangOptions &LangOpts = Ctx.getLangOpts();
1311
29
    CharSourceRange StarWithTrailWhitespace =
1312
29
        clang::CharSourceRange::getCharRange(DerefOp->getOperatorLoc(),
1313
29
                                             LHS->getBeginLoc());
1314
1315
29
    std::optional<SourceLocation> LHSLocation = getPastLoc(LHS, SM, LangOpts);
1316
29
    if (!LHSLocation)
1317
0
      return std::nullopt;
1318
1319
29
    CharSourceRange PlusWithSurroundingWhitespace =
1320
29
        clang::CharSourceRange::getCharRange(*LHSLocation, RHS->getBeginLoc());
1321
1322
29
    std::optional<SourceLocation> AddOpLocation =
1323
29
        getPastLoc(AddOp, SM, LangOpts);
1324
29
    std::optional<SourceLocation> DerefOpLocation =
1325
29
        getPastLoc(DerefOp, SM, LangOpts);
1326
1327
29
    if (!AddOpLocation || !DerefOpLocation)
1328
0
      return std::nullopt;
1329
1330
29
    CharSourceRange ClosingParenWithPrecWhitespace =
1331
29
        clang::CharSourceRange::getCharRange(*AddOpLocation, *DerefOpLocation);
1332
1333
29
    return FixItList{
1334
29
        {FixItHint::CreateRemoval(StarWithTrailWhitespace),
1335
29
         FixItHint::CreateReplacement(PlusWithSurroundingWhitespace, "["),
1336
29
         FixItHint::CreateReplacement(ClosingParenWithPrecWhitespace, "]")}};
1337
29
  }
1338
0
  return std::nullopt; // something wrong or unsupported, give up
1339
29
}
1340
1341
std::optional<FixItList>
1342
8
PointerDereferenceGadget::getFixits(const Strategy &S) const {
1343
8
  const VarDecl *VD = cast<VarDecl>(BaseDeclRefExpr->getDecl());
1344
8
  switch (S.lookup(VD)) {
1345
8
  case Strategy::Kind::Span: {
1346
8
    ASTContext &Ctx = VD->getASTContext();
1347
8
    SourceManager &SM = Ctx.getSourceManager();
1348
    // Required changes: *(ptr); => (ptr[0]); and *ptr; => ptr[0]
1349
    // Deletes the *operand
1350
8
    CharSourceRange derefRange = clang::CharSourceRange::getCharRange(
1351
8
        Op->getBeginLoc(), Op->getBeginLoc().getLocWithOffset(1));
1352
    // Inserts the [0]
1353
8
    std::optional<SourceLocation> EndOfOperand =
1354
8
        getEndCharLoc(BaseDeclRefExpr, SM, Ctx.getLangOpts());
1355
8
    if (EndOfOperand) {
1356
8
      return FixItList{{FixItHint::CreateRemoval(derefRange),
1357
8
                        FixItHint::CreateInsertion(
1358
8
                            (*EndOfOperand).getLocWithOffset(1), "[0]")}};
1359
8
    }
1360
8
  }
1361
8
    
[[fallthrough]];0
1362
0
  case Strategy::Kind::Iterator:
1363
0
  case Strategy::Kind::Array:
1364
0
  case Strategy::Kind::Vector:
1365
0
    llvm_unreachable("Strategy not implemented yet!");
1366
0
  case Strategy::Kind::Wontfix:
1367
0
    llvm_unreachable("Invalid strategy!");
1368
8
  }
1369
1370
0
  return std::nullopt;
1371
8
}
1372
1373
// Generates fix-its replacing an expression of the form UPC(DRE) with
1374
// `DRE.data()`
1375
std::optional<FixItList> UPCStandalonePointerGadget::getFixits(const Strategy &S)
1376
31
      const {
1377
31
  const auto VD = cast<VarDecl>(Node->getDecl());
1378
31
  switch (S.lookup(VD)) {
1379
31
    case Strategy::Kind::Span: {
1380
31
      ASTContext &Ctx = VD->getASTContext();
1381
31
      SourceManager &SM = Ctx.getSourceManager();
1382
      // Inserts the .data() after the DRE
1383
31
      std::optional<SourceLocation> EndOfOperand =
1384
31
          getPastLoc(Node, SM, Ctx.getLangOpts());
1385
1386
31
      if (EndOfOperand)
1387
31
        return FixItList{{FixItHint::CreateInsertion(
1388
31
            *EndOfOperand, ".data()")}};
1389
31
    }
1390
31
      
[[fallthrough]];0
1391
0
    case Strategy::Kind::Wontfix:
1392
0
    case Strategy::Kind::Iterator:
1393
0
    case Strategy::Kind::Array:
1394
0
    case Strategy::Kind::Vector:
1395
0
      llvm_unreachable("unsupported strategies for FixableGadgets");
1396
31
  }
1397
1398
0
  return std::nullopt;
1399
31
}
1400
1401
// Generates fix-its replacing an expression of the form `&DRE[e]` with
1402
// `&DRE.data()[e]`:
1403
static std::optional<FixItList>
1404
17
fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node) {
1405
17
  const auto *ArraySub = cast<ArraySubscriptExpr>(Node->getSubExpr());
1406
17
  const auto *DRE = cast<DeclRefExpr>(ArraySub->getBase()->IgnoreImpCasts());
1407
  // FIXME: this `getASTContext` call is costly, we should pass the
1408
  // ASTContext in:
1409
17
  const ASTContext &Ctx = DRE->getDecl()->getASTContext();
1410
17
  const Expr *Idx = ArraySub->getIdx();
1411
17
  const SourceManager &SM = Ctx.getSourceManager();
1412
17
  const LangOptions &LangOpts = Ctx.getLangOpts();
1413
17
  std::stringstream SS;
1414
17
  bool IdxIsLitZero = false;
1415
1416
17
  if (auto ICE = Idx->getIntegerConstantExpr(Ctx))
1417
12
    if ((*ICE).isZero())
1418
2
      IdxIsLitZero = true;
1419
17
  std::optional<StringRef> DreString = getExprText(DRE, SM, LangOpts);
1420
17
  if (!DreString)
1421
0
    return std::nullopt;
1422
1423
17
  if (IdxIsLitZero) {
1424
    // If the index is literal zero, we produce the most concise fix-it:
1425
2
    SS << (*DreString).str() << ".data()";
1426
15
  } else {
1427
15
    std::optional<StringRef> IndexString = getExprText(Idx, SM, LangOpts);
1428
15
    if (!IndexString)
1429
0
      return std::nullopt;
1430
1431
15
    SS << "&" << (*DreString).str() << ".data()"
1432
15
       << "[" << (*IndexString).str() << "]";
1433
15
  }
1434
17
  return FixItList{
1435
17
      FixItHint::CreateReplacement(Node->getSourceRange(), SS.str())};
1436
17
}
1437
1438
1439
6
std::optional<FixItList> UPCPreIncrementGadget::getFixits(const Strategy &S) const {
1440
6
  DeclUseList DREs = getClaimedVarUseSites();
1441
1442
6
  if (DREs.size() != 1)
1443
0
    return std::nullopt; // In cases of `++Ptr` where `Ptr` is not a DRE, we
1444
                         // give up
1445
6
  if (const VarDecl *VD = dyn_cast<VarDecl>(DREs.front()->getDecl())) {
1446
6
    if (S.lookup(VD) == Strategy::Kind::Span) {
1447
6
      FixItList Fixes;
1448
6
      std::stringstream SS;
1449
6
      const Stmt *PreIncNode = getBaseStmt();
1450
6
      StringRef varName = VD->getName();
1451
6
      const ASTContext &Ctx = VD->getASTContext();
1452
1453
      // To transform UPC(++p) to UPC((p = p.subspan(1)).data()):
1454
6
      SS << "(" << varName.data() << " = " << varName.data()
1455
6
         << ".subspan(1)).data()";
1456
6
      std::optional<SourceLocation> PreIncLocation =
1457
6
          getEndCharLoc(PreIncNode, Ctx.getSourceManager(), Ctx.getLangOpts());
1458
6
      if (!PreIncLocation)
1459
0
        return std::nullopt;
1460
1461
6
      Fixes.push_back(FixItHint::CreateReplacement(
1462
6
          SourceRange(PreIncNode->getBeginLoc(), *PreIncLocation), SS.str()));
1463
6
      return Fixes;
1464
6
    }
1465
6
  }
1466
0
  return std::nullopt; // Not in the cases that we can handle for now, give up.
1467
6
}
1468
1469
1470
// For a non-null initializer `Init` of `T *` type, this function returns
1471
// `FixItHint`s producing a list initializer `{Init,  S}` as a part of a fix-it
1472
// to output stream.
1473
// In many cases, this function cannot figure out the actual extent `S`.  It
1474
// then will use a place holder to replace `S` to ask users to fill `S` in.  The
1475
// initializer shall be used to initialize a variable of type `std::span<T>`.
1476
//
1477
// FIXME: Support multi-level pointers
1478
//
1479
// Parameters:
1480
//   `Init` a pointer to the initializer expression
1481
//   `Ctx` a reference to the ASTContext
1482
static FixItList
1483
populateInitializerFixItWithSpan(const Expr *Init, const ASTContext &Ctx,
1484
159
                                 const StringRef UserFillPlaceHolder) {
1485
159
  const SourceManager &SM = Ctx.getSourceManager();
1486
159
  const LangOptions &LangOpts = Ctx.getLangOpts();
1487
1488
  // If `Init` has a constant value that is (or equivalent to) a
1489
  // NULL pointer, we use the default constructor to initialize the span
1490
  // object, i.e., a `std:span` variable declaration with no initializer.
1491
  // So the fix-it is just to remove the initializer.
1492
159
  if (Init->isNullPointerConstant(
1493
159
          std::remove_const_t<ASTContext &>(Ctx),
1494
          // FIXME: Why does this function not ask for `const ASTContext
1495
          // &`? It should. Maybe worth an NFC patch later.
1496
159
          Expr::NullPointerConstantValueDependence::
1497
159
              NPC_ValueDependentIsNotNull)) {
1498
3
    std::optional<SourceLocation> InitLocation =
1499
3
        getEndCharLoc(Init, SM, LangOpts);
1500
3
    if (!InitLocation)
1501
0
      return {};
1502
1503
3
    SourceRange SR(Init->getBeginLoc(), *InitLocation);
1504
1505
3
    return {FixItHint::CreateRemoval(SR)};
1506
3
  }
1507
1508
156
  FixItList FixIts{};
1509
156
  std::string ExtentText = UserFillPlaceHolder.data();
1510
156
  StringRef One = "1";
1511
1512
  // Insert `{` before `Init`:
1513
156
  FixIts.push_back(FixItHint::CreateInsertion(Init->getBeginLoc(), "{"));
1514
  // Try to get the data extent. Break into different cases:
1515
156
  if (auto CxxNew = dyn_cast<CXXNewExpr>(Init->IgnoreImpCasts())) {
1516
    // In cases `Init` is `new T[n]` and there is no explicit cast over
1517
    // `Init`, we know that `Init` must evaluates to a pointer to `n` objects
1518
    // of `T`. So the extent is `n` unless `n` has side effects.  Similar but
1519
    // simpler for the case where `Init` is `new T`.
1520
135
    if (const Expr *Ext = CxxNew->getArraySize().value_or(nullptr)) {
1521
134
      if (!Ext->HasSideEffects(Ctx)) {
1522
133
        std::optional<StringRef> ExtentString = getExprText(Ext, SM, LangOpts);
1523
133
        if (!ExtentString)
1524
0
          return {};
1525
133
        ExtentText = *ExtentString;
1526
133
      }
1527
134
    } else 
if (1
!CxxNew->isArray()1
)
1528
      // Although the initializer is not allocating a buffer, the pointer
1529
      // variable could still be used in buffer access operations.
1530
1
      ExtentText = One;
1531
135
  } else 
if (const auto *21
CArrTy21
= Ctx.getAsConstantArrayType(
1532
21
                 Init->IgnoreImpCasts()->getType())) {
1533
    // In cases `Init` is of an array type after stripping off implicit casts,
1534
    // the extent is the array size.  Note that if the array size is not a
1535
    // constant, we cannot use it as the extent.
1536
2
    ExtentText = getAPIntText(CArrTy->getSize());
1537
19
  } else {
1538
    // In cases `Init` is of the form `&Var` after stripping of implicit
1539
    // casts, where `&` is the built-in operator, the extent is 1.
1540
19
    if (auto AddrOfExpr = dyn_cast<UnaryOperator>(Init->IgnoreImpCasts()))
1541
2
      if (AddrOfExpr->getOpcode() == UnaryOperatorKind::UO_AddrOf &&
1542
2
          
isa_and_present<DeclRefExpr>(AddrOfExpr->getSubExpr())1
)
1543
1
        ExtentText = One;
1544
    // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, `std::addressof`,
1545
    // and explicit casting, etc. etc.
1546
19
  }
1547
1548
156
  SmallString<32> StrBuffer{};
1549
156
  std::optional<SourceLocation> LocPassInit = getPastLoc(Init, SM, LangOpts);
1550
1551
156
  if (!LocPassInit)
1552
2
    return {};
1553
1554
154
  StrBuffer.append(", ");
1555
154
  StrBuffer.append(ExtentText);
1556
154
  StrBuffer.append("}");
1557
154
  FixIts.push_back(FixItHint::CreateInsertion(*LocPassInit, StrBuffer.str()));
1558
154
  return FixIts;
1559
156
}
1560
1561
// For a `VarDecl` of the form `T  * var (= Init)?`, this
1562
// function generates a fix-it for the declaration, which re-declares `var` to
1563
// be of `span<T>` type and transforms the initializer, if present, to a span
1564
// constructor---`span<T> var {Init, Extent}`, where `Extent` may need the user
1565
// to fill in.
1566
//
1567
// FIXME: support Multi-level pointers
1568
//
1569
// Parameters:
1570
//   `D` a pointer the variable declaration node
1571
//   `Ctx` a reference to the ASTContext
1572
// Returns:
1573
//    the generated fix-it
1574
static FixItList fixVarDeclWithSpan(const VarDecl *D, const ASTContext &Ctx,
1575
191
                                    const StringRef UserFillPlaceHolder) {
1576
191
  const QualType &SpanEltT = D->getType()->getPointeeType();
1577
191
  assert(!SpanEltT.isNull() && "Trying to fix a non-pointer type variable!");
1578
1579
191
  FixItList FixIts{};
1580
191
  std::optional<SourceLocation>
1581
191
      ReplacementLastLoc; // the inclusive end location of the replacement
1582
191
  const SourceManager &SM = Ctx.getSourceManager();
1583
1584
191
  if (const Expr *Init = D->getInit()) {
1585
159
    FixItList InitFixIts =
1586
159
        populateInitializerFixItWithSpan(Init, Ctx, UserFillPlaceHolder);
1587
1588
159
    if (InitFixIts.empty())
1589
2
      return {};
1590
1591
    // The loc right before the initializer:
1592
157
    ReplacementLastLoc = Init->getBeginLoc().getLocWithOffset(-1);
1593
157
    FixIts.insert(FixIts.end(), std::make_move_iterator(InitFixIts.begin()),
1594
157
                  std::make_move_iterator(InitFixIts.end()));
1595
157
  } else
1596
32
    ReplacementLastLoc = getEndCharLoc(D, SM, Ctx.getLangOpts());
1597
1598
  // Producing fix-it for variable declaration---make `D` to be of span type:
1599
189
  SmallString<32> Replacement;
1600
189
  raw_svector_ostream OS(Replacement);
1601
1602
189
  OS << "std::span<" << SpanEltT.getAsString() << "> " << D->getName();
1603
1604
189
  if (!ReplacementLastLoc)
1605
0
    return {};
1606
1607
189
  FixIts.push_back(FixItHint::CreateReplacement(
1608
189
      SourceRange{D->getBeginLoc(), *ReplacementLastLoc}, OS.str()));
1609
189
  return FixIts;
1610
189
}
1611
1612
static FixItList fixVariableWithSpan(const VarDecl *VD,
1613
                                     const DeclUseTracker &Tracker,
1614
                                     const ASTContext &Ctx,
1615
193
                                     UnsafeBufferUsageHandler &Handler) {
1616
193
  const DeclStmt *DS = Tracker.lookupDecl(VD);
1617
193
  assert(DS && "Fixing non-local variables not implemented yet!");
1618
193
  if (!DS->isSingleDecl()) {
1619
    // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt`
1620
2
    return {};
1621
2
  }
1622
  // Currently DS is an unused variable but we'll need it when
1623
  // non-single decls are implemented, where the pointee type name
1624
  // and the '*' are spread around the place.
1625
191
  (void)DS;
1626
1627
  // FIXME: handle cases where DS has multiple declarations
1628
191
  return fixVarDeclWithSpan(VD, Ctx, Handler.getUserFillPlaceHolder());
1629
193
}
1630
1631
static FixItList fixVariable(const VarDecl *VD, Strategy::Kind K,
1632
                             const DeclUseTracker &Tracker,
1633
                             const ASTContext &Ctx,
1634
197
                             UnsafeBufferUsageHandler &Handler) {
1635
197
  switch (K) {
1636
197
  case Strategy::Kind::Span: {
1637
197
    if (VD->getType()->isPointerType() && 
VD->isLocalVarDecl()193
)
1638
193
      return fixVariableWithSpan(VD, Tracker, Ctx, Handler);
1639
4
    return {};
1640
197
  }
1641
0
  case Strategy::Kind::Iterator:
1642
0
  case Strategy::Kind::Array:
1643
0
  case Strategy::Kind::Vector:
1644
0
    llvm_unreachable("Strategy not implemented yet!");
1645
0
  case Strategy::Kind::Wontfix:
1646
0
    llvm_unreachable("Invalid strategy!");
1647
197
  }
1648
0
  llvm_unreachable("Unknown strategy!");
1649
0
}
1650
1651
// Returns true iff there exists a `FixItHint` 'h' in `FixIts` such that the
1652
// `RemoveRange` of 'h' overlaps with a macro use.
1653
176
static bool overlapWithMacro(const FixItList &FixIts) {
1654
  // FIXME: For now we only check if the range (or the first token) is (part of)
1655
  // a macro expansion.  Ideally, we want to check for all tokens in the range.
1656
623
  return llvm::any_of(FixIts, [](const FixItHint &Hint) {
1657
623
    auto Range = Hint.RemoveRange;
1658
623
    if (Range.getBegin().isMacroID() || 
Range.getEnd().isMacroID()620
)
1659
      // If the range (or the first token) is (part of) a macro expansion:
1660
3
      return true;
1661
620
    return false;
1662
623
  });
1663
176
}
1664
1665
static bool impossibleToFixForVar(const FixableGadgetSets &FixablesForUnsafeVars,
1666
                                  const Strategy &S,
1667
61
                                  const VarDecl * Var) {
1668
113
  for (const auto &F : FixablesForUnsafeVars.byVar.find(Var)->second) {
1669
113
    std::optional<FixItList> Fixits = F->getFixits(S);
1670
113
    if (!Fixits) {
1671
0
      return true;
1672
0
    }
1673
113
  }
1674
61
  return false;
1675
61
}
1676
1677
static std::map<const VarDecl *, FixItList>
1678
getFixIts(FixableGadgetSets &FixablesForUnsafeVars, const Strategy &S,
1679
        const DeclUseTracker &Tracker, const ASTContext &Ctx,
1680
        UnsafeBufferUsageHandler &Handler,
1681
187
        const DefMapTy &VarGrpMap) {
1682
187
  std::map<const VarDecl *, FixItList> FixItsForVariable;
1683
197
  for (const auto &[VD, Fixables] : FixablesForUnsafeVars.byVar) {
1684
197
    const Strategy::Kind ReplacementTypeForVD = S.lookup(VD);
1685
197
    FixItsForVariable[VD] =
1686
197
        fixVariable(VD, ReplacementTypeForVD, Tracker, Ctx, Handler);
1687
    // If we fail to produce Fix-It for the declaration we have to skip the
1688
    // variable entirely.
1689
197
    if (FixItsForVariable[VD].empty()) {
1690
8
      FixItsForVariable.erase(VD);
1691
8
      continue;
1692
8
    }
1693
189
    bool ImpossibleToFix = false;
1694
189
    llvm::SmallVector<FixItHint, 16> FixItsForVD;
1695
368
    for (const auto &F : Fixables) {
1696
368
      std::optional<FixItList> Fixits = F->getFixits(S);
1697
368
      if (!Fixits) {
1698
13
        ImpossibleToFix = true;
1699
13
        break;
1700
355
      } else {
1701
355
        const FixItList CorrectFixes = Fixits.value();
1702
355
        FixItsForVD.insert(FixItsForVD.end(), CorrectFixes.begin(),
1703
355
                           CorrectFixes.end());
1704
355
      }
1705
368
    }
1706
1707
189
    if (ImpossibleToFix) {
1708
13
      FixItsForVariable.erase(VD);
1709
13
      continue;
1710
13
    }
1711
    
1712
176
    const auto VarGroupForVD = VarGrpMap.find(VD);
1713
176
    if (VarGroupForVD != VarGrpMap.end()) {
1714
177
      for (const VarDecl * V : VarGroupForVD->second) {
1715
177
        if (V == VD) {
1716
116
          continue;
1717
116
        }
1718
61
        if (impossibleToFixForVar(FixablesForUnsafeVars, S, V)) {
1719
0
          ImpossibleToFix = true;
1720
0
          break;
1721
0
        }
1722
61
      }
1723
1724
116
      if (ImpossibleToFix) {
1725
0
        FixItsForVariable.erase(VD);
1726
0
        for (const VarDecl * V : VarGroupForVD->second) {
1727
0
          FixItsForVariable.erase(V);
1728
0
        }
1729
0
        continue;
1730
0
      }
1731
116
    }
1732
176
    FixItsForVariable[VD].insert(FixItsForVariable[VD].end(),
1733
176
                                 FixItsForVD.begin(), FixItsForVD.end());
1734
1735
    // Fix-it shall not overlap with macros or/and templates:
1736
176
    if (overlapWithMacro(FixItsForVariable[VD]) ||
1737
176
        clang::internal::anyConflict(FixItsForVariable[VD],
1738
173
                                     Ctx.getSourceManager())) {
1739
3
      FixItsForVariable.erase(VD);
1740
3
      continue;
1741
3
    }
1742
176
  }
1743
  
1744
187
  for (auto VD : FixItsForVariable) {
1745
173
    const auto VarGroupForVD = VarGrpMap.find(VD.first);
1746
173
    const Strategy::Kind ReplacementTypeForVD = S.lookup(VD.first);
1747
173
    if (VarGroupForVD != VarGrpMap.end()) {
1748
174
      for (const VarDecl * Var : VarGroupForVD->second) {
1749
174
        if (Var == VD.first) {
1750
113
          continue;
1751
113
        }
1752
        
1753
61
        FixItList GroupFix;
1754
61
        if (FixItsForVariable.find(Var) == FixItsForVariable.end()) {
1755
0
          GroupFix = fixVariable(Var, ReplacementTypeForVD, Tracker,
1756
0
                                           Var->getASTContext(), Handler);
1757
61
        } else {
1758
61
          GroupFix = FixItsForVariable[Var];
1759
61
        }
1760
        
1761
194
        for (auto Fix : GroupFix) {
1762
194
          FixItsForVariable[VD.first].push_back(Fix);
1763
194
        }
1764
61
      }
1765
113
    }
1766
173
  }
1767
187
  return FixItsForVariable;
1768
187
}
1769
1770
1771
static Strategy
1772
187
getNaiveStrategy(const llvm::SmallVectorImpl<const VarDecl *> &UnsafeVars) {
1773
187
  Strategy S;
1774
197
  for (const VarDecl *VD : UnsafeVars) {
1775
197
    S.set(VD, Strategy::Kind::Span);
1776
197
  }
1777
187
  return S;
1778
187
}
1779
1780
void clang::checkUnsafeBufferUsage(const Decl *D,
1781
                                   UnsafeBufferUsageHandler &Handler,
1782
297
                                   bool EmitSuggestions) {
1783
297
  assert(D && D->getBody());
1784
297
  WarningGadgetSets UnsafeOps;
1785
297
  FixableGadgetSets FixablesForAllVars;
1786
1787
297
  auto [FixableGadgets, WarningGadgets, Tracker] =
1788
297
    findGadgets(D, Handler, EmitSuggestions);
1789
1790
297
  if (!EmitSuggestions) {
1791
    // Our job is very easy without suggestions. Just warn about
1792
    // every problematic operation and consider it done. No need to deal
1793
    // with fixable gadgets, no need to group operations by variable.
1794
252
    for (const auto &G : WarningGadgets) {
1795
252
      Handler.handleUnsafeOperation(G->getBaseStmt(),
1796
252
                                    /*IsRelatedToDecl=*/false);
1797
252
    }
1798
1799
    // This return guarantees that most of the machine doesn't run when
1800
    // suggestions aren't requested.
1801
110
    assert(FixableGadgets.size() == 0 &&
1802
110
           "Fixable gadgets found but suggestions not requested!");
1803
110
    return;
1804
110
  }
1805
1806
187
  UnsafeOps = groupWarningGadgetsByVar(std::move(WarningGadgets));
1807
187
  FixablesForAllVars = groupFixablesByVar(std::move(FixableGadgets));
1808
1809
187
  std::map<const VarDecl *, FixItList> FixItsForVariableGroup;
1810
187
  DefMapTy VariableGroupsMap{};
1811
1812
  // Filter out non-local vars and vars with unclaimed DeclRefExpr-s.
1813
187
  for (auto it = FixablesForAllVars.byVar.cbegin();
1814
442
       it != FixablesForAllVars.byVar.cend();) {
1815
    // FIXME: Support ParmVarDecl as well.
1816
255
    if (!it->first->isLocalVarDecl() || 
Tracker.hasUnclaimedUses(it->first)230
) {
1817
58
      it = FixablesForAllVars.byVar.erase(it);
1818
197
    } else {
1819
197
      ++it;
1820
197
    }
1821
255
  }
1822
1823
187
  llvm::SmallVector<const VarDecl *, 16> UnsafeVars;
1824
187
  for (const auto &[VD, ignore] : FixablesForAllVars.byVar)
1825
197
    UnsafeVars.push_back(VD);
1826
  
1827
  // Fixpoint iteration for pointer assignments
1828
187
  using DepMapTy = DenseMap<const VarDecl *, std::set<const VarDecl *>>;
1829
187
  DepMapTy DependenciesMap{};
1830
187
  DepMapTy PtrAssignmentGraph{};
1831
    
1832
197
  for (auto it : FixablesForAllVars.byVar) {
1833
387
    for (const FixableGadget *fixable : it.second) {
1834
387
      std::optional<std::pair<const VarDecl *, const VarDecl *>> ImplPair =
1835
387
                                  fixable->getStrategyImplications();
1836
387
      if (ImplPair) {
1837
147
        std::pair<const VarDecl *, const VarDecl *> Impl = ImplPair.value();
1838
147
        PtrAssignmentGraph[Impl.first].insert(Impl.second);
1839
147
      }
1840
387
    }
1841
197
  }
1842
    
1843
  /*
1844
   The following code does a BFS traversal of the `PtrAssignmentGraph`
1845
   considering all unsafe vars as starting nodes and constructs an undirected
1846
   graph `DependenciesMap`. Constructing the `DependenciesMap` in this manner
1847
   elimiates all variables that are unreachable from any unsafe var. In other
1848
   words, this removes all dependencies that don't include any unsafe variable
1849
   and consequently don't need any fixit generation.
1850
   Note: A careful reader would observe that the code traverses
1851
   `PtrAssignmentGraph` using `CurrentVar` but adds edges between `Var` and
1852
   `Adj` and not between `CurrentVar` and `Adj`. Both approaches would
1853
   achieve the same result but the one used here dramatically cuts the
1854
   amount of hoops the second part of the algorithm needs to jump, given that
1855
   a lot of these connections become "direct". The reader is advised not to
1856
   imagine how the graph is transformed because of using `Var` instead of
1857
   `CurrentVar`. The reader can continue reading as if `CurrentVar` was used,
1858
   and think about why it's equivalent later.
1859
   */
1860
187
  std::set<const VarDecl *> VisitedVarsDirected{};
1861
222
  for (const auto &[Var, ignore] : UnsafeOps.byVar) {
1862
222
    if (VisitedVarsDirected.find(Var) == VisitedVarsDirected.end()) {
1863
  
1864
212
      std::queue<const VarDecl*> QueueDirected{};
1865
212
      QueueDirected.push(Var);
1866
471
      while(!QueueDirected.empty()) {
1867
259
        const VarDecl* CurrentVar = QueueDirected.front();
1868
259
        QueueDirected.pop();
1869
259
        VisitedVarsDirected.insert(CurrentVar);
1870
259
        auto AdjacentNodes = PtrAssignmentGraph[CurrentVar];
1871
259
        for (const VarDecl *Adj : AdjacentNodes) {
1872
61
          if (VisitedVarsDirected.find(Adj) == VisitedVarsDirected.end()) {
1873
47
            QueueDirected.push(Adj);
1874
47
          }
1875
61
          DependenciesMap[Var].insert(Adj);
1876
61
          DependenciesMap[Adj].insert(Var);
1877
61
        }
1878
259
      }
1879
212
    }
1880
222
  }
1881
  
1882
  // Group Connected Components for Unsafe Vars
1883
  // (Dependencies based on pointer assignments)
1884
187
  std::set<const VarDecl *> VisitedVars{};
1885
222
  for (const auto &[Var, ignore] : UnsafeOps.byVar) {
1886
222
    if (VisitedVars.find(Var) == VisitedVars.end()) {
1887
204
      std::vector<const VarDecl *> VarGroup{};
1888
  
1889
204
      std::queue<const VarDecl*> Queue{};
1890
204
      Queue.push(Var);
1891
463
      while(!Queue.empty()) {
1892
259
        const VarDecl* CurrentVar = Queue.front();
1893
259
        Queue.pop();
1894
259
        VisitedVars.insert(CurrentVar);
1895
259
        VarGroup.push_back(CurrentVar);
1896
259
        auto AdjacentNodes = DependenciesMap[CurrentVar];
1897
259
        for (const VarDecl *Adj : AdjacentNodes) {
1898
116
          if (VisitedVars.find(Adj) == VisitedVars.end()) {
1899
55
            Queue.push(Adj);
1900
55
          }
1901
116
        }
1902
259
      }
1903
259
      for (const VarDecl * V : VarGroup) {
1904
259
        if (UnsafeOps.byVar.find(V) != UnsafeOps.byVar.end()) {
1905
222
          VariableGroupsMap[V] = VarGroup;
1906
222
        }
1907
259
      }
1908
204
    }
1909
222
  }
1910
1911
187
  Strategy NaiveStrategy = getNaiveStrategy(UnsafeVars);
1912
187
  FixItsForVariableGroup = getFixIts(FixablesForAllVars, NaiveStrategy, Tracker,
1913
187
                                D->getASTContext(), Handler, VariableGroupsMap);
1914
1915
  // FIXME Detect overlapping FixIts.
1916
1917
187
  for (const auto &G : UnsafeOps.noVar) {
1918
80
    Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false);
1919
80
  }
1920
1921
222
  for (const auto &[VD, WarningGadgets] : UnsafeOps.byVar) {
1922
222
    auto FixItsIt = FixItsForVariableGroup.find(VD);
1923
222
    Handler.handleUnsafeVariableGroup(VD, VariableGroupsMap,
1924
222
                                      FixItsIt != FixItsForVariableGroup.end()
1925
222
                                      ? 
std::move(FixItsIt->second)113
1926
222
                                      : 
FixItList{}109
);
1927
307
    for (const auto &G : WarningGadgets) {
1928
307
      Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/true);
1929
307
    }
1930
222
  }
1931
187
}