/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/StaticAnalyzer/Checkers/CStringSyntaxChecker.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //== CStringSyntaxChecker.cpp - CoreFoundation containers API *- C++ -*-==// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // An AST checker that looks for common pitfalls when using C string APIs. |
10 | | // - Identifies erroneous patterns in the last argument to strncat - the number |
11 | | // of bytes to copy. |
12 | | // |
13 | | //===----------------------------------------------------------------------===// |
14 | | #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" |
15 | | #include "clang/AST/Expr.h" |
16 | | #include "clang/AST/OperationKinds.h" |
17 | | #include "clang/AST/StmtVisitor.h" |
18 | | #include "clang/Analysis/AnalysisDeclContext.h" |
19 | | #include "clang/Basic/TargetInfo.h" |
20 | | #include "clang/Basic/TypeTraits.h" |
21 | | #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" |
22 | | #include "clang/StaticAnalyzer/Core/Checker.h" |
23 | | #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" |
24 | | #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" |
25 | | #include "llvm/ADT/SmallString.h" |
26 | | #include "llvm/Support/raw_ostream.h" |
27 | | |
28 | | using namespace clang; |
29 | | using namespace ento; |
30 | | |
31 | | namespace { |
32 | | class WalkAST: public StmtVisitor<WalkAST> { |
33 | | const CheckerBase *Checker; |
34 | | BugReporter &BR; |
35 | | AnalysisDeclContext* AC; |
36 | | |
37 | | /// Check if two expressions refer to the same declaration. |
38 | 48 | bool sameDecl(const Expr *A1, const Expr *A2) { |
39 | 48 | if (const auto *D1 = dyn_cast<DeclRefExpr>(A1->IgnoreParenCasts())) |
40 | 48 | if (const auto *D2 = dyn_cast<DeclRefExpr>(A2->IgnoreParenCasts())) |
41 | 48 | return D1->getDecl() == D2->getDecl(); |
42 | 0 | return false; |
43 | 48 | } |
44 | | |
45 | | /// Check if the expression E is a sizeof(WithArg). |
46 | 228 | bool isSizeof(const Expr *E, const Expr *WithArg) { |
47 | 228 | if (const auto *UE = dyn_cast<UnaryExprOrTypeTraitExpr>(E)) |
48 | 52 | if (UE->getKind() == UETT_SizeOf && !UE->isArgumentType()) |
49 | 44 | return sameDecl(UE->getArgumentExpr(), WithArg); |
50 | 184 | return false; |
51 | 228 | } |
52 | | |
53 | | /// Check if the expression E is a strlen(WithArg). |
54 | 8 | bool isStrlen(const Expr *E, const Expr *WithArg) { |
55 | 8 | if (const auto *CE = dyn_cast<CallExpr>(E)) { |
56 | 4 | const FunctionDecl *FD = CE->getDirectCallee(); |
57 | 4 | if (!FD) |
58 | 0 | return false; |
59 | 4 | return (CheckerContext::isCLibraryFunction(FD, "strlen") && |
60 | 4 | sameDecl(CE->getArg(0), WithArg)); |
61 | 4 | } |
62 | 4 | return false; |
63 | 8 | } |
64 | | |
65 | | /// Check if the expression is an integer literal with value 1. |
66 | 4 | bool isOne(const Expr *E) { |
67 | 4 | if (const auto *IL = dyn_cast<IntegerLiteral>(E)) |
68 | 4 | return (IL->getValue().isIntN(1)); |
69 | 0 | return false; |
70 | 4 | } |
71 | | |
72 | 36 | StringRef getPrintableName(const Expr *E) { |
73 | 36 | if (const auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenCasts())) |
74 | 28 | return D->getDecl()->getName(); |
75 | 8 | return StringRef(); |
76 | 36 | } |
77 | | |
78 | | /// Identify erroneous patterns in the last argument to strncat - the number |
79 | | /// of bytes to copy. |
80 | | bool containsBadStrncatPattern(const CallExpr *CE); |
81 | | |
82 | | /// Identify erroneous patterns in the last argument to strlcpy - the number |
83 | | /// of bytes to copy. |
84 | | /// The bad pattern checked is when the size is known |
85 | | /// to be larger than the destination can handle. |
86 | | /// char dst[2]; |
87 | | /// size_t cpy = 4; |
88 | | /// strlcpy(dst, "abcd", sizeof("abcd") - 1); |
89 | | /// strlcpy(dst, "abcd", 4); |
90 | | /// strlcpy(dst + 3, "abcd", 2); |
91 | | /// strlcpy(dst, "abcd", cpy); |
92 | | /// Identify erroneous patterns in the last argument to strlcat - the number |
93 | | /// of bytes to copy. |
94 | | /// The bad pattern checked is when the last argument is basically |
95 | | /// pointing to the destination buffer size or argument larger or |
96 | | /// equal to. |
97 | | /// char dst[2]; |
98 | | /// strlcat(dst, src2, sizeof(dst)); |
99 | | /// strlcat(dst, src2, 2); |
100 | | /// strlcat(dst, src2, 10); |
101 | | bool containsBadStrlcpyStrlcatPattern(const CallExpr *CE); |
102 | | |
103 | | public: |
104 | | WalkAST(const CheckerBase *Checker, BugReporter &BR, AnalysisDeclContext *AC) |
105 | 1.95k | : Checker(Checker), BR(BR), AC(AC) {} |
106 | | |
107 | | // Statement visitor methods. |
108 | | void VisitChildren(Stmt *S); |
109 | 49.3k | void VisitStmt(Stmt *S) { |
110 | 49.3k | VisitChildren(S); |
111 | 49.3k | } |
112 | | void VisitCallExpr(CallExpr *CE); |
113 | | }; |
114 | | } // end anonymous namespace |
115 | | |
116 | | // The correct size argument should look like following: |
117 | | // strncat(dst, src, sizeof(dst) - strlen(dest) - 1); |
118 | | // We look for the following anti-patterns: |
119 | | // - strncat(dst, src, sizeof(dst) - strlen(dst)); |
120 | | // - strncat(dst, src, sizeof(dst) - 1); |
121 | | // - strncat(dst, src, sizeof(dst)); |
122 | 113 | bool WalkAST::containsBadStrncatPattern(const CallExpr *CE) { |
123 | 113 | if (CE->getNumArgs() != 3) |
124 | 41 | return false; |
125 | 72 | const Expr *DstArg = CE->getArg(0); |
126 | 72 | const Expr *SrcArg = CE->getArg(1); |
127 | 72 | const Expr *LenArg = CE->getArg(2); |
128 | | |
129 | | // Identify wrong size expressions, which are commonly used instead. |
130 | 72 | if (const auto *BE = dyn_cast<BinaryOperator>(LenArg->IgnoreParenCasts())) { |
131 | | // - sizeof(dst) - strlen(dst) |
132 | 8 | if (BE->getOpcode() == BO_Sub) { |
133 | 8 | const Expr *L = BE->getLHS(); |
134 | 8 | const Expr *R = BE->getRHS(); |
135 | 8 | if (isSizeof(L, DstArg) && isStrlen(R, DstArg)) |
136 | 4 | return true; |
137 | | |
138 | | // - sizeof(dst) - 1 |
139 | 4 | if (isSizeof(L, DstArg) && isOne(R->IgnoreParenCasts())) |
140 | 4 | return true; |
141 | 4 | } |
142 | 8 | } |
143 | | // - sizeof(dst) |
144 | 64 | if (isSizeof(LenArg, DstArg)) |
145 | 4 | return true; |
146 | | |
147 | | // - sizeof(src) |
148 | 60 | if (isSizeof(LenArg, SrcArg)) |
149 | 4 | return true; |
150 | 56 | return false; |
151 | 60 | } |
152 | | |
153 | 92 | bool WalkAST::containsBadStrlcpyStrlcatPattern(const CallExpr *CE) { |
154 | 92 | if (CE->getNumArgs() != 3) |
155 | 0 | return false; |
156 | 92 | const Expr *DstArg = CE->getArg(0); |
157 | 92 | const Expr *LenArg = CE->getArg(2); |
158 | | |
159 | 92 | const auto *DstArgDRE = dyn_cast<DeclRefExpr>(DstArg->IgnoreParenImpCasts()); |
160 | 92 | const auto *LenArgDRE = |
161 | 92 | dyn_cast<DeclRefExpr>(LenArg->IgnoreParenLValueCasts()); |
162 | 92 | uint64_t DstOff = 0; |
163 | 92 | if (isSizeof(LenArg, DstArg)) |
164 | 20 | return false; |
165 | | |
166 | | // - size_t dstlen = sizeof(dst) |
167 | 72 | if (LenArgDRE) { |
168 | 26 | const auto *LenArgVal = dyn_cast<VarDecl>(LenArgDRE->getDecl()); |
169 | | // If it's an EnumConstantDecl instead, then we're missing out on something. |
170 | 26 | if (!LenArgVal) { |
171 | 2 | assert(isa<EnumConstantDecl>(LenArgDRE->getDecl())); |
172 | 2 | return false; |
173 | 2 | } |
174 | 24 | if (LenArgVal->getInit()) |
175 | 16 | LenArg = LenArgVal->getInit(); |
176 | 24 | } |
177 | | |
178 | | // - integral value |
179 | | // We try to figure out if the last argument is possibly longer |
180 | | // than the destination can possibly handle if its size can be defined. |
181 | 70 | if (const auto *IL = dyn_cast<IntegerLiteral>(LenArg->IgnoreParenImpCasts())) { |
182 | 40 | uint64_t ILRawVal = IL->getValue().getZExtValue(); |
183 | | |
184 | | // Case when there is pointer arithmetic on the destination buffer |
185 | | // especially when we offset from the base decreasing the |
186 | | // buffer length accordingly. |
187 | 40 | if (!DstArgDRE) { |
188 | 12 | if (const auto *BE = |
189 | 12 | dyn_cast<BinaryOperator>(DstArg->IgnoreParenImpCasts())) { |
190 | 12 | DstArgDRE = dyn_cast<DeclRefExpr>(BE->getLHS()->IgnoreParenImpCasts()); |
191 | 12 | if (BE->getOpcode() == BO_Add) { |
192 | 12 | if ((IL = dyn_cast<IntegerLiteral>(BE->getRHS()->IgnoreParenImpCasts()))) { |
193 | 12 | DstOff = IL->getValue().getZExtValue(); |
194 | 12 | } |
195 | 12 | } |
196 | 12 | } |
197 | 12 | } |
198 | 40 | if (DstArgDRE) { |
199 | 40 | if (const auto *Buffer = |
200 | 40 | dyn_cast<ConstantArrayType>(DstArgDRE->getType())) { |
201 | 40 | ASTContext &C = BR.getContext(); |
202 | 40 | uint64_t BufferLen = C.getTypeSize(Buffer) / 8; |
203 | 40 | auto RemainingBufferLen = BufferLen - DstOff; |
204 | 40 | if (RemainingBufferLen < ILRawVal) |
205 | 20 | return true; |
206 | 40 | } |
207 | 40 | } |
208 | 40 | } |
209 | | |
210 | 50 | return false; |
211 | 70 | } |
212 | | |
213 | 5.30k | void WalkAST::VisitCallExpr(CallExpr *CE) { |
214 | 5.30k | const FunctionDecl *FD = CE->getDirectCallee(); |
215 | 5.30k | if (!FD) |
216 | 14 | return; |
217 | | |
218 | 5.29k | if (CheckerContext::isCLibraryFunction(FD, "strncat")) { |
219 | 113 | if (containsBadStrncatPattern(CE)) { |
220 | 16 | const Expr *DstArg = CE->getArg(0); |
221 | 16 | const Expr *LenArg = CE->getArg(2); |
222 | 16 | PathDiagnosticLocation Loc = |
223 | 16 | PathDiagnosticLocation::createBegin(LenArg, BR.getSourceManager(), AC); |
224 | | |
225 | 16 | StringRef DstName = getPrintableName(DstArg); |
226 | | |
227 | 16 | SmallString<256> S; |
228 | 16 | llvm::raw_svector_ostream os(S); |
229 | 16 | os << "Potential buffer overflow. "; |
230 | 16 | if (!DstName.empty()) { |
231 | 16 | os << "Replace with 'sizeof(" << DstName << ") " |
232 | 16 | "- strlen(" << DstName <<") - 1'"; |
233 | 16 | os << " or u"; |
234 | 16 | } else |
235 | 0 | os << "U"; |
236 | 16 | os << "se a safer 'strlcat' API"; |
237 | | |
238 | 16 | BR.EmitBasicReport(FD, Checker, "Anti-pattern in the argument", |
239 | 16 | "C String API", os.str(), Loc, |
240 | 16 | LenArg->getSourceRange()); |
241 | 16 | } |
242 | 5.18k | } else if (CheckerContext::isCLibraryFunction(FD, "strlcpy") || |
243 | 5.18k | CheckerContext::isCLibraryFunction(FD, "strlcat")5.12k ) { |
244 | 92 | if (containsBadStrlcpyStrlcatPattern(CE)) { |
245 | 20 | const Expr *DstArg = CE->getArg(0); |
246 | 20 | const Expr *LenArg = CE->getArg(2); |
247 | 20 | PathDiagnosticLocation Loc = |
248 | 20 | PathDiagnosticLocation::createBegin(LenArg, BR.getSourceManager(), AC); |
249 | | |
250 | 20 | StringRef DstName = getPrintableName(DstArg); |
251 | | |
252 | 20 | SmallString<256> S; |
253 | 20 | llvm::raw_svector_ostream os(S); |
254 | 20 | os << "The third argument allows to potentially copy more bytes than it should. "; |
255 | 20 | os << "Replace with the value "; |
256 | 20 | if (!DstName.empty()) |
257 | 12 | os << "sizeof(" << DstName << ")"; |
258 | 8 | else |
259 | 8 | os << "sizeof(<destination buffer>)"; |
260 | 20 | os << " or lower"; |
261 | | |
262 | 20 | BR.EmitBasicReport(FD, Checker, "Anti-pattern in the argument", |
263 | 20 | "C String API", os.str(), Loc, |
264 | 20 | LenArg->getSourceRange()); |
265 | 20 | } |
266 | 92 | } |
267 | | |
268 | | // Recurse and check children. |
269 | 5.29k | VisitChildren(CE); |
270 | 5.29k | } |
271 | | |
272 | 54.6k | void WalkAST::VisitChildren(Stmt *S) { |
273 | 54.6k | for (Stmt *Child : S->children()) |
274 | 52.7k | if (Child) |
275 | 52.7k | Visit(Child); |
276 | 54.6k | } |
277 | | |
278 | | namespace { |
279 | | class CStringSyntaxChecker: public Checker<check::ASTCodeBody> { |
280 | | public: |
281 | | |
282 | | void checkASTCodeBody(const Decl *D, AnalysisManager& Mgr, |
283 | 1.95k | BugReporter &BR) const { |
284 | 1.95k | WalkAST walker(this, BR, Mgr.getAnalysisDeclContext(D)); |
285 | 1.95k | walker.Visit(D->getBody()); |
286 | 1.95k | } |
287 | | }; |
288 | | } |
289 | | |
290 | 97 | void ento::registerCStringSyntaxChecker(CheckerManager &mgr) { |
291 | 97 | mgr.registerChecker<CStringSyntaxChecker>(); |
292 | 97 | } |
293 | | |
294 | 198 | bool ento::shouldRegisterCStringSyntaxChecker(const CheckerManager &mgr) { |
295 | 198 | return true; |
296 | 198 | } |