/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Tooling/JSONCompilationDatabase.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- JSONCompilationDatabase.cpp ----------------------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file contains the implementation of the JSONCompilationDatabase. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "clang/Tooling/JSONCompilationDatabase.h" |
14 | | #include "clang/Basic/LLVM.h" |
15 | | #include "clang/Tooling/CompilationDatabase.h" |
16 | | #include "clang/Tooling/CompilationDatabasePluginRegistry.h" |
17 | | #include "clang/Tooling/Tooling.h" |
18 | | #include "llvm/ADT/Optional.h" |
19 | | #include "llvm/ADT/STLExtras.h" |
20 | | #include "llvm/ADT/SmallString.h" |
21 | | #include "llvm/ADT/SmallVector.h" |
22 | | #include "llvm/ADT/StringRef.h" |
23 | | #include "llvm/ADT/Triple.h" |
24 | | #include "llvm/Support/Allocator.h" |
25 | | #include "llvm/Support/Casting.h" |
26 | | #include "llvm/Support/CommandLine.h" |
27 | | #include "llvm/Support/ErrorOr.h" |
28 | | #include "llvm/Support/Host.h" |
29 | | #include "llvm/Support/MemoryBuffer.h" |
30 | | #include "llvm/Support/Path.h" |
31 | | #include "llvm/Support/StringSaver.h" |
32 | | #include "llvm/Support/VirtualFileSystem.h" |
33 | | #include "llvm/Support/YAMLParser.h" |
34 | | #include "llvm/Support/raw_ostream.h" |
35 | | #include <cassert> |
36 | | #include <memory> |
37 | | #include <string> |
38 | | #include <system_error> |
39 | | #include <tuple> |
40 | | #include <utility> |
41 | | #include <vector> |
42 | | |
43 | | using namespace clang; |
44 | | using namespace tooling; |
45 | | |
46 | | namespace { |
47 | | |
48 | | /// A parser for escaped strings of command line arguments. |
49 | | /// |
50 | | /// Assumes \-escaping for quoted arguments (see the documentation of |
51 | | /// unescapeCommandLine(...)). |
52 | | class CommandLineArgumentParser { |
53 | | public: |
54 | | CommandLineArgumentParser(StringRef CommandLine) |
55 | 243 | : Input(CommandLine), Position(Input.begin()-1) {} |
56 | | |
57 | 243 | std::vector<std::string> parse() { |
58 | 243 | bool HasMoreInput = true; |
59 | 2.08k | while (HasMoreInput && nextNonWhitespace()1.84k ) { |
60 | 1.84k | std::string Argument; |
61 | 1.84k | HasMoreInput = parseStringInto(Argument); |
62 | 1.84k | CommandLine.push_back(Argument); |
63 | 1.84k | } |
64 | 243 | return CommandLine; |
65 | 243 | } |
66 | | |
67 | | private: |
68 | | // All private methods return true if there is more input available. |
69 | | |
70 | 1.84k | bool parseStringInto(std::string &String) { |
71 | 1.84k | do { |
72 | 1.84k | if (*Position == '"') { |
73 | 18 | if (!parseDoubleQuotedStringInto(String)) return false8 ; |
74 | 1.83k | } else if (*Position == '\'') { |
75 | 1 | if (!parseSingleQuotedStringInto(String)) return false; |
76 | 1.83k | } else { |
77 | 1.83k | if (!parseFreeStringInto(String)) return false231 ; |
78 | 1.83k | } |
79 | 1.84k | } while (*Position != ' '1.60k ); |
80 | 1.60k | return true; |
81 | 1.84k | } |
82 | | |
83 | 18 | bool parseDoubleQuotedStringInto(std::string &String) { |
84 | 18 | if (!next()) return false1 ; |
85 | 88 | while (17 *Position != '"') { |
86 | 72 | if (!skipEscapeCharacter()) return false0 ; |
87 | 72 | String.push_back(*Position); |
88 | 72 | if (!next()) return false1 ; |
89 | 72 | } |
90 | 16 | return next(); |
91 | 17 | } |
92 | | |
93 | 1 | bool parseSingleQuotedStringInto(std::string &String) { |
94 | 1 | if (!next()) return false0 ; |
95 | 7 | while (1 *Position != '\'') { |
96 | 6 | String.push_back(*Position); |
97 | 6 | if (!next()) return false0 ; |
98 | 6 | } |
99 | 1 | return next(); |
100 | 1 | } |
101 | | |
102 | 1.83k | bool parseFreeStringInto(std::string &String) { |
103 | 66.4k | do { |
104 | 66.4k | if (!skipEscapeCharacter()) return false0 ; |
105 | 66.4k | String.push_back(*Position); |
106 | 66.4k | if (!next()) return false231 ; |
107 | 66.4k | } while (*Position != ' '66.1k && *Position != '"'64.5k && *Position != '\''64.5k ); |
108 | 1.59k | return true; |
109 | 1.83k | } |
110 | | |
111 | 66.4k | bool skipEscapeCharacter() { |
112 | 66.4k | if (*Position == '\\') { |
113 | 7 | return next(); |
114 | 7 | } |
115 | 66.4k | return true; |
116 | 66.4k | } |
117 | | |
118 | 1.84k | bool nextNonWhitespace() { |
119 | 1.87k | do { |
120 | 1.87k | if (!next()) return false3 ; |
121 | 1.87k | } while (*Position == ' '1.87k ); |
122 | 1.84k | return true; |
123 | 1.84k | } |
124 | | |
125 | 68.4k | bool next() { |
126 | 68.4k | ++Position; |
127 | 68.4k | return Position != Input.end(); |
128 | 68.4k | } |
129 | | |
130 | | const StringRef Input; |
131 | | StringRef::iterator Position; |
132 | | std::vector<std::string> CommandLine; |
133 | | }; |
134 | | |
135 | | std::vector<std::string> unescapeCommandLine(JSONCommandLineSyntax Syntax, |
136 | 243 | StringRef EscapedCommandLine) { |
137 | 243 | if (Syntax == JSONCommandLineSyntax::AutoDetect) { |
138 | | #ifdef _WIN32 |
139 | | // Assume Windows command line parsing on Win32 |
140 | | Syntax = JSONCommandLineSyntax::Windows; |
141 | | #else |
142 | 210 | Syntax = JSONCommandLineSyntax::Gnu; |
143 | 210 | #endif |
144 | 210 | } |
145 | | |
146 | 243 | if (Syntax == JSONCommandLineSyntax::Windows) { |
147 | 0 | llvm::BumpPtrAllocator Alloc; |
148 | 0 | llvm::StringSaver Saver(Alloc); |
149 | 0 | llvm::SmallVector<const char *, 64> T; |
150 | 0 | llvm::cl::TokenizeWindowsCommandLine(EscapedCommandLine, Saver, T); |
151 | 0 | std::vector<std::string> Result(T.begin(), T.end()); |
152 | 0 | return Result; |
153 | 0 | } |
154 | 243 | assert(Syntax == JSONCommandLineSyntax::Gnu); |
155 | 0 | CommandLineArgumentParser parser(EscapedCommandLine); |
156 | 243 | return parser.parse(); |
157 | 243 | } |
158 | | |
159 | | // This plugin locates a nearby compile_command.json file, and also infers |
160 | | // compile commands for files not present in the database. |
161 | | class JSONCompilationDatabasePlugin : public CompilationDatabasePlugin { |
162 | | std::unique_ptr<CompilationDatabase> |
163 | 61 | loadFromDirectory(StringRef Directory, std::string &ErrorMessage) override { |
164 | 61 | SmallString<1024> JSONDatabasePath(Directory); |
165 | 61 | llvm::sys::path::append(JSONDatabasePath, "compile_commands.json"); |
166 | 61 | auto Base = JSONCompilationDatabase::loadFromFile( |
167 | 61 | JSONDatabasePath, ErrorMessage, JSONCommandLineSyntax::AutoDetect); |
168 | 61 | return Base ? inferTargetAndDriverMode( |
169 | 20 | inferMissingCompileCommands(expandResponseFiles( |
170 | 20 | std::move(Base), llvm::vfs::getRealFileSystem()))) |
171 | 61 | : nullptr41 ; |
172 | 61 | } |
173 | | }; |
174 | | |
175 | | } // namespace |
176 | | |
177 | | // Register the JSONCompilationDatabasePlugin with the |
178 | | // CompilationDatabasePluginRegistry using this statically initialized variable. |
179 | | static CompilationDatabasePluginRegistry::Add<JSONCompilationDatabasePlugin> |
180 | | X("json-compilation-database", "Reads JSON formatted compilation databases"); |
181 | | |
namespace clang {
namespace tooling {

// Anchor symbol: referencing it from elsewhere forces the linker to pull in
// this object file, which in turn runs the static registration of the
// JSONCompilationDatabasePlugin.
volatile int JSONAnchorSource = 0;

} // namespace tooling
} // namespace clang
191 | | |
192 | | std::unique_ptr<JSONCompilationDatabase> |
193 | | JSONCompilationDatabase::loadFromFile(StringRef FilePath, |
194 | | std::string &ErrorMessage, |
195 | 149 | JSONCommandLineSyntax Syntax) { |
196 | | // Don't mmap: if we're a long-lived process, the build system may overwrite. |
197 | 149 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> DatabaseBuffer = |
198 | 149 | llvm::MemoryBuffer::getFile(FilePath, /*IsText=*/false, |
199 | 149 | /*RequiresNullTerminator=*/true, |
200 | 149 | /*IsVolatile=*/true); |
201 | 149 | if (std::error_code Result = DatabaseBuffer.getError()) { |
202 | 41 | ErrorMessage = "Error while opening JSON database: " + Result.message(); |
203 | 41 | return nullptr; |
204 | 41 | } |
205 | 108 | std::unique_ptr<JSONCompilationDatabase> Database( |
206 | 108 | new JSONCompilationDatabase(std::move(*DatabaseBuffer), Syntax)); |
207 | 108 | if (!Database->parse(ErrorMessage)) |
208 | 0 | return nullptr; |
209 | 108 | return Database; |
210 | 108 | } |
211 | | |
212 | | std::unique_ptr<JSONCompilationDatabase> |
213 | | JSONCompilationDatabase::loadFromBuffer(StringRef DatabaseString, |
214 | | std::string &ErrorMessage, |
215 | 52 | JSONCommandLineSyntax Syntax) { |
216 | 52 | std::unique_ptr<llvm::MemoryBuffer> DatabaseBuffer( |
217 | 52 | llvm::MemoryBuffer::getMemBufferCopy(DatabaseString)); |
218 | 52 | std::unique_ptr<JSONCompilationDatabase> Database( |
219 | 52 | new JSONCompilationDatabase(std::move(DatabaseBuffer), Syntax)); |
220 | 52 | if (!Database->parse(ErrorMessage)) |
221 | 17 | return nullptr; |
222 | 35 | return Database; |
223 | 52 | } |
224 | | |
225 | | std::vector<CompileCommand> |
226 | 47 | JSONCompilationDatabase::getCompileCommands(StringRef FilePath) const { |
227 | 47 | SmallString<128> NativeFilePath; |
228 | 47 | llvm::sys::path::native(FilePath, NativeFilePath); |
229 | | |
230 | 47 | std::string Error; |
231 | 47 | llvm::raw_string_ostream ES(Error); |
232 | 47 | StringRef Match = MatchTrie.findEquivalent(NativeFilePath, ES); |
233 | 47 | if (Match.empty()) |
234 | 2 | return {}; |
235 | 45 | const auto CommandsRefI = IndexByFile.find(Match); |
236 | 45 | if (CommandsRefI == IndexByFile.end()) |
237 | 0 | return {}; |
238 | 45 | std::vector<CompileCommand> Commands; |
239 | 45 | getCommands(CommandsRefI->getValue(), Commands); |
240 | 45 | return Commands; |
241 | 45 | } |
242 | | |
243 | | std::vector<std::string> |
244 | 22 | JSONCompilationDatabase::getAllFiles() const { |
245 | 22 | std::vector<std::string> Result; |
246 | 22 | for (const auto &CommandRef : IndexByFile) |
247 | 14.9k | Result.push_back(CommandRef.first().str()); |
248 | 22 | return Result; |
249 | 22 | } |
250 | | |
251 | | std::vector<CompileCommand> |
252 | 92 | JSONCompilationDatabase::getAllCompileCommands() const { |
253 | 92 | std::vector<CompileCommand> Commands; |
254 | 92 | getCommands(AllCommands, Commands); |
255 | 92 | return Commands; |
256 | 92 | } |
257 | | |
258 | 243 | static llvm::StringRef stripExecutableExtension(llvm::StringRef Name) { |
259 | 243 | Name.consume_back(".exe"); |
260 | 243 | return Name; |
261 | 243 | } |
262 | | |
263 | | // There are compiler-wrappers (ccache, distcc, gomacc) that take the "real" |
264 | | // compiler as an argument, e.g. distcc gcc -O3 foo.c. |
265 | | // These end up in compile_commands.json when people set CC="distcc gcc". |
266 | | // Clang's driver doesn't understand this, so we need to unwrap. |
267 | 251 | static bool unwrapCommand(std::vector<std::string> &Args) { |
268 | 251 | if (Args.size() < 2) |
269 | 17 | return false; |
270 | 234 | StringRef Wrapper = |
271 | 234 | stripExecutableExtension(llvm::sys::path::filename(Args.front())); |
272 | 234 | if (Wrapper == "distcc" || Wrapper == "gomacc"230 || Wrapper == "ccache"229 || |
273 | 234 | Wrapper == "sccache"225 ) { |
274 | | // Most of these wrappers support being invoked 3 ways: |
275 | | // `distcc g++ file.c` This is the mode we're trying to match. |
276 | | // We need to drop `distcc`. |
277 | | // `distcc file.c` This acts like compiler is cc or similar. |
278 | | // Clang's driver can handle this, no change needed. |
279 | | // `g++ file.c` g++ is a symlink to distcc. |
280 | | // We don't even notice this case, and all is well. |
281 | | // |
282 | | // We need to distinguish between the first and second case. |
283 | | // The wrappers themselves don't take flags, so Args[1] is a compiler flag, |
284 | | // an input file, or a compiler. Inputs have extensions, compilers don't. |
285 | 10 | bool HasCompiler = |
286 | 10 | (Args[1][0] != '-') && |
287 | 10 | !llvm::sys::path::has_extension(stripExecutableExtension(Args[1]))9 ; |
288 | 10 | if (HasCompiler) { |
289 | 8 | Args.erase(Args.begin()); |
290 | 8 | return true; |
291 | 8 | } |
292 | | // If !HasCompiler, wrappers act like GCC. Fine: so do we. |
293 | 10 | } |
294 | 226 | return false; |
295 | 234 | } |
296 | | |
297 | | static std::vector<std::string> |
298 | | nodeToCommandLine(JSONCommandLineSyntax Syntax, |
299 | 243 | const std::vector<llvm::yaml::ScalarNode *> &Nodes) { |
300 | 243 | SmallString<1024> Storage; |
301 | 243 | std::vector<std::string> Arguments; |
302 | 243 | if (Nodes.size() == 1) |
303 | 243 | Arguments = unescapeCommandLine(Syntax, Nodes[0]->getValue(Storage)); |
304 | 0 | else |
305 | 0 | for (const auto *Node : Nodes) |
306 | 0 | Arguments.push_back(std::string(Node->getValue(Storage))); |
307 | | // There may be multiple wrappers: using distcc and ccache together is common. |
308 | 251 | while (unwrapCommand(Arguments)) |
309 | 8 | ; |
310 | 243 | return Arguments; |
311 | 243 | } |
312 | | |
313 | | void JSONCompilationDatabase::getCommands( |
314 | | ArrayRef<CompileCommandRef> CommandsRef, |
315 | 137 | std::vector<CompileCommand> &Commands) const { |
316 | 243 | for (const auto &CommandRef : CommandsRef) { |
317 | 243 | SmallString<8> DirectoryStorage; |
318 | 243 | SmallString<32> FilenameStorage; |
319 | 243 | SmallString<32> OutputStorage; |
320 | 243 | auto Output = std::get<3>(CommandRef); |
321 | 243 | Commands.emplace_back( |
322 | 243 | std::get<0>(CommandRef)->getValue(DirectoryStorage), |
323 | 243 | std::get<1>(CommandRef)->getValue(FilenameStorage), |
324 | 243 | nodeToCommandLine(Syntax, std::get<2>(CommandRef)), |
325 | 243 | Output ? Output->getValue(OutputStorage)1 : ""242 ); |
326 | 243 | } |
327 | 137 | } |
328 | | |
329 | 160 | bool JSONCompilationDatabase::parse(std::string &ErrorMessage) { |
330 | 160 | llvm::yaml::document_iterator I = YAMLStream.begin(); |
331 | 160 | if (I == YAMLStream.end()) { |
332 | 0 | ErrorMessage = "Error while parsing YAML."; |
333 | 0 | return false; |
334 | 0 | } |
335 | 160 | llvm::yaml::Node *Root = I->getRoot(); |
336 | 160 | if (!Root) { |
337 | 0 | ErrorMessage = "Error while parsing YAML."; |
338 | 0 | return false; |
339 | 0 | } |
340 | 160 | auto *Array = dyn_cast<llvm::yaml::SequenceNode>(Root); |
341 | 160 | if (!Array) { |
342 | 3 | ErrorMessage = "Expected array."; |
343 | 3 | return false; |
344 | 3 | } |
345 | 15.3k | for (auto &NextObject : *Array)157 { |
346 | 15.3k | auto *Object = dyn_cast<llvm::yaml::MappingNode>(&NextObject); |
347 | 15.3k | if (!Object) { |
348 | 1 | ErrorMessage = "Expected object."; |
349 | 1 | return false; |
350 | 1 | } |
351 | 15.3k | llvm::yaml::ScalarNode *Directory = nullptr; |
352 | 15.3k | llvm::Optional<std::vector<llvm::yaml::ScalarNode *>> Command; |
353 | 15.3k | llvm::yaml::ScalarNode *File = nullptr; |
354 | 15.3k | llvm::yaml::ScalarNode *Output = nullptr; |
355 | 46.1k | for (auto& NextKeyValue : *Object) { |
356 | 46.1k | auto *KeyString = dyn_cast<llvm::yaml::ScalarNode>(NextKeyValue.getKey()); |
357 | 46.1k | if (!KeyString) { |
358 | 1 | ErrorMessage = "Expected strings as key."; |
359 | 1 | return false; |
360 | 1 | } |
361 | 46.1k | SmallString<10> KeyStorage; |
362 | 46.1k | StringRef KeyValue = KeyString->getValue(KeyStorage); |
363 | 46.1k | llvm::yaml::Node *Value = NextKeyValue.getValue(); |
364 | 46.1k | if (!Value) { |
365 | 0 | ErrorMessage = "Expected value."; |
366 | 0 | return false; |
367 | 0 | } |
368 | 46.1k | auto *ValueString = dyn_cast<llvm::yaml::ScalarNode>(Value); |
369 | 46.1k | auto *SequenceString = dyn_cast<llvm::yaml::SequenceNode>(Value); |
370 | 46.1k | if (KeyValue == "arguments") { |
371 | 5 | if (!SequenceString) { |
372 | 2 | ErrorMessage = "Expected sequence as value."; |
373 | 2 | return false; |
374 | 2 | } |
375 | 3 | Command = std::vector<llvm::yaml::ScalarNode *>(); |
376 | 3 | for (auto &Argument : *SequenceString) { |
377 | 2 | auto *Scalar = dyn_cast<llvm::yaml::ScalarNode>(&Argument); |
378 | 2 | if (!Scalar) { |
379 | 1 | ErrorMessage = "Only strings are allowed in 'arguments'."; |
380 | 1 | return false; |
381 | 1 | } |
382 | 1 | Command->push_back(Scalar); |
383 | 1 | } |
384 | 46.1k | } else { |
385 | 46.1k | if (!ValueString) { |
386 | 3 | ErrorMessage = "Expected string as value."; |
387 | 3 | return false; |
388 | 3 | } |
389 | 46.1k | if (KeyValue == "directory") { |
390 | 15.3k | Directory = ValueString; |
391 | 30.7k | } else if (KeyValue == "command") { |
392 | 15.3k | if (!Command) |
393 | 15.3k | Command = std::vector<llvm::yaml::ScalarNode *>(1, ValueString); |
394 | 15.3k | } else if (KeyValue == "file") { |
395 | 15.3k | File = ValueString; |
396 | 15.3k | } else if (2 KeyValue == "output"2 ) { |
397 | 1 | Output = ValueString; |
398 | 1 | } else { |
399 | 1 | ErrorMessage = |
400 | 1 | ("Unknown key: \"" + KeyString->getRawValue() + "\"").str(); |
401 | 1 | return false; |
402 | 1 | } |
403 | 46.1k | } |
404 | 46.1k | } |
405 | 15.3k | if (!File) { |
406 | 3 | ErrorMessage = "Missing key: \"file\"."; |
407 | 3 | return false; |
408 | 3 | } |
409 | 15.3k | if (!Command) { |
410 | 1 | ErrorMessage = "Missing key: \"command\" or \"arguments\"."; |
411 | 1 | return false; |
412 | 1 | } |
413 | 15.3k | if (!Directory) { |
414 | 1 | ErrorMessage = "Missing key: \"directory\"."; |
415 | 1 | return false; |
416 | 1 | } |
417 | 15.3k | SmallString<8> FileStorage; |
418 | 15.3k | StringRef FileName = File->getValue(FileStorage); |
419 | 15.3k | SmallString<128> NativeFilePath; |
420 | 15.3k | if (llvm::sys::path::is_relative(FileName)) { |
421 | 43 | SmallString<8> DirectoryStorage; |
422 | 43 | SmallString<128> AbsolutePath( |
423 | 43 | Directory->getValue(DirectoryStorage)); |
424 | 43 | llvm::sys::path::append(AbsolutePath, FileName); |
425 | 43 | llvm::sys::path::remove_dots(AbsolutePath, /*remove_dot_dot=*/ true); |
426 | 43 | llvm::sys::path::native(AbsolutePath, NativeFilePath); |
427 | 15.3k | } else { |
428 | 15.3k | llvm::sys::path::native(FileName, NativeFilePath); |
429 | 15.3k | } |
430 | 15.3k | auto Cmd = CompileCommandRef(Directory, File, *Command, Output); |
431 | 15.3k | IndexByFile[NativeFilePath].push_back(Cmd); |
432 | 15.3k | AllCommands.push_back(Cmd); |
433 | 15.3k | MatchTrie.insert(NativeFilePath); |
434 | 15.3k | } |
435 | 143 | return true; |
436 | 157 | } |