/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Tooling/JSONCompilationDatabase.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- JSONCompilationDatabase.cpp ----------------------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file contains the implementation of the JSONCompilationDatabase. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "clang/Tooling/JSONCompilationDatabase.h" |
14 | | #include "clang/Basic/LLVM.h" |
15 | | #include "clang/Tooling/CompilationDatabase.h" |
16 | | #include "clang/Tooling/CompilationDatabasePluginRegistry.h" |
17 | | #include "clang/Tooling/Tooling.h" |
18 | | #include "llvm/ADT/Optional.h" |
19 | | #include "llvm/ADT/STLExtras.h" |
20 | | #include "llvm/ADT/SmallString.h" |
21 | | #include "llvm/ADT/SmallVector.h" |
22 | | #include "llvm/ADT/StringRef.h" |
23 | | #include "llvm/ADT/Triple.h" |
24 | | #include "llvm/Support/Allocator.h" |
25 | | #include "llvm/Support/Casting.h" |
26 | | #include "llvm/Support/CommandLine.h" |
27 | | #include "llvm/Support/ErrorOr.h" |
28 | | #include "llvm/Support/Host.h" |
29 | | #include "llvm/Support/MemoryBuffer.h" |
30 | | #include "llvm/Support/Path.h" |
31 | | #include "llvm/Support/StringSaver.h" |
32 | | #include "llvm/Support/VirtualFileSystem.h" |
33 | | #include "llvm/Support/YAMLParser.h" |
34 | | #include "llvm/Support/raw_ostream.h" |
35 | | #include <cassert> |
36 | | #include <memory> |
37 | | #include <string> |
38 | | #include <system_error> |
39 | | #include <tuple> |
40 | | #include <utility> |
41 | | #include <vector> |
42 | | |
43 | | using namespace clang; |
44 | | using namespace tooling; |
45 | | |
46 | | namespace { |
47 | | |
48 | | /// A parser for escaped strings of command line arguments. |
49 | | /// |
50 | | /// Assumes \-escaping for quoted arguments (see the documentation of |
51 | | /// unescapeCommandLine(...)). |
52 | | class CommandLineArgumentParser { |
53 | | public: |
54 | | CommandLineArgumentParser(StringRef CommandLine) |
55 | 126 | : Input(CommandLine), Position(Input.begin()-1) {} |
56 | | |
57 | 126 | std::vector<std::string> parse() { |
58 | 126 | bool HasMoreInput = true; |
59 | 823 | while (HasMoreInput && nextNonWhitespace()700 ) { |
60 | 697 | std::string Argument; |
61 | 697 | HasMoreInput = parseStringInto(Argument); |
62 | 697 | CommandLine.push_back(Argument); |
63 | 697 | } |
64 | 126 | return CommandLine; |
65 | 126 | } |
66 | | |
67 | | private: |
68 | | // All private methods return true if there is more input available. |
69 | | |
70 | 697 | bool parseStringInto(std::string &String) { |
71 | 704 | do { |
72 | 704 | if (*Position == '"') { |
73 | 18 | if (!parseDoubleQuotedStringInto(String)) return false8 ; |
74 | 686 | } else if (*Position == '\'') { |
75 | 1 | if (!parseSingleQuotedStringInto(String)) return false; |
76 | 685 | } else { |
77 | 685 | if (!parseFreeStringInto(String)) return false114 ; |
78 | 581 | } |
79 | 581 | } while (*Position != ' '); |
80 | 574 | return true; |
81 | 697 | } |
82 | | |
83 | 18 | bool parseDoubleQuotedStringInto(std::string &String) { |
84 | 18 | if (!next()) return false1 ; |
85 | 88 | while (17 *Position != '"') { |
86 | 72 | if (!skipEscapeCharacter()) return false0 ; |
87 | 72 | String.push_back(*Position); |
88 | 72 | if (!next()) return false1 ; |
89 | 72 | } |
90 | 16 | return next(); |
91 | 17 | } |
92 | | |
93 | 1 | bool parseSingleQuotedStringInto(std::string &String) { |
94 | 1 | if (!next()) return false0 ; |
95 | 7 | while (1 *Position != '\'') { |
96 | 6 | String.push_back(*Position); |
97 | 6 | if (!next()) return false0 ; |
98 | 6 | } |
99 | 1 | return next(); |
100 | 1 | } |
101 | | |
102 | 685 | bool parseFreeStringInto(std::string &String) { |
103 | 21.6k | do { |
104 | 21.6k | if (!skipEscapeCharacter()) return false0 ; |
105 | 21.6k | String.push_back(*Position); |
106 | 21.6k | if (!next()) return false114 ; |
107 | 21.5k | } while (*Position != ' ' && *Position != '"'20.9k && *Position != '\''20.9k ); |
108 | 571 | return true; |
109 | 685 | } |
110 | | |
111 | 21.6k | bool skipEscapeCharacter() { |
112 | 21.6k | if (*Position == '\\') { |
113 | 7 | return next(); |
114 | 7 | } |
115 | 21.6k | return true; |
116 | 21.6k | } |
117 | | |
118 | 700 | bool nextNonWhitespace() { |
119 | 720 | do { |
120 | 720 | if (!next()) return false3 ; |
121 | 717 | } while (*Position == ' '); |
122 | 697 | return true; |
123 | 700 | } |
124 | | |
125 | 22.4k | bool next() { |
126 | 22.4k | ++Position; |
127 | 22.4k | return Position != Input.end(); |
128 | 22.4k | } |
129 | | |
130 | | const StringRef Input; |
131 | | StringRef::iterator Position; |
132 | | std::vector<std::string> CommandLine; |
133 | | }; |
134 | | |
135 | | std::vector<std::string> unescapeCommandLine(JSONCommandLineSyntax Syntax, |
136 | 126 | StringRef EscapedCommandLine) { |
137 | 126 | if (Syntax == JSONCommandLineSyntax::AutoDetect) { |
138 | 93 | Syntax = JSONCommandLineSyntax::Gnu; |
139 | 93 | llvm::Triple Triple(llvm::sys::getProcessTriple()); |
140 | 93 | if (Triple.getOS() == llvm::Triple::OSType::Win32) { |
141 | | // Assume Windows command line parsing on Win32 unless the triple |
142 | | // explicitly tells us otherwise. |
143 | 0 | if (!Triple.hasEnvironment() || |
144 | 0 | Triple.getEnvironment() == llvm::Triple::EnvironmentType::MSVC) |
145 | 0 | Syntax = JSONCommandLineSyntax::Windows; |
146 | 0 | } |
147 | 93 | } |
148 | | |
149 | 126 | if (Syntax == JSONCommandLineSyntax::Windows) { |
150 | 0 | llvm::BumpPtrAllocator Alloc; |
151 | 0 | llvm::StringSaver Saver(Alloc); |
152 | 0 | llvm::SmallVector<const char *, 64> T; |
153 | 0 | llvm::cl::TokenizeWindowsCommandLine(EscapedCommandLine, Saver, T); |
154 | 0 | std::vector<std::string> Result(T.begin(), T.end()); |
155 | 0 | return Result; |
156 | 0 | } |
157 | 126 | assert(Syntax == JSONCommandLineSyntax::Gnu); |
158 | 126 | CommandLineArgumentParser parser(EscapedCommandLine); |
159 | 126 | return parser.parse(); |
160 | 126 | } |
161 | | |
162 | | // This plugin locates a nearby compile_command.json file, and also infers |
163 | | // compile commands for files not present in the database. |
164 | | class JSONCompilationDatabasePlugin : public CompilationDatabasePlugin { |
165 | | std::unique_ptr<CompilationDatabase> |
166 | 60 | loadFromDirectory(StringRef Directory, std::string &ErrorMessage) override { |
167 | 60 | SmallString<1024> JSONDatabasePath(Directory); |
168 | 60 | llvm::sys::path::append(JSONDatabasePath, "compile_commands.json"); |
169 | 60 | auto Base = JSONCompilationDatabase::loadFromFile( |
170 | 60 | JSONDatabasePath, ErrorMessage, JSONCommandLineSyntax::AutoDetect); |
171 | 19 | return Base ? inferTargetAndDriverMode( |
172 | 19 | inferMissingCompileCommands(expandResponseFiles( |
173 | 19 | std::move(Base), llvm::vfs::getRealFileSystem()))) |
174 | 41 | : nullptr; |
175 | 60 | } |
176 | | }; |
177 | | |
178 | | } // namespace |
179 | | |
180 | | // Registers JSONCompilationDatabasePlugin with CompilationDatabasePluginRegistry |
181 | | // by constructing this file-static variable; the two strings are presumably the plugin's registry name and its description. |
182 | | static CompilationDatabasePluginRegistry::Add<JSONCompilationDatabasePlugin> |
183 | | X("json-compilation-database", "Reads JSON formatted compilation databases"); |
184 | | |
185 | | namespace clang { |
186 | | namespace tooling { |
187 | | |
188 | | // Anchor symbol: it exists so the linker is forced to link in the object file generated |
189 | | // from this source, which in turn registers the JSONCompilationDatabasePlugin. |
190 | | volatile int JSONAnchorSource = 0; |
191 | | |
192 | | } // namespace tooling |
193 | | } // namespace clang |
194 | | |
195 | | std::unique_ptr<JSONCompilationDatabase> |
196 | | JSONCompilationDatabase::loadFromFile(StringRef FilePath, |
197 | | std::string &ErrorMessage, |
198 | 91 | JSONCommandLineSyntax Syntax) { |
199 | | // Don't mmap: if we're a long-lived process, the build system may overwrite. |
200 | 91 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> DatabaseBuffer = |
201 | 91 | llvm::MemoryBuffer::getFile(FilePath, /*FileSize=*/-1, |
202 | 91 | /*RequiresNullTerminator=*/true, |
203 | 91 | /*IsVolatile=*/true); |
204 | 91 | if (std::error_code Result = DatabaseBuffer.getError()) { |
205 | 41 | ErrorMessage = "Error while opening JSON database: " + Result.message(); |
206 | 41 | return nullptr; |
207 | 41 | } |
208 | 50 | std::unique_ptr<JSONCompilationDatabase> Database( |
209 | 50 | new JSONCompilationDatabase(std::move(*DatabaseBuffer), Syntax)); |
210 | 50 | if (!Database->parse(ErrorMessage)) |
211 | 0 | return nullptr; |
212 | 50 | return Database; |
213 | 50 | } |
214 | | |
215 | | std::unique_ptr<JSONCompilationDatabase> |
216 | | JSONCompilationDatabase::loadFromBuffer(StringRef DatabaseString, |
217 | | std::string &ErrorMessage, |
218 | 52 | JSONCommandLineSyntax Syntax) { |
219 | 52 | std::unique_ptr<llvm::MemoryBuffer> DatabaseBuffer( |
220 | 52 | llvm::MemoryBuffer::getMemBufferCopy(DatabaseString)); |
221 | 52 | std::unique_ptr<JSONCompilationDatabase> Database( |
222 | 52 | new JSONCompilationDatabase(std::move(DatabaseBuffer), Syntax)); |
223 | 52 | if (!Database->parse(ErrorMessage)) |
224 | 17 | return nullptr; |
225 | 35 | return Database; |
226 | 35 | } |
227 | | |
228 | | std::vector<CompileCommand> |
229 | 46 | JSONCompilationDatabase::getCompileCommands(StringRef FilePath) const { |
230 | 46 | SmallString<128> NativeFilePath; |
231 | 46 | llvm::sys::path::native(FilePath, NativeFilePath); |
232 | | |
233 | 46 | std::string Error; |
234 | 46 | llvm::raw_string_ostream ES(Error); |
235 | 46 | StringRef Match = MatchTrie.findEquivalent(NativeFilePath, ES); |
236 | 46 | if (Match.empty()) |
237 | 2 | return {}; |
238 | 44 | const auto CommandsRefI = IndexByFile.find(Match); |
239 | 44 | if (CommandsRefI == IndexByFile.end()) |
240 | 0 | return {}; |
241 | 44 | std::vector<CompileCommand> Commands; |
242 | 44 | getCommands(CommandsRefI->getValue(), Commands); |
243 | 44 | return Commands; |
244 | 44 | } |
245 | | |
246 | | std::vector<std::string> |
247 | 21 | JSONCompilationDatabase::getAllFiles() const { |
248 | 21 | std::vector<std::string> Result; |
249 | 21 | for (const auto &CommandRef : IndexByFile) |
250 | 14.9k | Result.push_back(CommandRef.first().str()); |
251 | 21 | return Result; |
252 | 21 | } |
253 | | |
254 | | std::vector<CompileCommand> |
255 | 35 | JSONCompilationDatabase::getAllCompileCommands() const { |
256 | 35 | std::vector<CompileCommand> Commands; |
257 | 35 | getCommands(AllCommands, Commands); |
258 | 35 | return Commands; |
259 | 35 | } |
260 | | |
261 | 126 | static llvm::StringRef stripExecutableExtension(llvm::StringRef Name) { |
262 | 126 | Name.consume_back(".exe"); |
263 | 126 | return Name; |
264 | 126 | } |
265 | | |
266 | | // There are compiler-wrappers (ccache, distcc, gomacc) that take the "real" |
267 | | // compiler as an argument, e.g. distcc gcc -O3 foo.c. |
268 | | // These end up in compile_commands.json when people set CC="distcc gcc". |
269 | | // Clang's driver doesn't understand this, so we need to unwrap. |
270 | 134 | static bool unwrapCommand(std::vector<std::string> &Args) { |
271 | 134 | if (Args.size() < 2) |
272 | 17 | return false; |
273 | 117 | StringRef Wrapper = |
274 | 117 | stripExecutableExtension(llvm::sys::path::filename(Args.front())); |
275 | 117 | if (Wrapper == "distcc" || Wrapper == "gomacc"113 || Wrapper == "ccache"112 || |
276 | 108 | Wrapper == "sccache") { |
277 | | // Most of these wrappers support being invoked 3 ways: |
278 | | // `distcc g++ file.c` This is the mode we're trying to match. |
279 | | // We need to drop `distcc`. |
280 | | // `distcc file.c` This acts like compiler is cc or similar. |
281 | | // Clang's driver can handle this, no change needed. |
282 | | // `g++ file.c` g++ is a symlink to distcc. |
283 | | // We don't even notice this case, and all is well. |
284 | | // |
285 | | // We need to distinguish between the first and second case. |
286 | | // The wrappers themselves don't take flags, so Args[1] is a compiler flag, |
287 | | // an input file, or a compiler. Inputs have extensions, compilers don't. |
288 | 10 | bool HasCompiler = |
289 | 10 | (Args[1][0] != '-') && |
290 | 9 | !llvm::sys::path::has_extension(stripExecutableExtension(Args[1])); |
291 | 10 | if (HasCompiler) { |
292 | 8 | Args.erase(Args.begin()); |
293 | 8 | return true; |
294 | 8 | } |
295 | | // If !HasCompiler, wrappers act like GCC. Fine: so do we. |
296 | 10 | } |
297 | 109 | return false; |
298 | 109 | } |
299 | | |
300 | | static std::vector<std::string> |
301 | | nodeToCommandLine(JSONCommandLineSyntax Syntax, |
302 | 126 | const std::vector<llvm::yaml::ScalarNode *> &Nodes) { |
303 | 126 | SmallString<1024> Storage; |
304 | 126 | std::vector<std::string> Arguments; |
305 | 126 | if (Nodes.size() == 1) |
306 | 126 | Arguments = unescapeCommandLine(Syntax, Nodes[0]->getValue(Storage)); |
307 | 0 | else |
308 | 0 | for (const auto *Node : Nodes) |
309 | 0 | Arguments.push_back(std::string(Node->getValue(Storage))); |
310 | | // There may be multiple wrappers: using distcc and ccache together is common. |
311 | 134 | while (unwrapCommand(Arguments)) |
312 | 8 | ; |
313 | 126 | return Arguments; |
314 | 126 | } |
315 | | |
316 | | void JSONCompilationDatabase::getCommands( |
317 | | ArrayRef<CompileCommandRef> CommandsRef, |
318 | 79 | std::vector<CompileCommand> &Commands) const { |
319 | 126 | for (const auto &CommandRef : CommandsRef) { |
320 | 126 | SmallString<8> DirectoryStorage; |
321 | 126 | SmallString<32> FilenameStorage; |
322 | 126 | SmallString<32> OutputStorage; |
323 | 126 | auto Output = std::get<3>(CommandRef); |
324 | 126 | Commands.emplace_back( |
325 | 126 | std::get<0>(CommandRef)->getValue(DirectoryStorage), |
326 | 126 | std::get<1>(CommandRef)->getValue(FilenameStorage), |
327 | 126 | nodeToCommandLine(Syntax, std::get<2>(CommandRef)), |
328 | 125 | Output ? Output->getValue(OutputStorage)1 : ""); |
329 | 126 | } |
330 | 79 | } |
331 | | |
332 | 102 | bool JSONCompilationDatabase::parse(std::string &ErrorMessage) { |
333 | 102 | llvm::yaml::document_iterator I = YAMLStream.begin(); |
334 | 102 | if (I == YAMLStream.end()) { |
335 | 0 | ErrorMessage = "Error while parsing YAML."; |
336 | 0 | return false; |
337 | 0 | } |
338 | 102 | llvm::yaml::Node *Root = I->getRoot(); |
339 | 102 | if (!Root) { |
340 | 0 | ErrorMessage = "Error while parsing YAML."; |
341 | 0 | return false; |
342 | 0 | } |
343 | 102 | auto *Array = dyn_cast<llvm::yaml::SequenceNode>(Root); |
344 | 102 | if (!Array) { |
345 | 3 | ErrorMessage = "Expected array."; |
346 | 3 | return false; |
347 | 3 | } |
348 | 15.2k | for (auto &NextObject : *Array)99 { |
349 | 15.2k | auto *Object = dyn_cast<llvm::yaml::MappingNode>(&NextObject); |
350 | 15.2k | if (!Object) { |
351 | 1 | ErrorMessage = "Expected object."; |
352 | 1 | return false; |
353 | 1 | } |
354 | 15.2k | llvm::yaml::ScalarNode *Directory = nullptr; |
355 | 15.2k | llvm::Optional<std::vector<llvm::yaml::ScalarNode *>> Command; |
356 | 15.2k | llvm::yaml::ScalarNode *File = nullptr; |
357 | 15.2k | llvm::yaml::ScalarNode *Output = nullptr; |
358 | 45.7k | for (auto& NextKeyValue : *Object) { |
359 | 45.7k | auto *KeyString = dyn_cast<llvm::yaml::ScalarNode>(NextKeyValue.getKey()); |
360 | 45.7k | if (!KeyString) { |
361 | 1 | ErrorMessage = "Expected strings as key."; |
362 | 1 | return false; |
363 | 1 | } |
364 | 45.7k | SmallString<10> KeyStorage; |
365 | 45.7k | StringRef KeyValue = KeyString->getValue(KeyStorage); |
366 | 45.7k | llvm::yaml::Node *Value = NextKeyValue.getValue(); |
367 | 45.7k | if (!Value) { |
368 | 0 | ErrorMessage = "Expected value."; |
369 | 0 | return false; |
370 | 0 | } |
371 | 45.7k | auto *ValueString = dyn_cast<llvm::yaml::ScalarNode>(Value); |
372 | 45.7k | auto *SequenceString = dyn_cast<llvm::yaml::SequenceNode>(Value); |
373 | 45.7k | if (KeyValue == "arguments") { |
374 | 5 | if (!SequenceString) { |
375 | 2 | ErrorMessage = "Expected sequence as value."; |
376 | 2 | return false; |
377 | 2 | } |
378 | 3 | Command = std::vector<llvm::yaml::ScalarNode *>(); |
379 | 2 | for (auto &Argument : *SequenceString) { |
380 | 2 | auto *Scalar = dyn_cast<llvm::yaml::ScalarNode>(&Argument); |
381 | 2 | if (!Scalar) { |
382 | 1 | ErrorMessage = "Only strings are allowed in 'arguments'."; |
383 | 1 | return false; |
384 | 1 | } |
385 | 1 | Command->push_back(Scalar); |
386 | 1 | } |
387 | 45.7k | } else { |
388 | 45.7k | if (!ValueString) { |
389 | 3 | ErrorMessage = "Expected string as value."; |
390 | 3 | return false; |
391 | 3 | } |
392 | 45.7k | if (KeyValue == "directory") { |
393 | 15.2k | Directory = ValueString; |
394 | 30.5k | } else if (KeyValue == "command") { |
395 | 15.2k | if (!Command) |
396 | 15.2k | Command = std::vector<llvm::yaml::ScalarNode *>(1, ValueString); |
397 | 15.2k | } else if (KeyValue == "file") { |
398 | 15.2k | File = ValueString; |
399 | 2 | } else if (KeyValue == "output") { |
400 | 1 | Output = ValueString; |
401 | 1 | } else { |
402 | 1 | ErrorMessage = |
403 | 1 | ("Unknown key: \"" + KeyString->getRawValue() + "\"").str(); |
404 | 1 | return false; |
405 | 1 | } |
406 | 45.7k | } |
407 | 45.7k | } |
408 | 15.2k | if (!File) { |
409 | 3 | ErrorMessage = "Missing key: \"file\"."; |
410 | 3 | return false; |
411 | 3 | } |
412 | 15.2k | if (!Command) { |
413 | 1 | ErrorMessage = "Missing key: \"command\" or \"arguments\"."; |
414 | 1 | return false; |
415 | 1 | } |
416 | 15.2k | if (!Directory) { |
417 | 1 | ErrorMessage = "Missing key: \"directory\"."; |
418 | 1 | return false; |
419 | 1 | } |
420 | 15.2k | SmallString<8> FileStorage; |
421 | 15.2k | StringRef FileName = File->getValue(FileStorage); |
422 | 15.2k | SmallString<128> NativeFilePath; |
423 | 15.2k | if (llvm::sys::path::is_relative(FileName)) { |
424 | 39 | SmallString<8> DirectoryStorage; |
425 | 39 | SmallString<128> AbsolutePath( |
426 | 39 | Directory->getValue(DirectoryStorage)); |
427 | 39 | llvm::sys::path::append(AbsolutePath, FileName); |
428 | 39 | llvm::sys::path::remove_dots(AbsolutePath, /*remove_dot_dot=*/ true); |
429 | 39 | llvm::sys::path::native(AbsolutePath, NativeFilePath); |
430 | 15.2k | } else { |
431 | 15.2k | llvm::sys::path::native(FileName, NativeFilePath); |
432 | 15.2k | } |
433 | 15.2k | auto Cmd = CompileCommandRef(Directory, File, *Command, Output); |
434 | 15.2k | IndexByFile[NativeFilePath].push_back(Cmd); |
435 | 15.2k | AllCommands.push_back(Cmd); |
436 | 15.2k | MatchTrie.insert(NativeFilePath); |
437 | 15.2k | } |
438 | 85 | return true; |
439 | 99 | } |