Coverage Report

Created: 2022-07-16 07:03

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- ModuleDepCollector.cpp - Callbacks to collect deps -------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
10
11
#include "clang/Basic/MakeSupport.h"
12
#include "clang/Frontend/CompilerInstance.h"
13
#include "clang/Lex/Preprocessor.h"
14
#include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
15
#include "llvm/Support/StringSaver.h"
16
17
using namespace clang;
18
using namespace tooling;
19
using namespace dependencies;
20
21
static void optimizeHeaderSearchOpts(HeaderSearchOptions &Opts,
22
                                     ASTReader &Reader,
23
7
                                     const serialization::ModuleFile &MF) {
24
  // Only preserve search paths that were used during the dependency scan.
25
7
  std::vector<HeaderSearchOptions::Entry> Entries = Opts.UserEntries;
26
7
  Opts.UserEntries.clear();
27
28
7
  llvm::BitVector SearchPathUsage(Entries.size());
29
7
  llvm::DenseSet<const serialization::ModuleFile *> Visited;
30
7
  std::function<void(const serialization::ModuleFile *)> VisitMF =
31
9
      [&](const serialization::ModuleFile *MF) {
32
9
        SearchPathUsage |= MF->SearchPathUsage;
33
9
        Visited.insert(MF);
34
9
        for (const serialization::ModuleFile *Import : MF->Imports)
35
2
          if (!Visited.contains(Import))
36
2
            VisitMF(Import);
37
9
      };
38
7
  VisitMF(&MF);
39
40
7
  for (auto Idx : SearchPathUsage.set_bits())
41
20
    Opts.UserEntries.push_back(Entries[Idx]);
42
7
}
43
44
/// Create the CompilerInvocation used to build the module described by
/// \p Deps, starting from a copy of OriginalInvocation.
///
/// Input/output paths are cleared, the action becomes
/// frontend::GenerateModule, implicit-module settings are switched off or set
/// to fixed values, and the prebuilt module files and module map files listed
/// in \p Deps are added. \p Optimize is then run on the invocation, after
/// which strict context hashing is enabled.
CompilerInvocation ModuleDepCollector::makeInvocationForModuleBuildWithoutPaths(
    const ModuleDeps &Deps,
    llvm::function_ref<void(CompilerInvocation &)> Optimize) const {
  // Work on a deep copy of the original Clang invocation.
  CompilerInvocation CI(OriginalInvocation);

  auto &LangOpts = *CI.getLangOpts();
  auto &FrontendOpts = CI.getFrontendOpts();
  auto &CodeGenOpts = CI.getCodeGenOpts();
  auto &DepOutputOpts = CI.getDependencyOutputOpts();
  auto &HSOpts = CI.getHeaderSearchOpts();

  LangOpts.resetNonModularOptions();
  CI.getPreprocessorOpts().resetNonModularOptions();

  // Drop options that are incompatible with an explicit module build or that
  // are likely to differ between identical modules discovered from different
  // translation units.
  FrontendOpts.Inputs.clear();
  FrontendOpts.OutputFile.clear();
  CodeGenOpts.MainFileName.clear();
  CodeGenOpts.DwarfDebugFlags.clear();
  CI.getDiagnosticOpts().DiagnosticSerializationFile.clear();
  DepOutputOpts.OutputFile.clear();
  DepOutputOpts.Targets.clear();

  FrontendOpts.ProgramAction = frontend::GenerateModule;
  LangOpts.ModuleName = Deps.ID.ModuleName;
  FrontendOpts.IsSystemModule = Deps.IsSystem;

  // Turn off implicit modules and canonicalize the options that only
  // implicit modules consult.
  LangOpts.ImplicitModules = false;
  HSOpts.ImplicitModuleMaps = false;
  HSOpts.ModuleCachePath.clear();
  HSOpts.ModulesValidateOncePerBuildSession = false;
  HSOpts.BuildSessionTimestamp = 0;
  // The specific values chosen for pruning don't affect behaviour, so use the
  // default values so they will be dropped from the command-line.
  const unsigned SecondsPerDay = 24 * 60 * 60;
  HSOpts.ModuleCachePruneInterval = 7 * SecondsPerDay;
  HSOpts.ModuleCachePruneAfter = 31 * SecondsPerDay;

  // Report the prebuilt modules this module uses.
  for (const auto &Prebuilt : Deps.PrebuiltModuleDeps)
    FrontendOpts.ModuleFiles.push_back(Prebuilt.PCMFile);

  FrontendOpts.ModuleMapFiles = Deps.ModuleMapFileDeps;

  Optimize(CI);

  // The original invocation probably didn't have strict context hash enabled.
  // The context hash of this invocation distinguishes multiple incompatible
  // versions of the same module and is reported to clients, so force the
  // **strict** hash to prevent accidental sharing of incompatible modules
  // (e.g. with differences in search paths).
  CI.getHeaderSearchOpts().ModulesStrictContextHash = true;

  return CI;
}
98
99
static std::vector<std::string>
100
77
serializeCompilerInvocation(const CompilerInvocation &CI) {
101
  // Set up string allocator.
102
77
  llvm::BumpPtrAllocator Alloc;
103
77
  llvm::StringSaver Strings(Alloc);
104
4.07k
  auto SA = [&Strings](const Twine &Arg) { return Strings.save(Arg).data(); };
105
106
  // Synthesize full command line from the CompilerInvocation, including "-cc1".
107
77
  SmallVector<const char *, 32> Args{"-cc1"};
108
77
  CI.generateCC1CommandLine(Args, SA);
109
110
  // Convert arguments to the return type.
111
77
  return std::vector<std::string>{Args.begin(), Args.end()};
112
77
}
113
114
8
static std::vector<std::string> splitString(std::string S, char Separator) {
115
8
  SmallVector<StringRef> Segments;
116
8
  StringRef(S).split(Segments, Separator, /*MaxSplit=*/-1, /*KeepEmpty=*/false);
117
8
  std::vector<std::string> Result;
118
8
  Result.reserve(Segments.size());
119
8
  for (StringRef Segment : Segments)
120
3
    Result.push_back(Segment.str());
121
8
  return Result;
122
8
}
123
124
std::vector<std::string> ModuleDeps::getCanonicalCommandLine(
125
    llvm::function_ref<std::string(const ModuleID &, ModuleOutputKind)>
126
51
        LookupModuleOutput) const {
127
51
  CompilerInvocation CI(BuildInvocation);
128
51
  FrontendOptions &FrontendOpts = CI.getFrontendOpts();
129
130
51
  InputKind ModuleMapInputKind(FrontendOpts.DashX.getLanguage(),
131
51
                               InputKind::Format::ModuleMap);
132
51
  FrontendOpts.Inputs.emplace_back(ClangModuleMapFile, ModuleMapInputKind);
133
51
  FrontendOpts.OutputFile =
134
51
      LookupModuleOutput(ID, ModuleOutputKind::ModuleFile);
135
51
  if (HadSerializedDiagnostics)
136
3
    CI.getDiagnosticOpts().DiagnosticSerializationFile =
137
3
        LookupModuleOutput(ID, ModuleOutputKind::DiagnosticSerializationFile);
138
51
  if (HadDependencyFile) {
139
8
    DependencyOutputOptions &DepOpts = CI.getDependencyOutputOpts();
140
8
    DepOpts.OutputFile =
141
8
        LookupModuleOutput(ID, ModuleOutputKind::DependencyFile);
142
8
    DepOpts.Targets = splitString(
143
8
        LookupModuleOutput(ID, ModuleOutputKind::DependencyTargets), '\0');
144
8
    if (!DepOpts.OutputFile.empty() && DepOpts.Targets.empty()) {
145
      // Fallback to -o as dependency target, as in the driver.
146
6
      SmallString<128> Target;
147
6
      quoteMakeTarget(FrontendOpts.OutputFile, Target);
148
6
      DepOpts.Targets.push_back(std::string(Target));
149
6
    }
150
8
  }
151
152
51
  for (ModuleID MID : ClangModuleDeps)
153
24
    FrontendOpts.ModuleFiles.push_back(
154
24
        LookupModuleOutput(MID, ModuleOutputKind::ModuleFile));
155
156
51
  return serializeCompilerInvocation(CI);
157
51
}
158
159
std::vector<std::string>
160
26
ModuleDeps::getCanonicalCommandLineWithoutModulePaths() const {
161
26
  return serializeCompilerInvocation(BuildInvocation);
162
26
}
163
164
void ModuleDepCollectorPP::FileChanged(SourceLocation Loc,
165
                                       FileChangeReason Reason,
166
                                       SrcMgr::CharacteristicKind FileType,
167
286
                                       FileID PrevFID) {
168
286
  if (Reason != PPCallbacks::EnterFile)
169
140
    return;
170
171
  // This has to be delayed as the context hash can change at the start of
172
  // `CompilerInstance::ExecuteAction`.
173
146
  if (MDC.ContextHash.empty()) {
174
49
    MDC.ContextHash = MDC.ScanInstance.getInvocation().getModuleHash();
175
49
    MDC.Consumer.handleContextHash(MDC.ContextHash);
176
49
  }
177
178
146
  SourceManager &SM = MDC.ScanInstance.getSourceManager();
179
180
  // Dependency generation really does want to go all the way to the
181
  // file entry for a source location to find out what is depended on.
182
  // We do not want #line markers to affect dependency generation!
183
146
  if (Optional<StringRef> Filename =
184
146
          SM.getNonBuiltinFilenameForID(SM.getFileID(SM.getExpansionLoc(Loc))))
185
52
    MDC.FileDeps.push_back(
186
52
        std::string(llvm::sys::path::remove_leading_dotslash(*Filename)));
187
146
}
188
189
void ModuleDepCollectorPP::InclusionDirective(
190
    SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
191
    bool IsAngled, CharSourceRange FilenameRange, Optional<FileEntryRef> File,
192
    StringRef SearchPath, StringRef RelativePath, const Module *Imported,
193
57
    SrcMgr::CharacteristicKind FileType) {
194
57
  if (!File && 
!Imported0
) {
195
    // This is a non-modular include that HeaderSearch failed to find. Add it
196
    // here as `FileChanged` will never see it.
197
0
    MDC.FileDeps.push_back(std::string(FileName));
198
0
  }
199
57
  handleImport(Imported);
200
57
}
201
202
void ModuleDepCollectorPP::moduleImport(SourceLocation ImportLoc,
203
                                        ModuleIdPath Path,
204
5
                                        const Module *Imported) {
205
5
  handleImport(Imported);
206
5
}
207
208
61
void ModuleDepCollectorPP::handleImport(const Module *Imported) {
209
61
  if (!Imported)
210
3
    return;
211
212
58
  const Module *TopLevelModule = Imported->getTopLevelModule();
213
214
58
  if (MDC.isPrebuiltModule(TopLevelModule))
215
2
    DirectPrebuiltModularDeps.insert(TopLevelModule);
216
56
  else
217
56
    DirectModularDeps.insert(TopLevelModule);
218
58
}
219
220
51
void ModuleDepCollectorPP::EndOfMainFile() {
221
51
  FileID MainFileID = MDC.ScanInstance.getSourceManager().getMainFileID();
222
51
  MDC.MainFile = std::string(MDC.ScanInstance.getSourceManager()
223
51
                                 .getFileEntryForID(MainFileID)
224
51
                                 ->getName());
225
226
51
  if (!MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude.empty())
227
6
    MDC.FileDeps.push_back(
228
6
        MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude);
229
230
56
  for (const Module *M : DirectModularDeps) {
231
    // A top-level module might not be actually imported as a module when
232
    // -fmodule-name is used to compile a translation unit that imports this
233
    // module. In that case it can be skipped. The appropriate header
234
    // dependencies will still be reported as expected.
235
56
    if (!M->getASTFile())
236
1
      continue;
237
55
    handleTopLevelModule(M);
238
55
  }
239
240
51
  MDC.Consumer.handleDependencyOutputOpts(*MDC.Opts);
241
242
51
  for (auto &&I : MDC.ModularDeps)
243
85
    MDC.Consumer.handleModuleDependency(*I.second);
244
245
51
  for (auto &&I : MDC.FileDeps)
246
59
    MDC.Consumer.handleFileDependency(I);
247
248
51
  for (auto &&I : DirectPrebuiltModularDeps)
249
2
    MDC.Consumer.handlePrebuiltModuleDependency(PrebuiltModuleDep{I});
250
51
}
251
252
85
/// Collect the dependency information of top-level module \p M into
/// MDC.ModularDeps and return its ModuleID.
///
/// If an entry for \p M already exists, its stored ID is returned and nothing
/// else happens. Otherwise a new ModuleDeps is filled in: names and paths,
/// file dependencies, module map dependencies, prebuilt module dependencies,
/// the build invocation and its context hash, and finally the Clang module
/// dependencies. The context hash is assigned before the imports are
/// processed.
ModuleID ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
  assert(M == M->getTopLevelModule() && "Expected top level module!");

  // If this module has been handled already, just return its ID.
  auto Inserted = MDC.ModularDeps.insert({M, nullptr});
  if (!Inserted.second)
    return Inserted.first->second->ID;

  Inserted.first->second = std::make_unique<ModuleDeps>();
  ModuleDeps &MD = *Inserted.first->second;

  MD.ID.ModuleName = M->getFullModuleName();
  MD.ImportedByMainFile = DirectModularDeps.contains(M);
  MD.ImplicitModulePCMPath = std::string(M->getASTFile()->getName());
  MD.IsSystem = M->IsSystem;

  const FileEntry *ModuleMap = MDC.ScanInstance.getPreprocessor()
                                   .getHeaderSearchInfo()
                                   .getModuleMap()
                                   .getModuleMapFileForUniquing(M);

  if (ModuleMap) {
    // Prefer the real path; fall back to the plain name when it is empty.
    StringRef MapPath = ModuleMap->tryGetRealPathName();
    if (MapPath.empty())
      MapPath = ModuleMap->getName();
    MD.ClangModuleMapFile = std::string(MapPath);
  }

  auto &Reader = *MDC.ScanInstance.getASTReader();
  serialization::ModuleFile *MF =
      Reader.getModuleManager().lookup(M->getASTFile());
  Reader.visitInputFiles(
      *MF, true, true, [&](const serialization::InputFile &IF, bool isSystem) {
        // __inferred_module.map is the result of the way in which an implicit
        // module build handles inferred modules. It adds an overlay VFS with
        // this file in the proper directory and relies on the rest of Clang to
        // handle it like normal. With explicitly built modules we don't need
        // to play VFS tricks, so replace it with the correct module map.
        // NOTE(review): ModuleMap is dereferenced here without the null check
        // applied above - confirm it cannot be null on this path.
        StringRef InputName = IF.getFile()->getName();
        if (InputName.endswith("__inferred_module.map"))
          MD.FileDeps.insert(ModuleMap->getName());
        else
          MD.FileDeps.insert(InputName);
      });

  // We usually don't need to list the module map files of our dependencies
  // when building a module explicitly: their semantics will be deserialized
  // from PCM files.
  //
  // However, some module maps loaded implicitly during the dependency scan can
  // describe anti-dependencies. That happens when this module, let's call it
  // M1, is marked as '[no_undeclared_includes]' and tries to access a header
  // "M2/M2.h" from another module, M2, but doesn't have a 'use M2;'
  // declaration. The explicit build needs the module map for M2 so that it
  // knows that textually including "M2/M2.h" is not allowed.
  // E.g., '__has_include("M2/M2.h")' should return false, but without M2's
  // module map the explicit build would return true.
  //
  // An alternative approach would be to tell the explicit build what its
  // textual dependencies are, instead of having it re-discover its
  // anti-dependencies. For example, we could create and use an `-ivfs-overlay`
  // with `fall-through: false` that explicitly listed the dependencies.
  // However, that's more complicated to implement and harder to reason about.
  if (M->NoUndeclaredIncludes) {
    // We don't have a good way to determine which module map described the
    // anti-dependency (let alone what's the corresponding top-level module
    // map). We simply specify all the module maps in the order they were
    // loaded during the implicit build during scan.
    // TODO: Resolve this by serializing and only using Module::UndeclaredUses.
    Reader.visitTopLevelModuleMaps(*MF, [&](const FileEntry *FE) {
      // Skip inferred module maps, and skip the top-level module map of this
      // module: it will be the input file, so it need not be listed again.
      const bool IsInferred =
          FE->getName().endswith("__inferred_module.map");
      if (IsInferred || FE == ModuleMap)
        return;
      MD.ModuleMapFileDeps.push_back(FE->getName().str());
    });
  }

  // Add direct prebuilt module dependencies now, so that we can use them when
  // creating a CompilerInvocation and computing context hash for this
  // ModuleDeps instance.
  llvm::DenseSet<const Module *> SeenModules;
  addAllSubmodulePrebuiltDeps(M, MD, SeenModules);

  MD.BuildInvocation = MDC.makeInvocationForModuleBuildWithoutPaths(
      MD, [&](CompilerInvocation &Invocation) {
        if (MDC.OptimizeArgs)
          optimizeHeaderSearchOpts(Invocation.getHeaderSearchOpts(), Reader,
                                   *MF);
      });
  MD.HadSerializedDiagnostics = !MDC.OriginalInvocation.getDiagnosticOpts()
                                     .DiagnosticSerializationFile.empty();
  MD.HadDependencyFile =
      !MDC.OriginalInvocation.getDependencyOutputOpts().OutputFile.empty();
  // FIXME: HadSerializedDiagnostics and HadDependencyFile should be included in
  // the context hash since it can affect the command-line.
  MD.ID.ContextHash = MD.BuildInvocation.getModuleHash();

  llvm::DenseSet<const Module *> AddedModules;
  addAllSubmoduleDeps(M, MD, AddedModules);

  return MD.ID;
}
358
359
static void forEachSubmoduleSorted(const Module *M,
360
186
                                   llvm::function_ref<void(const Module *)> F) {
361
  // Submodule order depends on order of header includes for inferred submodules
362
  // we don't care about the exact order, so sort so that it's consistent across
363
  // TUs to improve sharing.
364
186
  SmallVector<const Module *> Submodules(M->submodule_begin(),
365
186
                                         M->submodule_end());
366
186
  llvm::stable_sort(Submodules, [](const Module *A, const Module *B) {
367
4
    return A->Name < B->Name;
368
4
  });
369
186
  for (const Module *SubM : Submodules)
370
16
    F(SubM);
371
186
}
372
373
void ModuleDepCollectorPP::addAllSubmodulePrebuiltDeps(
374
    const Module *M, ModuleDeps &MD,
375
93
    llvm::DenseSet<const Module *> &SeenSubmodules) {
376
93
  addModulePrebuiltDeps(M, MD, SeenSubmodules);
377
378
93
  forEachSubmoduleSorted(M, [&](const Module *SubM) {
379
8
    addAllSubmodulePrebuiltDeps(SubM, MD, SeenSubmodules);
380
8
  });
381
93
}
382
383
void ModuleDepCollectorPP::addModulePrebuiltDeps(
384
    const Module *M, ModuleDeps &MD,
385
93
    llvm::DenseSet<const Module *> &SeenSubmodules) {
386
93
  for (const Module *Import : M->Imports)
387
33
    if (Import->getTopLevelModule() != M->getTopLevelModule())
388
33
      if (MDC.isPrebuiltModule(Import->getTopLevelModule()))
389
3
        if (SeenSubmodules.insert(Import->getTopLevelModule()).second)
390
3
          MD.PrebuiltModuleDeps.emplace_back(Import->getTopLevelModule());
391
93
}
392
393
void ModuleDepCollectorPP::addAllSubmoduleDeps(
394
    const Module *M, ModuleDeps &MD,
395
93
    llvm::DenseSet<const Module *> &AddedModules) {
396
93
  addModuleDep(M, MD, AddedModules);
397
398
93
  forEachSubmoduleSorted(M, [&](const Module *SubM) {
399
8
    addAllSubmoduleDeps(SubM, MD, AddedModules);
400
8
  });
401
93
}
402
403
void ModuleDepCollectorPP::addModuleDep(
404
    const Module *M, ModuleDeps &MD,
405
93
    llvm::DenseSet<const Module *> &AddedModules) {
406
93
  for (const Module *Import : M->Imports) {
407
33
    if (Import->getTopLevelModule() != M->getTopLevelModule() &&
408
33
        !MDC.isPrebuiltModule(Import)) {
409
30
      ModuleID ImportID = handleTopLevelModule(Import->getTopLevelModule());
410
30
      if (AddedModules.insert(Import->getTopLevelModule()).second)
411
30
        MD.ClangModuleDeps.push_back(ImportID);
412
30
    }
413
33
  }
414
93
}
415
416
/// Construct a collector bound to \p ScanInstance and consumer \p C.
/// Ownership of \p Opts and the contents of \p OriginalCI are moved into the
/// collector; \p OptimizeArgs is stored as given.
ModuleDepCollector::ModuleDepCollector(
    std::unique_ptr<DependencyOutputOptions> Opts,
    CompilerInstance &ScanInstance, DependencyConsumer &C,
    CompilerInvocation &&OriginalCI, bool OptimizeArgs)
    : ScanInstance(ScanInstance),
      Consumer(C),
      Opts(std::move(Opts)),
      OriginalInvocation(std::move(OriginalCI)),
      OptimizeArgs(OptimizeArgs) {
}
422
423
54
/// Register a new ModuleDepCollectorPP, constructed from this collector, as a
/// callback on \p PP.
void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) {
  auto Callbacks = std::make_unique<ModuleDepCollectorPP>(*this);
  PP.addPPCallbacks(std::move(Callbacks));
}
426
427
54
/// Currently does nothing with \p R.
void ModuleDepCollector::attachToASTReader(ASTReader &R) {
}
428
429
125
bool ModuleDepCollector::isPrebuiltModule(const Module *M) {
430
125
  std::string Name(M->getTopLevelModuleName());
431
125
  const auto &PrebuiltModuleFiles =
432
125
      ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles;
433
125
  auto PrebuiltModuleFileIt = PrebuiltModuleFiles.find(Name);
434
125
  if (PrebuiltModuleFileIt == PrebuiltModuleFiles.end())
435
116
    return false;
436
9
  assert("Prebuilt module came from the expected AST file" &&
437
9
         PrebuiltModuleFileIt->second == M->getASTFile()->getName());
438
0
  return true;
439
125
}