Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
///
10
/// \file Converts from in-memory normalized mach-o to in-memory Atoms.
11
///
12
///                  +------------+
13
///                  | normalized |
14
///                  +------------+
15
///                        |
16
///                        |
17
///                        v
18
///                    +-------+
19
///                    | Atoms |
20
///                    +-------+
21
22
#include "ArchHandler.h"
23
#include "Atoms.h"
24
#include "File.h"
25
#include "MachONormalizedFile.h"
26
#include "MachONormalizedFileBinaryUtils.h"
27
#include "lld/Common/LLVM.h"
28
#include "lld/Core/Error.h"
29
#include "llvm/BinaryFormat/Dwarf.h"
30
#include "llvm/BinaryFormat/MachO.h"
31
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
32
#include "llvm/Support/DataExtractor.h"
33
#include "llvm/Support/Debug.h"
34
#include "llvm/Support/Error.h"
35
#include "llvm/Support/Format.h"
36
#include "llvm/Support/LEB128.h"
37
#include "llvm/Support/raw_ostream.h"
38
39
using namespace llvm::MachO;
40
using namespace lld::mach_o::normalized;
41
42
#define DEBUG_TYPE "normalized-file-to-atoms"
43
44
namespace lld {
45
namespace mach_o {
46
47
48
namespace { // anonymous
49
50
51
#define ENTRY(seg, sect, type, atomType) \
52
  {seg, sect, type, DefinedAtom::atomType }
53
54
struct MachORelocatableSectionToAtomType {
55
  StringRef                 segmentName;
56
  StringRef                 sectionName;
57
  SectionType               sectionType;
58
  DefinedAtom::ContentType  atomType;
59
};
60
61
const MachORelocatableSectionToAtomType sectsToAtomType[] = {
62
  ENTRY("__TEXT", "__text",           S_REGULAR,          typeCode),
63
  ENTRY("__TEXT", "__text",           S_REGULAR,          typeResolver),
64
  ENTRY("__TEXT", "__cstring",        S_CSTRING_LITERALS, typeCString),
65
  ENTRY("",       "",                 S_CSTRING_LITERALS, typeCString),
66
  ENTRY("__TEXT", "__ustring",        S_REGULAR,          typeUTF16String),
67
  ENTRY("__TEXT", "__const",          S_REGULAR,          typeConstant),
68
  ENTRY("__TEXT", "__const_coal",     S_COALESCED,        typeConstant),
69
  ENTRY("__TEXT", "__eh_frame",       S_COALESCED,        typeCFI),
70
  ENTRY("__TEXT", "__eh_frame",       S_REGULAR,          typeCFI),
71
  ENTRY("__TEXT", "__literal4",       S_4BYTE_LITERALS,   typeLiteral4),
72
  ENTRY("__TEXT", "__literal8",       S_8BYTE_LITERALS,   typeLiteral8),
73
  ENTRY("__TEXT", "__literal16",      S_16BYTE_LITERALS,  typeLiteral16),
74
  ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR,          typeLSDA),
75
  ENTRY("__DATA", "__data",           S_REGULAR,          typeData),
76
  ENTRY("__DATA", "__datacoal_nt",    S_COALESCED,        typeData),
77
  ENTRY("__DATA", "__const",          S_REGULAR,          typeConstData),
78
  ENTRY("__DATA", "__cfstring",       S_REGULAR,          typeCFString),
79
  ENTRY("__DATA", "__mod_init_func",  S_MOD_INIT_FUNC_POINTERS,
80
                                                          typeInitializerPtr),
81
  ENTRY("__DATA", "__mod_term_func",  S_MOD_TERM_FUNC_POINTERS,
82
                                                          typeTerminatorPtr),
83
  ENTRY("__DATA", "__got",            S_NON_LAZY_SYMBOL_POINTERS,
84
                                                          typeGOT),
85
  ENTRY("__DATA", "__bss",            S_ZEROFILL,         typeZeroFill),
86
  ENTRY("",       "",                 S_NON_LAZY_SYMBOL_POINTERS,
87
                                                          typeGOT),
88
  ENTRY("__DATA", "__interposing",    S_INTERPOSING,      typeInterposingTuples),
89
  ENTRY("__DATA", "__thread_vars",    S_THREAD_LOCAL_VARIABLES,
90
                                                          typeThunkTLV),
91
  ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, typeTLVInitialData),
92
  ENTRY("__DATA", "__thread_bss",     S_THREAD_LOCAL_ZEROFILL,
93
                                                        typeTLVInitialZeroFill),
94
  ENTRY("__DATA", "__objc_imageinfo", S_REGULAR,          typeObjCImageInfo),
95
  ENTRY("__DATA", "__objc_catlist",   S_REGULAR,          typeObjC2CategoryList),
96
  ENTRY("",       "",                 S_INTERPOSING,      typeInterposingTuples),
97
  ENTRY("__LD",   "__compact_unwind", S_REGULAR,
98
                                                         typeCompactUnwindInfo),
99
  ENTRY("",       "",                 S_REGULAR,          typeUnknown)
100
};
101
#undef ENTRY
102
103
104
/// Figures out ContentType of a mach-o section.
105
DefinedAtom::ContentType atomTypeFromSection(const Section &section,
106
301
                                             bool &customSectionName) {
107
301
  // First look for match of name and type. Empty names in table are wildcards.
108
301
  customSectionName = false;
109
301
  for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ;
110
2.49k
                                 p->atomType != DefinedAtom::typeUnknown; 
++p2.18k
) {
111
2.48k
    if (p->sectionType != section.type)
112
1.50k
      continue;
113
977
    if (!p->segmentName.equals(section.segmentName) && 
!p->segmentName.empty()578
)
114
571
      continue;
115
406
    if (!p->sectionName.equals(section.sectionName) && 
!p->sectionName.empty()122
)
116
115
      continue;
117
291
    customSectionName = p->segmentName.empty() && 
p->sectionName.empty()7
;
118
291
    return p->atomType;
119
291
  }
120
301
  // Look for code denoted by section attributes
121
301
  
if (10
section.attributes & S_ATTR_PURE_INSTRUCTIONS10
)
122
2
    return DefinedAtom::typeCode;
123
8
124
8
  return DefinedAtom::typeUnknown;
125
8
}
126
127
enum AtomizeModel {
128
  atomizeAtSymbols,
129
  atomizeFixedSize,
130
  atomizePointerSize,
131
  atomizeUTF8,
132
  atomizeUTF16,
133
  atomizeCFI,
134
  atomizeCU,
135
  atomizeCFString
136
};
137
138
/// Returns info on how to atomize a section of the specified ContentType.
139
void sectionParseInfo(DefinedAtom::ContentType atomType,
140
                      unsigned int &sizeMultiple,
141
                      DefinedAtom::Scope &scope,
142
                      DefinedAtom::Merge &merge,
143
301
                      AtomizeModel &atomizeModel) {
144
301
  struct ParseInfo {
145
301
    DefinedAtom::ContentType  atomType;
146
301
    unsigned int              sizeMultiple;
147
301
    DefinedAtom::Scope        scope;
148
301
    DefinedAtom::Merge        merge;
149
301
    AtomizeModel              atomizeModel;
150
301
  };
151
301
152
301
  #define ENTRY(type, size, scope, merge, model) \
153
5.41k
    {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model }
154
301
155
301
  static const ParseInfo parseInfo[] = {
156
301
    ENTRY(typeCode,              1, scopeGlobal,          mergeNo,
157
301
                                                            atomizeAtSymbols),
158
301
    ENTRY(typeData,              1, scopeGlobal,          mergeNo,
159
301
                                                            atomizeAtSymbols),
160
301
    ENTRY(typeConstData,         1, scopeGlobal,          mergeNo,
161
301
                                                            atomizeAtSymbols),
162
301
    ENTRY(typeZeroFill,          1, scopeGlobal,          mergeNo,
163
301
                                                            atomizeAtSymbols),
164
301
    ENTRY(typeConstant,          1, scopeGlobal,          mergeNo,
165
301
                                                            atomizeAtSymbols),
166
301
    ENTRY(typeCString,           1, scopeLinkageUnit,     mergeByContent,
167
301
                                                            atomizeUTF8),
168
301
    ENTRY(typeUTF16String,       1, scopeLinkageUnit,     mergeByContent,
169
301
                                                            atomizeUTF16),
170
301
    ENTRY(typeCFI,               4, scopeTranslationUnit, mergeNo,
171
301
                                                            atomizeCFI),
172
301
    ENTRY(typeLiteral4,          4, scopeLinkageUnit,     mergeByContent,
173
301
                                                            atomizeFixedSize),
174
301
    ENTRY(typeLiteral8,          8, scopeLinkageUnit,     mergeByContent,
175
301
                                                            atomizeFixedSize),
176
301
    ENTRY(typeLiteral16,        16, scopeLinkageUnit,     mergeByContent,
177
301
                                                            atomizeFixedSize),
178
301
    ENTRY(typeCFString,          4, scopeLinkageUnit,     mergeByContent,
179
301
                                                            atomizeCFString),
180
301
    ENTRY(typeInitializerPtr,    4, scopeTranslationUnit, mergeNo,
181
301
                                                            atomizePointerSize),
182
301
    ENTRY(typeTerminatorPtr,     4, scopeTranslationUnit, mergeNo,
183
301
                                                            atomizePointerSize),
184
301
    ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo,
185
301
                                                            atomizeCU),
186
301
    ENTRY(typeGOT,               4, scopeLinkageUnit,     mergeByContent,
187
301
                                                            atomizePointerSize),
188
301
    ENTRY(typeObjC2CategoryList, 4, scopeTranslationUnit, mergeByContent,
189
301
                                                            atomizePointerSize),
190
301
    ENTRY(typeUnknown,           1, scopeGlobal,          mergeNo,
191
301
                                                            atomizeAtSymbols)
192
301
  };
193
301
  #undef ENTRY
194
301
  const int tableLen = sizeof(parseInfo) / sizeof(ParseInfo);
195
1.46k
  for (int i=0; i < tableLen; 
++i1.16k
) {
196
1.45k
    if (parseInfo[i].atomType == atomType) {
197
285
      sizeMultiple = parseInfo[i].sizeMultiple;
198
285
      scope        = parseInfo[i].scope;
199
285
      merge        = parseInfo[i].merge;
200
285
      atomizeModel = parseInfo[i].atomizeModel;
201
285
      return;
202
285
    }
203
1.45k
  }
204
301
205
301
  // Unknown type is atomized by symbols.
206
301
  sizeMultiple = 1;
207
16
  scope = DefinedAtom::scopeGlobal;
208
16
  merge = DefinedAtom::mergeNo;
209
16
  atomizeModel = atomizeAtSymbols;
210
16
}
211
212
213
443
Atom::Scope atomScope(uint8_t scope) {
214
443
  switch (scope) {
215
443
  case N_EXT:
216
272
    return Atom::scopeGlobal;
217
443
  case N_PEXT:
218
42
  case N_PEXT | N_EXT:
219
42
    return Atom::scopeLinkageUnit;
220
129
  case 0:
221
129
    return Atom::scopeTranslationUnit;
222
0
  }
223
0
  llvm_unreachable("unknown scope value!");
224
0
}
225
226
void appendSymbolsInSection(const std::vector<Symbol> &inSymbols,
227
                            uint32_t sectionIndex,
228
454
                            SmallVector<const Symbol *, 64> &outSyms) {
229
640
  for (const Symbol &sym : inSymbols) {
230
640
    // Only look at definition symbols.
231
640
    if ((sym.type & N_TYPE) != N_SECT)
232
0
      continue;
233
640
    if (sym.sect != sectionIndex)
234
281
      continue;
235
359
    outSyms.push_back(&sym);
236
359
  }
237
454
}
238
239
void atomFromSymbol(DefinedAtom::ContentType atomType, const Section &section,
240
                    MachOFile &file, uint64_t symbolAddr, StringRef symbolName,
241
                    uint16_t symbolDescFlags, Atom::Scope symbolScope,
242
367
                    uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) {
243
367
  // Mach-O symbol table does have size in it. Instead the size is the
244
367
  // difference between this and the next symbol.
245
367
  uint64_t size = nextSymbolAddr - symbolAddr;
246
367
  uint64_t offset = symbolAddr - section.address;
247
367
  bool noDeadStrip = (symbolDescFlags & N_NO_DEAD_STRIP) || 
!scatterable355
;
248
367
  if (isZeroFillSection(section.type)) {
249
3
    file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size,
250
3
                                noDeadStrip, copyRefs, &section);
251
364
  } else {
252
364
    DefinedAtom::Merge merge = (symbolDescFlags & N_WEAK_DEF)
253
364
                              ? 
DefinedAtom::mergeAsWeak6
:
DefinedAtom::mergeNo358
;
254
364
    bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF);
255
364
    if (atomType == DefinedAtom::typeUnknown) {
256
15
      // Mach-O needs a segment and section name.  Concatentate those two
257
15
      // with a / separator (e.g. "seg/sect") to fit into the lld model
258
15
      // of just a section name.
259
15
      std::string segSectName = section.segmentName.str()
260
15
                                + "/" + section.sectionName.str();
261
15
      file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType,
262
15
                                         merge, thumb, noDeadStrip, offset,
263
15
                                         size, segSectName, true, &section);
264
349
    } else {
265
349
      if ((atomType == lld::DefinedAtom::typeCode) &&
266
349
          
(symbolDescFlags & N_SYMBOL_RESOLVER)272
) {
267
2
        atomType = lld::DefinedAtom::typeResolver;
268
2
      }
269
349
      file.addDefinedAtom(symbolName, symbolScope, atomType, merge,
270
349
                          offset, size, thumb, noDeadStrip, copyRefs, &section);
271
349
    }
272
364
  }
273
367
}
274
275
llvm::Error processSymboledSection(DefinedAtom::ContentType atomType,
276
                                   const Section &section,
277
                                   const NormalizedFile &normalizedFile,
278
                                   MachOFile &file, bool scatterable,
279
227
                                   bool copyRefs) {
280
227
  // Find section's index.
281
227
  uint32_t sectIndex = 1;
282
309
  for (auto &sect : normalizedFile.sections) {
283
309
    if (&sect == &section)
284
227
      break;
285
82
    ++sectIndex;
286
82
  }
287
227
288
227
  // Find all symbols in this section.
289
227
  SmallVector<const Symbol *, 64> symbols;
290
227
  appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols);
291
227
  appendSymbolsInSection(normalizedFile.localSymbols,  sectIndex, symbols);
292
227
293
227
  // Sort symbols.
294
227
  std::sort(symbols.begin(), symbols.end(),
295
227
            [](const Symbol *lhs, const Symbol *rhs) -> bool {
296
205
              if (lhs == rhs)
297
0
                return false;
298
205
              // First by address.
299
205
              uint64_t lhsAddr = lhs->value;
300
205
              uint64_t rhsAddr = rhs->value;
301
205
              if (lhsAddr != rhsAddr)
302
160
                return lhsAddr < rhsAddr;
303
45
               // If same address, one is an alias so sort by scope.
304
45
              Atom::Scope lScope = atomScope(lhs->scope);
305
45
              Atom::Scope rScope = atomScope(rhs->scope);
306
45
              if (lScope != rScope)
307
34
                return lScope < rScope;
308
11
              // If same address and scope, see if one might be better as
309
11
              // the alias.
310
11
              bool lPrivate = (lhs->name.front() == 'l');
311
11
              bool rPrivate = (rhs->name.front() == 'l');
312
11
              if (lPrivate != rPrivate)
313
4
                return lPrivate;
314
7
              // If same address and scope, sort by name.
315
7
              return lhs->name < rhs->name;
316
7
            });
317
227
318
227
  // Debug logging of symbols.
319
227
  //for (const Symbol *sym : symbols)
320
227
  //  llvm::errs() << "  sym: "
321
227
  //    << llvm::format("0x%08llx ", (uint64_t)sym->value)
322
227
  //    << ", " << sym->name << "\n";
323
227
324
227
  // If section has no symbols and no content, there are no atoms.
325
227
  if (symbols.empty() && 
section.content.empty()11
)
326
3
    return llvm::Error::success();
327
224
328
224
  if (symbols.empty()) {
329
8
    // Section has no symbols, put all content in one anoymous atom.
330
8
    atomFromSymbol(atomType, section, file, section.address, StringRef(),
331
8
                  0, Atom::scopeTranslationUnit,
332
8
                  section.address + section.content.size(),
333
8
                  scatterable, copyRefs);
334
8
  }
335
216
  else if (symbols.front()->value != section.address) {
336
11
    // Section has anonymous content before first symbol.
337
11
    atomFromSymbol(atomType, section, file, section.address, StringRef(),
338
11
                   0, Atom::scopeTranslationUnit, symbols.front()->value,
339
11
                   scatterable, copyRefs);
340
11
  }
341
224
342
224
  const Symbol *lastSym = nullptr;
343
359
  for (const Symbol *sym : symbols) {
344
359
    if (lastSym != nullptr) {
345
143
      // Ignore any assembler added "ltmpNNN" symbol at start of section
346
143
      // if there is another symbol at the start.
347
143
      if ((lastSym->value != sym->value)
348
143
          || 
lastSym->value != section.address19
349
143
          || 
!lastSym->name.startswith("ltmp")11
) {
350
132
        atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
351
132
                       lastSym->desc, atomScope(lastSym->scope), sym->value,
352
132
                       scatterable, copyRefs);
353
132
      }
354
143
    }
355
359
    lastSym = sym;
356
359
  }
357
224
  if (lastSym != nullptr) {
358
216
    atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
359
216
                   lastSym->desc, atomScope(lastSym->scope),
360
216
                   section.address + section.content.size(),
361
216
                   scatterable, copyRefs);
362
216
  }
363
224
364
224
  // If object built without .subsections_via_symbols, add reference chain.
365
224
  if (!scatterable) {
366
21
    MachODefinedAtom *prevAtom = nullptr;
367
21
    file.eachAtomInSection(section,
368
27
                           [&](MachODefinedAtom *atom, uint64_t offset)->void {
369
27
      if (prevAtom)
370
6
        prevAtom->addReference(Reference::KindNamespace::all,
371
6
                               Reference::KindArch::all,
372
6
                               Reference::kindLayoutAfter, 0, atom, 0);
373
27
      prevAtom = atom;
374
27
    });
375
21
  }
376
224
377
224
  return llvm::Error::success();
378
224
}
379
380
llvm::Error processSection(DefinedAtom::ContentType atomType,
381
                           const Section &section,
382
                           bool customSectionName,
383
                           const NormalizedFile &normalizedFile,
384
                           MachOFile &file, bool scatterable,
385
301
                           bool copyRefs) {
386
301
  const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
387
301
  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
388
301
389
301
  // Get info on how to atomize section.
390
301
  unsigned int       sizeMultiple;
391
301
  DefinedAtom::Scope scope;
392
301
  DefinedAtom::Merge merge;
393
301
  AtomizeModel       atomizeModel;
394
301
  sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel);
395
301
396
301
  // Validate section size.
397
301
  if ((section.content.size() % sizeMultiple) != 0)
398
0
    return llvm::make_error<GenericError>(Twine("Section ")
399
0
                                          + section.segmentName
400
0
                                          + "/" + section.sectionName
401
0
                                          + " has size ("
402
0
                                          + Twine(section.content.size())
403
0
                                          + ") which is not a multiple of "
404
0
                                          + Twine(sizeMultiple));
405
301
406
301
  if (atomizeModel == atomizeAtSymbols) {
407
227
    // Break section up into atoms each with a fixed size.
408
227
    return processSymboledSection(atomType, section, normalizedFile, file,
409
227
                                  scatterable, copyRefs);
410
227
  } else {
411
74
    unsigned int size;
412
202
    for (unsigned int offset = 0, e = section.content.size(); offset != e;) {
413
128
      switch (atomizeModel) {
414
128
      case atomizeFixedSize:
415
6
        // Break section up into atoms each with a fixed size.
416
6
        size = sizeMultiple;
417
6
        break;
418
128
      case atomizePointerSize:
419
14
        // Break section up into atoms each the size of a pointer.
420
14
        size = is64 ? 
87
:
47
;
421
14
        break;
422
128
      case atomizeUTF8:
423
26
        // Break section up into zero terminated c-strings.
424
26
        size = 0;
425
176
        for (unsigned int i = offset; i < e; 
++i150
) {
426
176
          if (section.content[i] == 0) {
427
26
            size = i + 1 - offset;
428
26
            break;
429
26
          }
430
176
        }
431
26
        break;
432
128
      case atomizeUTF16:
433
2
        // Break section up into zero terminated UTF16 strings.
434
2
        size = 0;
435
11
        for (unsigned int i = offset; i < e; 
i += 29
) {
436
11
          if ((section.content[i] == 0) && 
(section.content[i + 1] == 0)2
) {
437
2
            size = i + 2 - offset;
438
2
            break;
439
2
          }
440
11
        }
441
2
        break;
442
128
      case atomizeCFI:
443
48
        // Break section up into dwarf unwind CFIs (FDE or CIE).
444
48
        size = read32(&section.content[offset], isBig) + 4;
445
48
        if (offset+size > section.content.size()) {
446
0
          return llvm::make_error<GenericError>(Twine("Section ")
447
0
                                                + section.segmentName
448
0
                                                + "/" + section.sectionName
449
0
                                                + " is malformed.  Size of CFI "
450
0
                                                "starting at offset ("
451
0
                                                + Twine(offset)
452
0
                                                + ") is past end of section.");
453
0
        }
454
48
        break;
455
48
      case atomizeCU:
456
28
        // Break section up into compact unwind entries.
457
28
        size = is64 ? 
3226
:
202
;
458
28
        break;
459
48
      case atomizeCFString:
460
4
        // Break section up into NS/CFString objects.
461
4
        size = is64 ? 
322
:
162
;
462
4
        break;
463
48
      case atomizeAtSymbols:
464
0
        break;
465
128
      }
466
128
      if (size == 0) {
467
0
        return llvm::make_error<GenericError>(Twine("Section ")
468
0
                                              + section.segmentName
469
0
                                              + "/" + section.sectionName
470
0
                                              + " is malformed.  The last atom "
471
0
                                              "is not zero terminated.");
472
0
      }
473
128
      if (customSectionName) {
474
8
        // Mach-O needs a segment and section name.  Concatentate those two
475
8
        // with a / separator (e.g. "seg/sect") to fit into the lld model
476
8
        // of just a section name.
477
8
        std::string segSectName = section.segmentName.str()
478
8
                                  + "/" + section.sectionName.str();
479
8
        file.addDefinedAtomInCustomSection(StringRef(), scope, atomType,
480
8
                                           merge, false, false, offset,
481
8
                                           size, segSectName, true, &section);
482
120
      } else {
483
120
        file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size,
484
120
                            false, false, copyRefs, &section);
485
120
      }
486
128
      offset += size;
487
128
    }
488
74
  }
489
301
  
return llvm::Error::success()74
;
490
301
}
491
492
const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile,
493
238
                                          uint64_t address) {
494
382
  for (const Section &s : normalizedFile.sections) {
495
382
    uint64_t sAddr = s.address;
496
382
    if ((sAddr <= address) && (address < sAddr+s.content.size())) {
497
238
      return &s;
498
238
    }
499
382
  }
500
238
  
return nullptr0
;
501
238
}
502
503
const MachODefinedAtom *
504
findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file,
505
56
                        uint64_t addr, Reference::Addend &addend) {
506
56
  const Section *sect = nullptr;
507
56
  sect = findSectionCoveringAddress(normalizedFile, addr);
508
56
  if (!sect)
509
0
    return nullptr;
510
56
511
56
  uint32_t offsetInTarget;
512
56
  uint64_t offsetInSect = addr - sect->address;
513
56
  auto atom =
514
56
      file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
515
56
  addend = offsetInTarget;
516
56
  return atom;
517
56
}
518
519
// Walks all relocations for a section in a normalized .o file and
520
// creates corresponding lld::Reference objects.
521
llvm::Error convertRelocs(const Section &section,
522
                          const NormalizedFile &normalizedFile,
523
                          bool scatterable,
524
                          MachOFile &file,
525
307
                          ArchHandler &handler) {
526
307
  // Utility function for ArchHandler to find atom by its address.
527
307
  auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr,
528
307
                         const lld::Atom **atom, Reference::Addend *addend)
529
307
                         -> llvm::Error {
530
268
    if (sectIndex > normalizedFile.sections.size())
531
0
      return llvm::make_error<GenericError>(Twine("out of range section "
532
0
                                     "index (") + Twine(sectIndex) + ")");
533
268
    const Section *sect = nullptr;
534
268
    if (sectIndex == 0) {
535
154
      sect = findSectionCoveringAddress(normalizedFile, addr);
536
154
      if (!sect)
537
0
        return llvm::make_error<GenericError>(Twine("address (" + Twine(addr)
538
0
                                       + ") is not in any section"));
539
114
    } else {
540
114
      sect = &normalizedFile.sections[sectIndex-1];
541
114
    }
542
268
    uint32_t offsetInTarget;
543
268
    uint64_t offsetInSect = addr - sect->address;
544
268
    *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
545
268
    *addend = offsetInTarget;
546
268
    return llvm::Error::success();
547
268
  };
548
307
549
307
  // Utility function for ArchHandler to find atom by its symbol index.
550
307
  auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result)
551
307
                           -> llvm::Error {
552
291
    // Find symbol from index.
553
291
    const Symbol *sym = nullptr;
554
291
    uint32_t numStabs  = normalizedFile.stabsSymbols.size();
555
291
    uint32_t numLocal  = normalizedFile.localSymbols.size();
556
291
    uint32_t numGlobal = normalizedFile.globalSymbols.size();
557
291
    uint32_t numUndef  = normalizedFile.undefinedSymbols.size();
558
291
    assert(symbolIndex >= numStabs && "Searched for stab via atomBySymbol?");
559
291
    if (symbolIndex < numStabs+numLocal) {
560
76
      sym = &normalizedFile.localSymbols[symbolIndex-numStabs];
561
215
    } else if (symbolIndex < numStabs+numLocal+numGlobal) {
562
26
      sym = &normalizedFile.globalSymbols[symbolIndex-numStabs-numLocal];
563
189
    } else if (symbolIndex < numStabs+numLocal+numGlobal+numUndef) {
564
189
      sym = &normalizedFile.undefinedSymbols[symbolIndex-numStabs-numLocal-
565
189
                                             numGlobal];
566
189
    } else {
567
0
      return llvm::make_error<GenericError>(Twine("symbol index (")
568
0
                                     + Twine(symbolIndex) + ") out of range");
569
0
    }
570
291
571
291
    // Find atom from symbol.
572
291
    if ((sym->type & N_TYPE) == N_SECT) {
573
102
      if (sym->sect > normalizedFile.sections.size())
574
0
        return llvm::make_error<GenericError>(Twine("symbol section index (")
575
0
                                        + Twine(sym->sect) + ") out of range ");
576
102
      const Section &symSection = normalizedFile.sections[sym->sect-1];
577
102
      uint64_t targetOffsetInSect = sym->value - symSection.address;
578
102
      MachODefinedAtom *target = file.findAtomCoveringAddress(symSection,
579
102
                                                            targetOffsetInSect);
580
102
      if (target) {
581
102
        *result = target;
582
102
        return llvm::Error::success();
583
102
      }
584
0
      return llvm::make_error<GenericError>("no atom found for defined symbol");
585
189
    } else if ((sym->type & N_TYPE) == N_UNDF) {
586
189
      const lld::Atom *target = file.findUndefAtom(sym->name);
587
189
      if (target) {
588
189
        *result = target;
589
189
        return llvm::Error::success();
590
189
      }
591
0
      return llvm::make_error<GenericError>("no undefined atom found for sym");
592
0
    } else {
593
0
      // Search undefs
594
0
      return llvm::make_error<GenericError>("no atom found for symbol");
595
0
    }
596
291
  };
597
307
598
307
  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
599
307
  // Use old-school iterator so that paired relocations can be grouped.
600
307
  for (auto it=section.relocations.begin(), e=section.relocations.end();
601
781
                                                                it != e; 
++it474
) {
602
475
    const Relocation &reloc = *it;
603
475
    // Find atom this relocation is in.
604
475
    if (reloc.offset > section.content.size())
605
0
      return llvm::make_error<GenericError>(
606
0
                                    Twine("r_address (") + Twine(reloc.offset)
607
0
                                    + ") is larger than section size ("
608
0
                                    + Twine(section.content.size()) + ")");
609
475
    uint32_t offsetInAtom;
610
475
    MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section,
611
475
                                                            reloc.offset,
612
475
                                                            &offsetInAtom);
613
475
    assert(inAtom && "r_address in range, should have found atom");
614
475
    uint64_t fixupAddress = section.address + reloc.offset;
615
475
616
475
    const lld::Atom *target = nullptr;
617
475
    Reference::Addend addend = 0;
618
475
    Reference::KindValue kind;
619
475
    if (handler.isPairedReloc(reloc)) {
620
157
      // Handle paired relocations together.
621
157
      const Relocation &reloc2 = *++it;
622
157
      auto relocErr = handler.getPairReferenceInfo(
623
157
          reloc, reloc2, inAtom, offsetInAtom, fixupAddress, isBig, scatterable,
624
157
          atomByAddr, atomBySymbol, &kind, &target, &addend);
625
157
      if (relocErr) {
626
1
        return handleErrors(std::move(relocErr),
627
1
                            [&](std::unique_ptr<GenericError> GE) {
628
1
          return llvm::make_error<GenericError>(
629
1
            Twine("bad relocation (") + GE->getMessage()
630
1
             + ") in section "
631
1
             + section.segmentName + "/" + section.sectionName
632
1
             + " (r1_address=" + Twine::utohexstr(reloc.offset)
633
1
             + ", r1_type=" + Twine(reloc.type)
634
1
             + ", r1_extern=" + Twine(reloc.isExtern)
635
1
             + ", r1_length=" + Twine((int)reloc.length)
636
1
             + ", r1_pcrel=" + Twine(reloc.pcRel)
637
1
             + (!reloc.scattered ? (Twine(", r1_symbolnum=")
638
1
                                    + Twine(reloc.symbol))
639
1
                                 : (Twine(", r1_scattered=1, r1_value=")
640
0
                                    + Twine(reloc.value)))
641
1
             + ")"
642
1
             + ", (r2_address=" + Twine::utohexstr(reloc2.offset)
643
1
             + ", r2_type=" + Twine(reloc2.type)
644
1
             + ", r2_extern=" + Twine(reloc2.isExtern)
645
1
             + ", r2_length=" + Twine((int)reloc2.length)
646
1
             + ", r2_pcrel=" + Twine(reloc2.pcRel)
647
1
             + (!reloc2.scattered ? (Twine(", r2_symbolnum=")
648
1
                                     + Twine(reloc2.symbol))
649
1
                                  : (Twine(", r2_scattered=1, r2_value=")
650
0
                                     + Twine(reloc2.value)))
651
1
             + ")" );
652
1
          });
653
1
      }
654
318
    }
655
318
    else {
656
318
      // Use ArchHandler to convert relocation record into information
657
318
      // needed to instantiate an lld::Reference object.
658
318
      auto relocErr = handler.getReferenceInfo(
659
318
          reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr,
660
318
          atomBySymbol, &kind, &target, &addend);
661
318
      if (relocErr) {
662
0
        return handleErrors(std::move(relocErr),
663
0
                            [&](std::unique_ptr<GenericError> GE) {
664
0
          return llvm::make_error<GenericError>(
665
0
            Twine("bad relocation (") + GE->getMessage()
666
0
             + ") in section "
667
0
             + section.segmentName + "/" + section.sectionName
668
0
             + " (r_address=" + Twine::utohexstr(reloc.offset)
669
0
             + ", r_type=" + Twine(reloc.type)
670
0
             + ", r_extern=" + Twine(reloc.isExtern)
671
0
             + ", r_length=" + Twine((int)reloc.length)
672
0
             + ", r_pcrel=" + Twine(reloc.pcRel)
673
0
             + (!reloc.scattered ? (Twine(", r_symbolnum=") + Twine(reloc.symbol))
674
0
                                 : (Twine(", r_scattered=1, r_value=")
675
0
                                    + Twine(reloc.value)))
676
0
             + ")" );
677
0
          });
678
0
      }
679
474
    }
680
474
    // Instantiate an lld::Reference object and add to its atom.
681
474
    inAtom->addReference(Reference::KindNamespace::mach_o,
682
474
                         handler.kindArch(),
683
474
                         kind, offsetInAtom, target, addend);
684
474
  }
685
307
686
307
  
return llvm::Error::success()306
;
687
307
}
688
689
641
bool isDebugInfoSection(const Section &section) {
690
641
  if ((section.attributes & S_ATTR_DEBUG) == 0)
691
603
    return false;
692
38
  return section.segmentName.equals("__DWARF");
693
38
}
694
695
0
static const Atom* findDefinedAtomByName(MachOFile &file, Twine name) {
696
0
  std::string strName = name.str();
697
0
  for (auto *atom : file.defined())
698
0
    if (atom->name() == strName)
699
0
      return atom;
700
0
  return nullptr;
701
0
}
702
703
2
static StringRef copyDebugString(StringRef str, BumpPtrAllocator &alloc) {
704
2
  char *strCopy = alloc.Allocate<char>(str.size() + 1);
705
2
  memcpy(strCopy, str.data(), str.size());
706
2
  strCopy[str.size()] = '\0';
707
2
  return strCopy;
708
2
}
709
710
llvm::Error parseStabs(MachOFile &file,
711
                       const NormalizedFile &normalizedFile,
712
176
                       bool copyRefs) {
713
176
714
176
  if (normalizedFile.stabsSymbols.empty())
715
176
    return llvm::Error::success();
716
0
717
0
  // FIXME: Kill this off when we can move to sane yaml parsing.
718
0
  std::unique_ptr<BumpPtrAllocator> allocator;
719
0
  if (copyRefs)
720
0
    allocator = llvm::make_unique<BumpPtrAllocator>();
721
0
722
0
  enum { start, inBeginEnd } state = start;
723
0
724
0
  const Atom *currentAtom = nullptr;
725
0
  uint64_t currentAtomAddress = 0;
726
0
  StabsDebugInfo::StabsList stabsList;
727
0
  for (const auto &stabSym : normalizedFile.stabsSymbols) {
728
0
    Stab stab(nullptr, stabSym.type, stabSym.sect, stabSym.desc,
729
0
              stabSym.value, stabSym.name);
730
0
    switch (state) {
731
0
    case start:
732
0
      switch (static_cast<StabType>(stabSym.type)) {
733
0
      case N_BNSYM:
734
0
        state = inBeginEnd;
735
0
        currentAtomAddress = stabSym.value;
736
0
        Reference::Addend addend;
737
0
        currentAtom = findAtomCoveringAddress(normalizedFile, file,
738
0
                                              currentAtomAddress, addend);
739
0
        if (addend != 0)
740
0
          return llvm::make_error<GenericError>(
741
0
                   "Non-zero addend for BNSYM '" + stabSym.name + "' in " +
742
0
                   file.path());
743
0
        if (currentAtom)
744
0
          stab.atom = currentAtom;
745
0
        else {
746
0
          // FIXME: ld64 just issues a warning here - should we match that?
747
0
          return llvm::make_error<GenericError>(
748
0
                   "can't find atom for stabs BNSYM at " +
749
0
                   Twine::utohexstr(stabSym.value) + " in " + file.path());
750
0
        }
751
0
        break;
752
0
      case N_SO:
753
0
      case N_OSO:
754
0
        // Not associated with an atom, just copy.
755
0
        if (copyRefs)
756
0
          stab.str = copyDebugString(stabSym.name, *allocator);
757
0
        else
758
0
          stab.str = stabSym.name;
759
0
        break;
760
0
      case N_GSYM: {
761
0
        auto colonIdx = stabSym.name.find(':');
762
0
        if (colonIdx != StringRef::npos) {
763
0
          StringRef name = stabSym.name.substr(0, colonIdx);
764
0
          currentAtom = findDefinedAtomByName(file, "_" + name);
765
0
          stab.atom = currentAtom;
766
0
          if (copyRefs)
767
0
            stab.str = copyDebugString(stabSym.name, *allocator);
768
0
          else
769
0
            stab.str = stabSym.name;
770
0
        } else {
771
0
          currentAtom = findDefinedAtomByName(file, stabSym.name);
772
0
          stab.atom = currentAtom;
773
0
          if (copyRefs)
774
0
            stab.str = copyDebugString(stabSym.name, *allocator);
775
0
          else
776
0
            stab.str = stabSym.name;
777
0
        }
778
0
        if (stab.atom == nullptr)
779
0
          return llvm::make_error<GenericError>(
780
0
                   "can't find atom for N_GSYM stabs" + stabSym.name +
781
0
                   " in " + file.path());
782
0
        break;
783
0
      }
784
0
      case N_FUN:
785
0
        return llvm::make_error<GenericError>(
786
0
                 "old-style N_FUN stab '" + stabSym.name + "' unsupported");
787
0
      default:
788
0
        return llvm::make_error<GenericError>(
789
0
                 "unrecognized stab symbol '" + stabSym.name + "'");
790
0
      }
791
0
      break;
792
0
    case inBeginEnd:
793
0
      stab.atom = currentAtom;
794
0
      switch (static_cast<StabType>(stabSym.type)) {
795
0
      case N_ENSYM:
796
0
        state = start;
797
0
        currentAtom = nullptr;
798
0
        break;
799
0
      case N_FUN:
800
0
        // Just copy the string.
801
0
        if (copyRefs)
802
0
          stab.str = copyDebugString(stabSym.name, *allocator);
803
0
        else
804
0
          stab.str = stabSym.name;
805
0
        break;
806
0
      default:
807
0
        return llvm::make_error<GenericError>(
808
0
                 "unrecognized stab symbol '" + stabSym.name + "'");
809
0
      }
810
0
    }
811
0
    llvm::dbgs() << "Adding to stabsList: " << stab << "\n";
812
0
    stabsList.push_back(stab);
813
0
  }
814
0
815
0
  file.setDebugInfo(llvm::make_unique<StabsDebugInfo>(std::move(stabsList)));
816
0
817
0
  // FIXME: Kill this off when we fix YAML memory ownership.
818
0
  file.debugInfo()->setAllocator(std::move(allocator));
819
0
820
0
  return llvm::Error::success();
821
0
}
822
823
static llvm::DataExtractor
824
dataExtractorFromSection(const NormalizedFile &normalizedFile,
825
4
                         const Section &S) {
826
4
  const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
827
4
  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
828
4
  StringRef SecData(reinterpret_cast<const char*>(S.content.data()),
829
4
                    S.content.size());
830
4
  return llvm::DataExtractor(SecData, !isBig, is64 ? 8 : 
40
);
831
4
}
832
833
// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
834
//        inspection" code if possible.
835
static uint32_t getCUAbbrevOffset(llvm::DataExtractor abbrevData,
836
1
                                  uint64_t abbrCode) {
837
1
  uint64_t curCode;
838
1
  uint32_t offset = 0;
839
1
  while ((curCode = abbrevData.getULEB128(&offset)) != abbrCode) {
840
0
    // Tag
841
0
    abbrevData.getULEB128(&offset);
842
0
    // DW_CHILDREN
843
0
    abbrevData.getU8(&offset);
844
0
    // Attributes
845
0
    while (abbrevData.getULEB128(&offset) | abbrevData.getULEB128(&offset))
846
0
      ;
847
0
  }
848
1
  return offset;
849
1
}
850
851
// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
852
//        inspection" code if possible.
853
static Expected<const char *>
854
getIndexedString(const NormalizedFile &normalizedFile,
855
                 llvm::dwarf::Form form, llvm::DataExtractor infoData,
856
2
                 uint32_t &infoOffset, const Section &stringsSection) {
857
2
  if (form == llvm::dwarf::DW_FORM_string)
858
0
   return infoData.getCStr(&infoOffset);
859
2
  if (form != llvm::dwarf::DW_FORM_strp)
860
0
    return llvm::make_error<GenericError>(
861
0
        "string field encoded without DW_FORM_strp");
862
2
  uint32_t stringOffset = infoData.getU32(&infoOffset);
863
2
  llvm::DataExtractor stringsData =
864
2
    dataExtractorFromSection(normalizedFile, stringsSection);
865
2
  return stringsData.getCStr(&stringOffset);
866
2
}
867
868
// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
869
//        inspection" code if possible.
870
static llvm::Expected<TranslationUnitSource>
871
readCompUnit(const NormalizedFile &normalizedFile,
872
             const Section &info,
873
             const Section &abbrev,
874
             const Section &strings,
875
1
             StringRef path) {
876
1
  // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
877
1
  //        inspection" code if possible.
878
1
  uint32_t offset = 0;
879
1
  llvm::dwarf::DwarfFormat Format = llvm::dwarf::DwarfFormat::DWARF32;
880
1
  auto infoData = dataExtractorFromSection(normalizedFile, info);
881
1
  uint32_t length = infoData.getU32(&offset);
882
1
  if (length == 0xffffffff) {
883
0
    Format = llvm::dwarf::DwarfFormat::DWARF64;
884
0
    infoData.getU64(&offset);
885
0
  }
886
1
  else if (length > 0xffffff00)
887
0
    return llvm::make_error<GenericError>("Malformed DWARF in " + path);
888
1
889
1
  uint16_t version = infoData.getU16(&offset);
890
1
891
1
  if (version < 2 || version > 4)
892
0
    return llvm::make_error<GenericError>("Unsupported DWARF version in " +
893
0
                                          path);
894
1
895
1
  infoData.getU32(&offset); // Abbrev offset (should be zero)
896
1
  uint8_t addrSize = infoData.getU8(&offset);
897
1
898
1
  uint32_t abbrCode = infoData.getULEB128(&offset);
899
1
  auto abbrevData = dataExtractorFromSection(normalizedFile, abbrev);
900
1
  uint32_t abbrevOffset = getCUAbbrevOffset(abbrevData, abbrCode);
901
1
  uint64_t tag = abbrevData.getULEB128(&abbrevOffset);
902
1
  if (tag != llvm::dwarf::DW_TAG_compile_unit)
903
0
    return llvm::make_error<GenericError>("top level DIE is not a compile unit");
904
1
  // DW_CHILDREN
905
1
  abbrevData.getU8(&abbrevOffset);
906
1
  uint32_t name;
907
1
  llvm::dwarf::Form form;
908
1
  llvm::dwarf::FormParams formParams = {version, addrSize, Format};
909
1
  TranslationUnitSource tu;
910
8
  while ((name = abbrevData.getULEB128(&abbrevOffset)) |
911
8
         (form = static_cast<llvm::dwarf::Form>(
912
8
             abbrevData.getULEB128(&abbrevOffset))) &&
913
8
         
(7
name != 07
||
form != 00
)) {
914
7
    switch (name) {
915
7
    case llvm::dwarf::DW_AT_name: {
916
1
      if (auto eName = getIndexedString(normalizedFile, form, infoData, offset,
917
1
                                        strings))
918
1
          tu.name = *eName;
919
0
      else
920
0
        return eName.takeError();
921
1
      break;
922
1
    }
923
1
    case llvm::dwarf::DW_AT_comp_dir: {
924
1
      if (auto eName = getIndexedString(normalizedFile, form, infoData, offset,
925
1
                                        strings))
926
1
        tu.path = *eName;
927
0
      else
928
0
        return eName.takeError();
929
1
      break;
930
1
    }
931
5
    default:
932
5
      llvm::DWARFFormValue::skipValue(form, infoData, &offset, formParams);
933
7
    }
934
7
  }
935
1
  return tu;
936
1
}
937
938
llvm::Error parseDebugInfo(MachOFile &file,
939
177
                           const NormalizedFile &normalizedFile, bool copyRefs) {
940
177
941
177
  // Find the interesting debug info sections.
942
177
  const Section *debugInfo = nullptr;
943
177
  const Section *debugAbbrev = nullptr;
944
177
  const Section *debugStrings = nullptr;
945
177
946
317
  for (auto &s : normalizedFile.sections) {
947
317
    if (s.segmentName == "__DWARF") {
948
12
      if (s.sectionName == "__debug_info")
949
1
        debugInfo = &s;
950
11
      else if (s.sectionName == "__debug_abbrev")
951
1
        debugAbbrev = &s;
952
10
      else if (s.sectionName == "__debug_str")
953
1
        debugStrings = &s;
954
12
    }
955
317
  }
956
177
957
177
  if (!debugInfo)
958
176
    return parseStabs(file, normalizedFile, copyRefs);
959
1
960
1
  if (debugInfo->content.size() == 0)
961
0
    return llvm::Error::success();
962
1
963
1
  if (debugInfo->content.size() < 12)
964
0
    return llvm::make_error<GenericError>("Malformed __debug_info section in " +
965
0
                                          file.path() + ": too small");
966
1
967
1
  if (!debugAbbrev)
968
0
    return llvm::make_error<GenericError>("Missing __dwarf_abbrev section in " +
969
0
                                          file.path());
970
1
971
1
  if (auto tuOrErr = readCompUnit(normalizedFile, *debugInfo, *debugAbbrev,
972
1
                                  *debugStrings, file.path())) {
973
1
    // FIXME: Kill of allocator and code under 'copyRefs' when we fix YAML
974
1
    //        memory ownership.
975
1
    std::unique_ptr<BumpPtrAllocator> allocator;
976
1
    if (copyRefs) {
977
1
      allocator = llvm::make_unique<BumpPtrAllocator>();
978
1
      tuOrErr->name = copyDebugString(tuOrErr->name, *allocator);
979
1
      tuOrErr->path = copyDebugString(tuOrErr->path, *allocator);
980
1
    }
981
1
    file.setDebugInfo(llvm::make_unique<DwarfDebugInfo>(std::move(*tuOrErr)));
982
1
    if (copyRefs)
983
1
      file.debugInfo()->setAllocator(std::move(allocator));
984
1
  } else
985
0
    return tuOrErr.takeError();
986
1
987
1
  return llvm::Error::success();
988
1
}
989
990
34
static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) {
991
34
  if (is64)
992
30
    return read64(addr, isBig);
993
4
994
4
  int32_t res = read32(addr, isBig);
995
4
  return res;
996
4
}
997
998
/// --- Augmentation String Processing ---
999
1000
struct CIEInfo {
1001
  bool _augmentationDataPresent = false;
1002
  bool _mayHaveEH = false;
1003
  uint32_t _offsetOfLSDA = ~0U;
1004
  uint32_t _offsetOfPersonality = ~0U;
1005
  uint32_t _offsetOfFDEPointerEncoding = ~0U;
1006
  uint32_t _augmentationDataLength = ~0U;
1007
};
1008
1009
typedef llvm::DenseMap<const MachODefinedAtom*, CIEInfo> CIEInfoMap;
1010
1011
static llvm::Error processAugmentationString(const uint8_t *augStr,
1012
                                             CIEInfo &cieInfo,
1013
22
                                             unsigned &len) {
1014
22
1015
22
  if (augStr[0] == '\0') {
1016
0
    len = 1;
1017
0
    return llvm::Error::success();
1018
0
  }
1019
22
1020
22
  if (augStr[0] != 'z')
1021
0
    return llvm::make_error<GenericError>("expected 'z' at start of "
1022
0
                                          "augmentation string");
1023
22
1024
22
  cieInfo._augmentationDataPresent = true;
1025
22
  uint64_t idx = 1;
1026
22
1027
22
  uint32_t offsetInAugmentationData = 0;
1028
58
  while (augStr[idx] != '\0') {
1029
36
    if (augStr[idx] == 'L') {
1030
7
      cieInfo._offsetOfLSDA = offsetInAugmentationData;
1031
7
      // This adds a single byte to the augmentation data.
1032
7
      ++offsetInAugmentationData;
1033
7
      ++idx;
1034
7
      continue;
1035
7
    }
1036
29
    if (augStr[idx] == 'P') {
1037
7
      cieInfo._offsetOfPersonality = offsetInAugmentationData;
1038
7
      // This adds a single byte to the augmentation data for the encoding,
1039
7
      // then a number of bytes for the pointer data.
1040
7
      // FIXME: We are assuming 4 is correct here for the pointer size as we
1041
7
      // always currently use delta32ToGOT.
1042
7
      offsetInAugmentationData += 5;
1043
7
      ++idx;
1044
7
      continue;
1045
7
    }
1046
22
    if (augStr[idx] == 'R') {
1047
22
      cieInfo._offsetOfFDEPointerEncoding = offsetInAugmentationData;
1048
22
      // This adds a single byte to the augmentation data.
1049
22
      ++offsetInAugmentationData;
1050
22
      ++idx;
1051
22
      continue;
1052
22
    }
1053
0
    if (augStr[idx] == 'e') {
1054
0
      if (augStr[idx + 1] != 'h')
1055
0
        return llvm::make_error<GenericError>("expected 'eh' in "
1056
0
                                              "augmentation string");
1057
0
      cieInfo._mayHaveEH = true;
1058
0
      idx += 2;
1059
0
      continue;
1060
0
    }
1061
0
    ++idx;
1062
0
  }
1063
22
1064
22
  cieInfo._augmentationDataLength = offsetInAugmentationData;
1065
22
1066
22
  len = idx + 1;
1067
22
  return llvm::Error::success();
1068
22
}
1069
1070
static llvm::Error processCIE(const NormalizedFile &normalizedFile,
1071
                              MachOFile &file,
1072
                              mach_o::ArchHandler &handler,
1073
                              const Section *ehFrameSection,
1074
                              MachODefinedAtom *atom,
1075
                              uint64_t offset,
1076
22
                              CIEInfoMap &cieInfos) {
1077
22
  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1078
22
  const uint8_t *frameData = atom->rawContent().data();
1079
22
1080
22
  CIEInfo cieInfo;
1081
22
1082
22
  uint32_t size = read32(frameData, isBig);
1083
22
  uint64_t cieIDField = size == 0xffffffffU
1084
22
                          ? 
sizeof(uint32_t) + sizeof(uint64_t)0
1085
22
                          : sizeof(uint32_t);
1086
22
  uint64_t versionField = cieIDField + sizeof(uint32_t);
1087
22
  uint64_t augmentationStringField = versionField + sizeof(uint8_t);
1088
22
1089
22
  unsigned augmentationStringLength = 0;
1090
22
  if (auto err = processAugmentationString(frameData + augmentationStringField,
1091
0
                                           cieInfo, augmentationStringLength))
1092
0
    return err;
1093
22
1094
22
  if (cieInfo._offsetOfPersonality != ~0U) {
1095
7
    // If we have augmentation data for the personality function, then we may
1096
7
    // need to implicitly generate its relocation.
1097
7
1098
7
    // Parse the EH Data field which is pointer sized.
1099
7
    uint64_t EHDataField = augmentationStringField + augmentationStringLength;
1100
7
    const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
1101
7
    unsigned EHDataFieldSize = (cieInfo._mayHaveEH ? 
(is64 0
?
80
:
40
) : 0);
1102
7
1103
7
    // Parse Code Align Factor which is a ULEB128.
1104
7
    uint64_t CodeAlignField = EHDataField + EHDataFieldSize;
1105
7
    unsigned lengthFieldSize = 0;
1106
7
    llvm::decodeULEB128(frameData + CodeAlignField, &lengthFieldSize);
1107
7
1108
7
    // Parse Data Align Factor which is a SLEB128.
1109
7
    uint64_t DataAlignField = CodeAlignField + lengthFieldSize;
1110
7
    llvm::decodeSLEB128(frameData + DataAlignField, &lengthFieldSize);
1111
7
1112
7
    // Parse Return Address Register which is a byte.
1113
7
    uint64_t ReturnAddressField = DataAlignField + lengthFieldSize;
1114
7
1115
7
    // Parse the augmentation length which is a ULEB128.
1116
7
    uint64_t AugmentationLengthField = ReturnAddressField + 1;
1117
7
    uint64_t AugmentationLength =
1118
7
      llvm::decodeULEB128(frameData + AugmentationLengthField,
1119
7
                          &lengthFieldSize);
1120
7
1121
7
    if (AugmentationLength != cieInfo._augmentationDataLength)
1122
0
      return llvm::make_error<GenericError>("CIE augmentation data length "
1123
0
                                            "mismatch");
1124
7
1125
7
    // Get the start address of the augmentation data.
1126
7
    uint64_t AugmentationDataField = AugmentationLengthField + lengthFieldSize;
1127
7
1128
7
    // Parse the personality function from the augmentation data.
1129
7
    uint64_t PersonalityField =
1130
7
      AugmentationDataField + cieInfo._offsetOfPersonality;
1131
7
1132
7
    // Parse the personality encoding.
1133
7
    // FIXME: Verify that this is a 32-bit pcrel offset.
1134
7
    uint64_t PersonalityFunctionField = PersonalityField + 1;
1135
7
1136
7
    if (atom->begin() != atom->end()) {
1137
3
      // If we have an explicit relocation, then make sure it matches this
1138
3
      // offset as this is where we'd expect it to be applied to.
1139
3
      DefinedAtom::reference_iterator CurrentRef = atom->begin();
1140
3
      if (CurrentRef->offsetInAtom() != PersonalityFunctionField)
1141
0
        return llvm::make_error<GenericError>("CIE personality reloc at "
1142
0
                                              "wrong offset");
1143
3
1144
3
      if (++CurrentRef != atom->end())
1145
0
        return llvm::make_error<GenericError>("CIE contains too many relocs");
1146
4
    } else {
1147
4
      // Implicitly generate the personality function reloc.  It's assumed to
1148
4
      // be a delta32 offset to a GOT entry.
1149
4
      // FIXME: Parse the encoding and check this.
1150
4
      int32_t funcDelta = read32(frameData + PersonalityFunctionField, isBig);
1151
4
      uint64_t funcAddress = ehFrameSection->address + offset +
1152
4
                             PersonalityFunctionField;
1153
4
      funcAddress += funcDelta;
1154
4
1155
4
      const MachODefinedAtom *func = nullptr;
1156
4
      Reference::Addend addend;
1157
4
      func = findAtomCoveringAddress(normalizedFile, file, funcAddress,
1158
4
                                     addend);
1159
4
      atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
1160
4
                         handler.unwindRefToPersonalityFunctionKind(),
1161
4
                         PersonalityFunctionField, func, addend);
1162
4
    }
1163
15
  } else if (atom->begin() != atom->end()) {
1164
0
    // Otherwise, we expect there to be no relocations in this atom as the only
1165
0
    // relocation would have been to the personality function.
1166
0
    return llvm::make_error<GenericError>("unexpected relocation in CIE");
1167
0
  }
1168
22
1169
22
1170
22
  cieInfos[atom] = std::move(cieInfo);
1171
22
1172
22
  return llvm::Error::success();
1173
22
}
1174
1175
static llvm::Error processFDE(const NormalizedFile &normalizedFile,
1176
                              MachOFile &file,
1177
                              mach_o::ArchHandler &handler,
1178
                              const Section *ehFrameSection,
1179
                              MachODefinedAtom *atom,
1180
                              uint64_t offset,
1181
26
                              const CIEInfoMap &cieInfos) {
1182
26
1183
26
  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1184
26
  const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
1185
26
1186
26
  // Compiler wasn't lazy and actually told us what it meant.
1187
26
  // Unfortunately, the compiler may not have generated references for all of
1188
26
  // [cie, func, lsda] and so we still need to parse the FDE and add references
1189
26
  // for any the compiler didn't generate.
1190
26
  if (atom->begin() != atom->end())
1191
4
    atom->sortReferences();
1192
26
1193
26
  DefinedAtom::reference_iterator CurrentRef = atom->begin();
1194
26
1195
26
  // This helper returns the reference (if one exists) at the offset we are
1196
26
  // currently processing.  It automatically increments the ref iterator if we
1197
26
  // do return a ref, and throws an error if we pass over a ref without
1198
26
  // comsuming it.
1199
26
  auto currentRefGetter = [&CurrentRef,
1200
60
                           &atom](uint64_t Offset)->const Reference* {
1201
60
    // If there are no more refs found, then we are done.
1202
60
    if (CurrentRef == atom->end())
1203
50
      return nullptr;
1204
10
1205
10
    const Reference *Ref = *CurrentRef;
1206
10
1207
10
    // If we haven't reached the offset for this reference, then return that
1208
10
    // we don't yet have a reference to process.
1209
10
    if (Offset < Ref->offsetInAtom())
1210
2
      return nullptr;
1211
8
1212
8
    // If the offset is equal, then we want to process this ref.
1213
8
    if (Offset == Ref->offsetInAtom()) {
1214
8
      ++CurrentRef;
1215
8
      return Ref;
1216
8
    }
1217
0
1218
0
    // The current ref is at an offset which is earlier than the current
1219
0
    // offset, then we failed to consume it when we should have.  In this case
1220
0
    // throw an error.
1221
0
    llvm::report_fatal_error("Skipped reference when processing FDE");
1222
0
  };
1223
26
1224
26
  // Helper to either get the reference at this current location, and verify
1225
26
  // that it is of the expected type, or add a reference of that type.
1226
26
  // Returns the reference target.
1227
26
  auto verifyOrAddReference = [&](uint64_t targetAddress,
1228
26
                                  Reference::KindValue refKind,
1229
26
                                  uint64_t refAddress,
1230
60
                                  bool allowsAddend)->const Atom* {
1231
60
    if (auto *ref = currentRefGetter(refAddress)) {
1232
8
      // The compiler already emitted a relocation for the CIE ref.  This should
1233
8
      // have been converted to the correct type of reference in
1234
8
      // get[Pair]ReferenceInfo().
1235
8
      assert(ref->kindValue() == refKind &&
1236
8
             "Incorrect EHFrame reference kind");
1237
8
      return ref->target();
1238
8
    }
1239
52
    Reference::Addend addend;
1240
52
    auto *target = findAtomCoveringAddress(normalizedFile, file,
1241
52
                                           targetAddress, addend);
1242
52
    atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
1243
52
                       refKind, refAddress, target, addend);
1244
52
1245
52
    if (!allowsAddend)
1246
52
      assert(!addend && "EHFrame reference cannot have addend");
1247
52
    return target;
1248
52
  };
1249
26
1250
26
  const uint8_t *startFrameData = atom->rawContent().data();
1251
26
  const uint8_t *frameData = startFrameData;
1252
26
1253
26
  uint32_t size = read32(frameData, isBig);
1254
26
  uint64_t cieFieldInFDE = size == 0xffffffffU
1255
26
    ? 
sizeof(uint32_t) + sizeof(uint64_t)0
1256
26
    : sizeof(uint32_t);
1257
26
1258
26
  // Linker needs to fixup a reference from the FDE to its parent CIE (a
1259
26
  // 32-bit byte offset backwards in the __eh_frame section).
1260
26
  uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig);
1261
26
  uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE;
1262
26
  cieAddress -= cieDelta;
1263
26
1264
26
  auto *cieRefTarget = verifyOrAddReference(cieAddress,
1265
26
                                            handler.unwindRefToCIEKind(),
1266
26
                                            cieFieldInFDE, false);
1267
26
  const MachODefinedAtom *cie = dyn_cast<MachODefinedAtom>(cieRefTarget);
1268
26
  assert(cie && cie->contentType() == DefinedAtom::typeCFI &&
1269
26
         "FDE's CIE field does not point at the start of a CIE.");
1270
26
1271
26
  const CIEInfo &cieInfo = cieInfos.find(cie)->second;
1272
26
1273
26
  // Linker needs to fixup reference from the FDE to the function it's
1274
26
  // describing. FIXME: there are actually different ways to do this, and the
1275
26
  // particular method used is specified in the CIE's augmentation fields
1276
26
  // (hopefully)
1277
26
  uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t);
1278
26
1279
26
  int64_t functionFromFDE = readSPtr(is64, isBig,
1280
26
                                     frameData + rangeFieldInFDE);
1281
26
  uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE;
1282
26
  rangeStart += functionFromFDE;
1283
26
1284
26
  verifyOrAddReference(rangeStart,
1285
26
                       handler.unwindRefToFunctionKind(),
1286
26
                       rangeFieldInFDE, true);
1287
26
1288
26
  // Handle the augmentation data if there is any.
1289
26
  if (cieInfo._augmentationDataPresent) {
1290
26
    // First process the augmentation data length field.
1291
26
    uint64_t augmentationDataLengthFieldInFDE =
1292
26
      rangeFieldInFDE + 2 * (is64 ? 
sizeof(uint64_t)22
:
sizeof(uint32_t)4
);
1293
26
    unsigned lengthFieldSize = 0;
1294
26
    uint64_t augmentationDataLength =
1295
26
      llvm::decodeULEB128(frameData + augmentationDataLengthFieldInFDE,
1296
26
                          &lengthFieldSize);
1297
26
1298
26
    if (cieInfo._offsetOfLSDA != ~0U && 
augmentationDataLength > 08
) {
1299
8
1300
8
      // Look at the augmentation data field.
1301
8
      uint64_t augmentationDataFieldInFDE =
1302
8
        augmentationDataLengthFieldInFDE + lengthFieldSize;
1303
8
1304
8
      int64_t lsdaFromFDE = readSPtr(is64, isBig,
1305
8
                                     frameData + augmentationDataFieldInFDE);
1306
8
      uint64_t lsdaStart =
1307
8
        ehFrameSection->address + offset + augmentationDataFieldInFDE +
1308
8
        lsdaFromFDE;
1309
8
1310
8
      verifyOrAddReference(lsdaStart,
1311
8
                           handler.unwindRefToFunctionKind(),
1312
8
                           augmentationDataFieldInFDE, true);
1313
8
    }
1314
26
  }
1315
26
1316
26
  return llvm::Error::success();
1317
26
}
1318
1319
llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile,
1320
                                 MachOFile &file,
1321
177
                                 mach_o::ArchHandler &handler) {
1322
177
1323
177
  const Section *ehFrameSection = nullptr;
1324
177
  for (auto &section : normalizedFile.sections)
1325
311
    if (section.segmentName == "__TEXT" &&
1326
311
        
section.sectionName == "__eh_frame"208
) {
1327
19
      ehFrameSection = &section;
1328
19
      break;
1329
19
    }
1330
177
1331
177
  // No __eh_frame so nothing to do.
1332
177
  if (!ehFrameSection)
1333
158
    return llvm::Error::success();
1334
19
1335
19
  llvm::Error ehFrameErr = llvm::Error::success();
1336
19
  CIEInfoMap cieInfos;
1337
19
1338
19
  file.eachAtomInSection(*ehFrameSection,
1339
48
                         [&](MachODefinedAtom *atom, uint64_t offset) -> void {
1340
48
    assert(atom->contentType() == DefinedAtom::typeCFI);
1341
48
1342
48
    // Bail out if we've encountered an error.
1343
48
    if (ehFrameErr)
1344
0
      return;
1345
48
1346
48
    const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1347
48
    if (ArchHandler::isDwarfCIE(isBig, atom))
1348
22
      ehFrameErr = processCIE(normalizedFile, file, handler, ehFrameSection,
1349
22
                              atom, offset, cieInfos);
1350
26
    else
1351
26
      ehFrameErr = processFDE(normalizedFile, file, handler, ehFrameSection,
1352
26
                              atom, offset, cieInfos);
1353
48
  });
1354
19
1355
19
  return ehFrameErr;
1356
19
}
1357
1358
llvm::Error parseObjCImageInfo(const Section &sect,
1359
                               const NormalizedFile &normalizedFile,
1360
9
                               MachOFile &file) {
1361
9
1362
9
  //  struct objc_image_info  {
1363
9
  //    uint32_t  version;  // initially 0
1364
9
  //    uint32_t  flags;
1365
9
  //  };
1366
9
1367
9
  ArrayRef<uint8_t> content = sect.content;
1368
9
  if (content.size() != 8)
1369
1
    return llvm::make_error<GenericError>(sect.segmentName + "/" +
1370
1
                                          sect.sectionName +
1371
1
                                          " in file " + file.path() +
1372
1
                                          " should be 8 bytes in size");
1373
8
1374
8
  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1375
8
  uint32_t version = read32(content.data(), isBig);
1376
8
  if (version)
1377
1
    return llvm::make_error<GenericError>(sect.segmentName + "/" +
1378
1
                                          sect.sectionName +
1379
1
                                          " in file " + file.path() +
1380
1
                                          " should have version=0");
1381
7
1382
7
  uint32_t flags = read32(content.data() + 4, isBig);
1383
7
  if (flags & (MachOLinkingContext::objc_supports_gc |
1384
7
               MachOLinkingContext::objc_gc_only))
1385
1
    return llvm::make_error<GenericError>(sect.segmentName + "/" +
1386
1
                                          sect.sectionName +
1387
1
                                          " in file " + file.path() +
1388
1
                                          " uses GC.  This is not supported");
1389
6
1390
6
  if (flags & MachOLinkingContext::objc_retainReleaseForSimulator)
1391
3
    file.setObjcConstraint(MachOLinkingContext::objc_retainReleaseForSimulator);
1392
3
  else
1393
3
    file.setObjcConstraint(MachOLinkingContext::objc_retainRelease);
1394
6
1395
6
  file.setSwiftVersion((flags >> 8) & 0xFF);
1396
6
1397
6
  return llvm::Error::success();
1398
6
}
1399
1400
/// Converts normalized mach-o file into an lld::File and lld::Atoms.
1401
llvm::Expected<std::unique_ptr<lld::File>>
1402
objectToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1403
154
              bool copyRefs) {
1404
154
  std::unique_ptr<MachOFile> file(new MachOFile(path));
1405
154
  if (auto ec = normalizedObjectToAtoms(file.get(), normalizedFile, copyRefs))
1406
4
    return std::move(ec);
1407
150
  return std::unique_ptr<File>(std::move(file));
1408
150
}
1409
1410
llvm::Expected<std::unique_ptr<lld::File>>
1411
dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1412
113
             bool copyRefs) {
1413
113
  // Instantiate SharedLibraryFile object.
1414
113
  std::unique_ptr<MachODylibFile> file(new MachODylibFile(path));
1415
113
  if (auto ec = normalizedDylibToAtoms(file.get(), normalizedFile, copyRefs))
1416
0
    return std::move(ec);
1417
113
  return std::unique_ptr<File>(std::move(file));
1418
113
}
1419
1420
} // anonymous namespace
1421
1422
namespace normalized {
1423
1424
310
static bool isObjCImageInfo(const Section &sect) {
1425
310
  return (sect.segmentName == "__OBJC" && 
sect.sectionName == "__image_info"0
) ||
1426
310
    (sect.segmentName == "__DATA" && 
sect.sectionName == "__objc_imageinfo"75
);
1427
310
}
1428
1429
llvm::Error
1430
normalizedObjectToAtoms(MachOFile *file,
1431
                        const NormalizedFile &normalizedFile,
1432
181
                        bool copyRefs) {
1433
181
  LLVM_DEBUG(llvm::dbgs() << "******** Normalizing file to atoms: "
1434
181
                          << file->path() << "\n");
1435
181
  bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0);
1436
181
1437
181
  // Create atoms from each section.
1438
322
  for (auto &sect : normalizedFile.sections) {
1439
322
1440
322
    // If this is a debug-info section parse it specially.
1441
322
    if (isDebugInfoSection(sect))
1442
12
      continue;
1443
310
1444
310
    // If the file contains an objc_image_info struct, then we should parse the
1445
310
    // ObjC flags and Swift version.
1446
310
    if (isObjCImageInfo(sect)) {
1447
9
      if (auto ec = parseObjCImageInfo(sect, normalizedFile, *file))
1448
3
        return ec;
1449
6
      // We then skip adding atoms for this section as we use the ObjCPass to
1450
6
      // re-emit this data after it has been aggregated for all files.
1451
6
      continue;
1452
6
    }
1453
301
1454
301
    bool customSectionName;
1455
301
    DefinedAtom::ContentType atomType = atomTypeFromSection(sect,
1456
301
                                                            customSectionName);
1457
301
    if (auto ec =  processSection(atomType, sect, customSectionName,
1458
0
                                  normalizedFile, *file, scatterable, copyRefs))
1459
0
      return ec;
1460
301
  }
1461
181
  // Create atoms from undefined symbols.
1462
181
  
for (auto &sym : normalizedFile.undefinedSymbols)178
{
1463
115
    // Undefinded symbols with n_value != 0 are actually tentative definitions.
1464
115
    if (sym.value == Hex64(0)) {
1465
110
      file->addUndefinedAtom(sym.name, copyRefs);
1466
110
    } else {
1467
5
      file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value,
1468
5
                                DefinedAtom::Alignment(1 << (sym.desc >> 8)),
1469
5
                                copyRefs);
1470
5
    }
1471
115
  }
1472
178
1473
178
  // Convert mach-o relocations to References
1474
178
  std::unique_ptr<mach_o::ArchHandler> handler
1475
178
                                     = ArchHandler::create(normalizedFile.arch);
1476
319
  for (auto &sect : normalizedFile.sections) {
1477
319
    if (isDebugInfoSection(sect))
1478
12
      continue;
1479
307
    if (llvm::Error ec = convertRelocs(sect, normalizedFile, scatterable,
1480
1
                                       *file, *handler))
1481
1
      return ec;
1482
307
  }
1483
178
1484
178
  // Add additional arch-specific References
1485
491
  
file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void 177
{
1486
491
    handler->addAdditionalReferences(*atom);
1487
491
  });
1488
177
1489
177
  // Each __eh_frame section needs references to both __text (the function we're
1490
177
  // providing unwind info for) and itself (FDE -> CIE). These aren't
1491
177
  // represented in the relocations on some architectures, so we have to add
1492
177
  // them back in manually there.
1493
177
  if (auto ec = addEHFrameReferences(normalizedFile, *file, *handler))
1494
0
    return ec;
1495
177
1496
177
  // Process mach-o data-in-code regions array. That information is encoded in
1497
177
  // atoms as References at each transition point.
1498
177
  unsigned nextIndex = 0;
1499
177
  for (const DataInCode &entry : normalizedFile.dataInCode) {
1500
28
    ++nextIndex;
1501
28
    const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset);
1502
28
    if (!s) {
1503
0
      return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE address ("
1504
0
                                                  + Twine(entry.offset)
1505
0
                                                  + ") is not in any section"));
1506
0
    }
1507
28
    uint64_t offsetInSect = entry.offset - s->address;
1508
28
    uint32_t offsetInAtom;
1509
28
    MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect,
1510
28
                                                           &offsetInAtom);
1511
28
    if (offsetInAtom + entry.length > atom->size()) {
1512
0
      return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE entry "
1513
0
                                                  "(offset="
1514
0
                                                  + Twine(entry.offset)
1515
0
                                                  + ", length="
1516
0
                                                  + Twine(entry.length)
1517
0
                                                  + ") crosses atom boundary."));
1518
0
    }
1519
28
    // Add reference that marks start of data-in-code.
1520
28
    atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
1521
28
                       handler->dataInCodeTransitionStart(*atom),
1522
28
                       offsetInAtom, atom, entry.kind);
1523
28
1524
28
    // Peek at next entry, if it starts where this one ends, skip ending ref.
1525
28
    if (nextIndex < normalizedFile.dataInCode.size()) {
1526
23
      const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex];
1527
23
      if (nextEntry.offset == (entry.offset + entry.length))
1528
18
        continue;
1529
10
    }
1530
10
1531
10
    // If data goes to end of function, skip ending ref.
1532
10
    if ((offsetInAtom + entry.length) == atom->size())
1533
2
      continue;
1534
8
1535
8
    // Add reference that marks end of data-in-code.
1536
8
    atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
1537
8
                       handler->dataInCodeTransitionEnd(*atom),
1538
8
                       offsetInAtom+entry.length, atom, 0);
1539
8
  }
1540
177
1541
177
  // Cache some attributes on the file for use later.
1542
177
  file->setFlags(normalizedFile.flags);
1543
177
  file->setArch(normalizedFile.arch);
1544
177
  file->setOS(normalizedFile.os);
1545
177
  file->setMinVersion(normalizedFile.minOSverson);
1546
177
  file->setMinVersionLoadCommandKind(normalizedFile.minOSVersionKind);
1547
177
1548
177
  // Sort references in each atom to their canonical order.
1549
496
  for (const DefinedAtom* defAtom : file->defined()) {
1550
496
    reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences();
1551
496
  }
1552
177
1553
177
  if (auto err = parseDebugInfo(*file, normalizedFile, copyRefs))
1554
0
    return err;
1555
177
1556
177
  return llvm::Error::success();
1557
177
}
1558
1559
llvm::Error
1560
normalizedDylibToAtoms(MachODylibFile *file,
1561
                       const NormalizedFile &normalizedFile,
1562
117
                       bool copyRefs) {
1563
117
  file->setInstallName(normalizedFile.installName);
1564
117
  file->setCompatVersion(normalizedFile.compatVersion);
1565
117
  file->setCurrentVersion(normalizedFile.currentVersion);
1566
117
1567
117
  // Tell MachODylibFile object about all symbols it exports.
1568
117
  if (!normalizedFile.exportInfo.empty()) {
1569
110
    // If exports trie exists, use it instead of traditional symbol table.
1570
147
    for (const Export &exp : normalizedFile.exportInfo) {
1571
147
      bool weakDef = (exp.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION);
1572
147
      // StringRefs from export iterator are ephemeral, so force copy.
1573
147
      file->addExportedSymbol(exp.name, weakDef, true);
1574
147
    }
1575
110
  } else {
1576
15
    for (auto &sym : normalizedFile.globalSymbols) {
1577
15
      assert((sym.scope & N_EXT) && "only expect external symbols here");
1578
15
      bool weakDef = (sym.desc & N_WEAK_DEF);
1579
15
      file->addExportedSymbol(sym.name, weakDef, copyRefs);
1580
15
    }
1581
7
  }
1582
117
  // Tell MachODylibFile object about all dylibs it re-exports.
1583
117
  for (const DependentDylib &dep : normalizedFile.dependentDylibs) {
1584
5
    if (dep.kind == llvm::MachO::LC_REEXPORT_DYLIB)
1585
1
      file->addReExportedDylib(dep.path);
1586
5
  }
1587
117
  return llvm::Error::success();
1588
117
}
1589
1590
void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType,
1591
                                          StringRef &segmentName,
1592
                                          StringRef &sectionName,
1593
                                          SectionType &sectionType,
1594
                                          SectionAttr &sectionAttrs,
1595
139
                                          bool &relocsToDefinedCanBeImplicit) {
1596
139
1597
139
  for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ;
1598
1.26k
                                 p->atomType != DefinedAtom::typeUnknown; 
++p1.12k
) {
1599
1.26k
    if (p->atomType != atomType)
1600
1.12k
      continue;
1601
139
    // Wild carded entries are ignored for reverse lookups.
1602
139
    if (p->segmentName.empty() || p->sectionName.empty())
1603
0
      continue;
1604
139
    segmentName = p->segmentName;
1605
139
    sectionName = p->sectionName;
1606
139
    sectionType = p->sectionType;
1607
139
    sectionAttrs = 0;
1608
139
    relocsToDefinedCanBeImplicit = false;
1609
139
    if (atomType == DefinedAtom::typeCode)
1610
61
      sectionAttrs = S_ATTR_PURE_INSTRUCTIONS;
1611
139
    if (atomType == DefinedAtom::typeCFI)
1612
10
      relocsToDefinedCanBeImplicit = true;
1613
139
    return;
1614
139
  }
1615
139
  
llvm_unreachable0
("content type not yet supported");
1616
139
}
1617
1618
llvm::Expected<std::unique_ptr<lld::File>>
1619
normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1620
267
                  bool copyRefs) {
1621
267
  switch (normalizedFile.fileType) {
1622
267
  case MH_DYLIB:
1623
113
  case MH_DYLIB_STUB:
1624
113
    return dylibToAtoms(normalizedFile, path, copyRefs);
1625
154
  case MH_OBJECT:
1626
154
    return objectToAtoms(normalizedFile, path, copyRefs);
1627
113
  default:
1628
0
    llvm_unreachable("unhandled MachO file type!");
1629
267
  }
1630
267
}
1631
1632
} // namespace normalized
1633
} // namespace mach_o
1634
} // namespace lld