Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===//
2
//
3
//                             The LLVM Linker
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
10
///
11
/// \file Converts from in-memory normalized mach-o to in-memory Atoms.
12
///
13
///                  +------------+
14
///                  | normalized |
15
///                  +------------+
16
///                        |
17
///                        |
18
///                        v
19
///                    +-------+
20
///                    | Atoms |
21
///                    +-------+
22
23
#include "ArchHandler.h"
24
#include "Atoms.h"
25
#include "File.h"
26
#include "MachONormalizedFile.h"
27
#include "MachONormalizedFileBinaryUtils.h"
28
#include "lld/Core/Error.h"
29
#include "lld/Core/LLVM.h"
30
#include "llvm/BinaryFormat/Dwarf.h"
31
#include "llvm/BinaryFormat/MachO.h"
32
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
33
#include "llvm/Support/DataExtractor.h"
34
#include "llvm/Support/Debug.h"
35
#include "llvm/Support/Error.h"
36
#include "llvm/Support/Format.h"
37
#include "llvm/Support/LEB128.h"
38
#include "llvm/Support/raw_ostream.h"
39
40
using namespace llvm::MachO;
41
using namespace lld::mach_o::normalized;
42
43
#define DEBUG_TYPE "normalized-file-to-atoms"
44
45
namespace lld {
46
namespace mach_o {
47
48
49
namespace { // anonymous
50
51
52
#define ENTRY(seg, sect, type, atomType) \
53
  {seg, sect, type, DefinedAtom::atomType }
54
55
struct MachORelocatableSectionToAtomType {
56
  StringRef                 segmentName;
57
  StringRef                 sectionName;
58
  SectionType               sectionType;
59
  DefinedAtom::ContentType  atomType;
60
};
61
62
const MachORelocatableSectionToAtomType sectsToAtomType[] = {
63
  ENTRY("__TEXT", "__text",           S_REGULAR,          typeCode),
64
  ENTRY("__TEXT", "__text",           S_REGULAR,          typeResolver),
65
  ENTRY("__TEXT", "__cstring",        S_CSTRING_LITERALS, typeCString),
66
  ENTRY("",       "",                 S_CSTRING_LITERALS, typeCString),
67
  ENTRY("__TEXT", "__ustring",        S_REGULAR,          typeUTF16String),
68
  ENTRY("__TEXT", "__const",          S_REGULAR,          typeConstant),
69
  ENTRY("__TEXT", "__const_coal",     S_COALESCED,        typeConstant),
70
  ENTRY("__TEXT", "__eh_frame",       S_COALESCED,        typeCFI),
71
  ENTRY("__TEXT", "__eh_frame",       S_REGULAR,          typeCFI),
72
  ENTRY("__TEXT", "__literal4",       S_4BYTE_LITERALS,   typeLiteral4),
73
  ENTRY("__TEXT", "__literal8",       S_8BYTE_LITERALS,   typeLiteral8),
74
  ENTRY("__TEXT", "__literal16",      S_16BYTE_LITERALS,  typeLiteral16),
75
  ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR,          typeLSDA),
76
  ENTRY("__DATA", "__data",           S_REGULAR,          typeData),
77
  ENTRY("__DATA", "__datacoal_nt",    S_COALESCED,        typeData),
78
  ENTRY("__DATA", "__const",          S_REGULAR,          typeConstData),
79
  ENTRY("__DATA", "__cfstring",       S_REGULAR,          typeCFString),
80
  ENTRY("__DATA", "__mod_init_func",  S_MOD_INIT_FUNC_POINTERS,
81
                                                          typeInitializerPtr),
82
  ENTRY("__DATA", "__mod_term_func",  S_MOD_TERM_FUNC_POINTERS,
83
                                                          typeTerminatorPtr),
84
  ENTRY("__DATA", "__got",            S_NON_LAZY_SYMBOL_POINTERS,
85
                                                          typeGOT),
86
  ENTRY("__DATA", "__bss",            S_ZEROFILL,         typeZeroFill),
87
  ENTRY("",       "",                 S_NON_LAZY_SYMBOL_POINTERS,
88
                                                          typeGOT),
89
  ENTRY("__DATA", "__interposing",    S_INTERPOSING,      typeInterposingTuples),
90
  ENTRY("__DATA", "__thread_vars",    S_THREAD_LOCAL_VARIABLES,
91
                                                          typeThunkTLV),
92
  ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, typeTLVInitialData),
93
  ENTRY("__DATA", "__thread_bss",     S_THREAD_LOCAL_ZEROFILL,
94
                                                        typeTLVInitialZeroFill),
95
  ENTRY("__DATA", "__objc_imageinfo", S_REGULAR,          typeObjCImageInfo),
96
  ENTRY("__DATA", "__objc_catlist",   S_REGULAR,          typeObjC2CategoryList),
97
  ENTRY("",       "",                 S_INTERPOSING,      typeInterposingTuples),
98
  ENTRY("__LD",   "__compact_unwind", S_REGULAR,
99
                                                         typeCompactUnwindInfo),
100
  ENTRY("",       "",                 S_REGULAR,          typeUnknown)
101
};
102
#undef ENTRY
103
104
105
/// Figures out ContentType of a mach-o section.
106
DefinedAtom::ContentType atomTypeFromSection(const Section &section,
107
301
                                             bool &customSectionName) {
108
301
  // First look for match of name and type. Empty names in table are wildcards.
109
301
  customSectionName = false;
110
301
  for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ;
111
2.49k
                                 
p->atomType != DefinedAtom::typeUnknown2.49k
;
++p2.18k
) {
112
2.48k
    if (p->sectionType != section.type)
113
1.50k
      continue;
114
977
    
if (977
!p->segmentName.equals(section.segmentName) && 977
!p->segmentName.empty()578
)
115
571
      continue;
116
406
    
if (406
!p->sectionName.equals(section.sectionName) && 406
!p->sectionName.empty()122
)
117
115
      continue;
118
291
    
customSectionName = p->segmentName.empty() && 291
p->sectionName.empty()7
;
119
2.48k
    return p->atomType;
120
2.48k
  }
121
301
  // Look for code denoted by section attributes
122
10
  
if (10
section.attributes & S_ATTR_PURE_INSTRUCTIONS10
)
123
2
    return DefinedAtom::typeCode;
124
8
125
8
  return DefinedAtom::typeUnknown;
126
8
}
127
128
enum AtomizeModel {
129
  atomizeAtSymbols,
130
  atomizeFixedSize,
131
  atomizePointerSize,
132
  atomizeUTF8,
133
  atomizeUTF16,
134
  atomizeCFI,
135
  atomizeCU,
136
  atomizeCFString
137
};
138
139
/// Returns info on how to atomize a section of the specified ContentType.
140
void sectionParseInfo(DefinedAtom::ContentType atomType,
141
                      unsigned int &sizeMultiple,
142
                      DefinedAtom::Scope &scope,
143
                      DefinedAtom::Merge &merge,
144
301
                      AtomizeModel &atomizeModel) {
145
301
  struct ParseInfo {
146
301
    DefinedAtom::ContentType  atomType;
147
301
    unsigned int              sizeMultiple;
148
301
    DefinedAtom::Scope        scope;
149
301
    DefinedAtom::Merge        merge;
150
301
    AtomizeModel              atomizeModel;
151
301
  };
152
301
153
301
  #define ENTRY(type, size, scope, merge, model) \
154
5.41k
    {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model }
155
301
156
301
  static const ParseInfo parseInfo[] = {
157
301
    ENTRY(typeCode,              1, scopeGlobal,          mergeNo,
158
301
                                                            atomizeAtSymbols),
159
301
    ENTRY(typeData,              1, scopeGlobal,          mergeNo,
160
301
                                                            atomizeAtSymbols),
161
301
    ENTRY(typeConstData,         1, scopeGlobal,          mergeNo,
162
301
                                                            atomizeAtSymbols),
163
301
    ENTRY(typeZeroFill,          1, scopeGlobal,          mergeNo,
164
301
                                                            atomizeAtSymbols),
165
301
    ENTRY(typeConstant,          1, scopeGlobal,          mergeNo,
166
301
                                                            atomizeAtSymbols),
167
301
    ENTRY(typeCString,           1, scopeLinkageUnit,     mergeByContent,
168
301
                                                            atomizeUTF8),
169
301
    ENTRY(typeUTF16String,       1, scopeLinkageUnit,     mergeByContent,
170
301
                                                            atomizeUTF16),
171
301
    ENTRY(typeCFI,               4, scopeTranslationUnit, mergeNo,
172
301
                                                            atomizeCFI),
173
301
    ENTRY(typeLiteral4,          4, scopeLinkageUnit,     mergeByContent,
174
301
                                                            atomizeFixedSize),
175
301
    ENTRY(typeLiteral8,          8, scopeLinkageUnit,     mergeByContent,
176
301
                                                            atomizeFixedSize),
177
301
    ENTRY(typeLiteral16,        16, scopeLinkageUnit,     mergeByContent,
178
301
                                                            atomizeFixedSize),
179
301
    ENTRY(typeCFString,          4, scopeLinkageUnit,     mergeByContent,
180
301
                                                            atomizeCFString),
181
301
    ENTRY(typeInitializerPtr,    4, scopeTranslationUnit, mergeNo,
182
301
                                                            atomizePointerSize),
183
301
    ENTRY(typeTerminatorPtr,     4, scopeTranslationUnit, mergeNo,
184
301
                                                            atomizePointerSize),
185
301
    ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo,
186
301
                                                            atomizeCU),
187
301
    ENTRY(typeGOT,               4, scopeLinkageUnit,     mergeByContent,
188
301
                                                            atomizePointerSize),
189
301
    ENTRY(typeObjC2CategoryList, 4, scopeTranslationUnit, mergeByContent,
190
301
                                                            atomizePointerSize),
191
301
    ENTRY(typeUnknown,           1, scopeGlobal,          mergeNo,
192
301
                                                            atomizeAtSymbols)
193
301
  };
194
301
  #undef ENTRY
195
301
  const int tableLen = sizeof(parseInfo) / sizeof(ParseInfo);
196
1.46k
  for (int i=0; 
i < tableLen1.46k
;
++i1.16k
) {
197
1.45k
    if (
parseInfo[i].atomType == atomType1.45k
) {
198
285
      sizeMultiple = parseInfo[i].sizeMultiple;
199
285
      scope        = parseInfo[i].scope;
200
285
      merge        = parseInfo[i].merge;
201
285
      atomizeModel = parseInfo[i].atomizeModel;
202
285
      return;
203
285
    }
204
1.45k
  }
205
301
206
301
  // Unknown type is atomized by symbols.
207
16
  sizeMultiple = 1;
208
16
  scope = DefinedAtom::scopeGlobal;
209
16
  merge = DefinedAtom::mergeNo;
210
16
  atomizeModel = atomizeAtSymbols;
211
16
}
212
213
214
443
Atom::Scope atomScope(uint8_t scope) {
215
443
  switch (scope) {
216
272
  case N_EXT:
217
272
    return Atom::scopeGlobal;
218
42
  case N_PEXT:
219
42
  case N_PEXT | N_EXT:
220
42
    return Atom::scopeLinkageUnit;
221
129
  case 0:
222
129
    return Atom::scopeTranslationUnit;
223
0
  }
224
0
  
llvm_unreachable0
("unknown scope value!");
225
0
}
226
227
void appendSymbolsInSection(const std::vector<Symbol> &inSymbols,
228
                            uint32_t sectionIndex,
229
454
                            SmallVector<const Symbol *, 64> &outSyms) {
230
640
  for (const Symbol &sym : inSymbols) {
231
640
    // Only look at definition symbols.
232
640
    if ((sym.type & N_TYPE) != N_SECT)
233
0
      continue;
234
640
    
if (640
sym.sect != sectionIndex640
)
235
281
      continue;
236
359
    outSyms.push_back(&sym);
237
359
  }
238
454
}
239
240
void atomFromSymbol(DefinedAtom::ContentType atomType, const Section &section,
241
                    MachOFile &file, uint64_t symbolAddr, StringRef symbolName,
242
                    uint16_t symbolDescFlags, Atom::Scope symbolScope,
243
367
                    uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) {
244
367
  // Mach-O symbol table does not have size in it. Instead the size is the
245
367
  // difference between this and the next symbol.
246
367
  uint64_t size = nextSymbolAddr - symbolAddr;
247
367
  uint64_t offset = symbolAddr - section.address;
248
355
  bool noDeadStrip = (symbolDescFlags & N_NO_DEAD_STRIP) || !scatterable;
249
367
  if (
isZeroFillSection(section.type)367
) {
250
3
    file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size,
251
3
                                noDeadStrip, copyRefs, &section);
252
367
  } else {
253
364
    DefinedAtom::Merge merge = (symbolDescFlags & N_WEAK_DEF)
254
364
                              ? 
DefinedAtom::mergeAsWeak6
:
DefinedAtom::mergeNo358
;
255
364
    bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF);
256
364
    if (
atomType == DefinedAtom::typeUnknown364
) {
257
15
      // Mach-O needs a segment and section name.  Concatenate those two
258
15
      // with a / separator (e.g. "seg/sect") to fit into the lld model
259
15
      // of just a section name.
260
15
      std::string segSectName = section.segmentName.str()
261
15
                                + "/" + section.sectionName.str();
262
15
      file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType,
263
15
                                         merge, thumb, noDeadStrip, offset,
264
15
                                         size, segSectName, true, &section);
265
364
    } else {
266
349
      if ((atomType == lld::DefinedAtom::typeCode) &&
267
349
          
(symbolDescFlags & N_SYMBOL_RESOLVER)272
) {
268
2
        atomType = lld::DefinedAtom::typeResolver;
269
2
      }
270
349
      file.addDefinedAtom(symbolName, symbolScope, atomType, merge,
271
349
                          offset, size, thumb, noDeadStrip, copyRefs, &section);
272
349
    }
273
364
  }
274
367
}
275
276
llvm::Error processSymboledSection(DefinedAtom::ContentType atomType,
277
                                   const Section &section,
278
                                   const NormalizedFile &normalizedFile,
279
                                   MachOFile &file, bool scatterable,
280
227
                                   bool copyRefs) {
281
227
  // Find section's index.
282
227
  uint32_t sectIndex = 1;
283
309
  for (auto &sect : normalizedFile.sections) {
284
309
    if (&sect == &section)
285
227
      break;
286
82
    ++sectIndex;
287
82
  }
288
227
289
227
  // Find all symbols in this section.
290
227
  SmallVector<const Symbol *, 64> symbols;
291
227
  appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols);
292
227
  appendSymbolsInSection(normalizedFile.localSymbols,  sectIndex, symbols);
293
227
294
227
  // Sort symbols.
295
227
  std::sort(symbols.begin(), symbols.end(),
296
205
            [](const Symbol *lhs, const Symbol *rhs) -> bool {
297
205
              if (lhs == rhs)
298
0
                return false;
299
205
              // First by address.
300
205
              uint64_t lhsAddr = lhs->value;
301
205
              uint64_t rhsAddr = rhs->value;
302
205
              if (lhsAddr != rhsAddr)
303
160
                return lhsAddr < rhsAddr;
304
45
               // If same address, one is an alias so sort by scope.
305
45
              Atom::Scope lScope = atomScope(lhs->scope);
306
45
              Atom::Scope rScope = atomScope(rhs->scope);
307
45
              if (lScope != rScope)
308
34
                return lScope < rScope;
309
11
              // If same address and scope, see if one might be better as
310
11
              // the alias.
311
11
              bool lPrivate = (lhs->name.front() == 'l');
312
11
              bool rPrivate = (rhs->name.front() == 'l');
313
11
              if (lPrivate != rPrivate)
314
4
                return lPrivate;
315
7
              // If same address and scope, sort by name.
316
7
              return lhs->name < rhs->name;
317
7
            });
318
227
319
227
  // Debug logging of symbols.
320
227
  //for (const Symbol *sym : symbols)
321
227
  //  llvm::errs() << "  sym: "
322
227
  //    << llvm::format("0x%08llx ", (uint64_t)sym->value)
323
227
  //    << ", " << sym->name << "\n";
324
227
325
227
  // If section has no symbols and no content, there are no atoms.
326
227
  if (
symbols.empty() && 227
section.content.empty()11
)
327
3
    return llvm::Error::success();
328
224
329
224
  
if (224
symbols.empty()224
) {
330
8
    // Section has no symbols, put all content in one anonymous atom.
331
8
    atomFromSymbol(atomType, section, file, section.address, StringRef(),
332
8
                  0, Atom::scopeTranslationUnit,
333
8
                  section.address + section.content.size(),
334
8
                  scatterable, copyRefs);
335
8
  }
336
216
  else 
if (216
symbols.front()->value != section.address216
) {
337
11
    // Section has anonymous content before first symbol.
338
11
    atomFromSymbol(atomType, section, file, section.address, StringRef(),
339
11
                   0, Atom::scopeTranslationUnit, symbols.front()->value,
340
11
                   scatterable, copyRefs);
341
11
  }
342
224
343
224
  const Symbol *lastSym = nullptr;
344
359
  for (const Symbol *sym : symbols) {
345
359
    if (
lastSym != nullptr359
) {
346
143
      // Ignore any assembler added "ltmpNNN" symbol at start of section
347
143
      // if there is another symbol at the start.
348
143
      if ((lastSym->value != sym->value)
349
19
          || lastSym->value != section.address
350
143
          || 
!lastSym->name.startswith("ltmp")11
) {
351
132
        atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
352
132
                       lastSym->desc, atomScope(lastSym->scope), sym->value,
353
132
                       scatterable, copyRefs);
354
132
      }
355
143
    }
356
359
    lastSym = sym;
357
359
  }
358
224
  if (
lastSym != nullptr224
) {
359
216
    atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
360
216
                   lastSym->desc, atomScope(lastSym->scope),
361
216
                   section.address + section.content.size(),
362
216
                   scatterable, copyRefs);
363
216
  }
364
224
365
224
  // If object built without .subsections_via_symbols, add reference chain.
366
224
  if (
!scatterable224
) {
367
21
    MachODefinedAtom *prevAtom = nullptr;
368
21
    file.eachAtomInSection(section,
369
27
                           [&](MachODefinedAtom *atom, uint64_t offset)->void {
370
27
      if (prevAtom)
371
6
        prevAtom->addReference(Reference::KindNamespace::all,
372
6
                               Reference::KindArch::all,
373
6
                               Reference::kindLayoutAfter, 0, atom, 0);
374
27
      prevAtom = atom;
375
27
    });
376
21
  }
377
227
378
227
  return llvm::Error::success();
379
227
}
380
381
llvm::Error processSection(DefinedAtom::ContentType atomType,
382
                           const Section &section,
383
                           bool customSectionName,
384
                           const NormalizedFile &normalizedFile,
385
                           MachOFile &file, bool scatterable,
386
301
                           bool copyRefs) {
387
301
  const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
388
301
  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
389
301
390
301
  // Get info on how to atomize section.
391
301
  unsigned int       sizeMultiple;
392
301
  DefinedAtom::Scope scope;
393
301
  DefinedAtom::Merge merge;
394
301
  AtomizeModel       atomizeModel;
395
301
  sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel);
396
301
397
301
  // Validate section size.
398
301
  if ((section.content.size() % sizeMultiple) != 0)
399
0
    return llvm::make_error<GenericError>(Twine("Section ")
400
0
                                          + section.segmentName
401
0
                                          + "/" + section.sectionName
402
0
                                          + " has size ("
403
0
                                          + Twine(section.content.size())
404
0
                                          + ") which is not a multiple of "
405
0
                                          + Twine(sizeMultiple));
406
301
407
301
  
if (301
atomizeModel == atomizeAtSymbols301
) {
408
227
    // Break section up into atoms at symbol boundaries.
409
227
    return processSymboledSection(atomType, section, normalizedFile, file,
410
227
                                  scatterable, copyRefs);
411
0
  } else {
412
74
    unsigned int size;
413
202
    for (unsigned int offset = 0, e = section.content.size(); 
offset != e202
;) {
414
128
      switch (atomizeModel) {
415
6
      case atomizeFixedSize:
416
6
        // Break section up into atoms each with a fixed size.
417
6
        size = sizeMultiple;
418
6
        break;
419
14
      case atomizePointerSize:
420
14
        // Break section up into atoms each the size of a pointer.
421
14
        size = is64 ? 
87
:
47
;
422
14
        break;
423
26
      case atomizeUTF8:
424
26
        // Break section up into zero terminated c-strings.
425
26
        size = 0;
426
176
        for (unsigned int i = offset; 
i < e176
;
++i150
) {
427
176
          if (
section.content[i] == 0176
) {
428
26
            size = i + 1 - offset;
429
26
            break;
430
26
          }
431
176
        }
432
26
        break;
433
2
      case atomizeUTF16:
434
2
        // Break section up into zero terminated UTF16 strings.
435
2
        size = 0;
436
11
        for (unsigned int i = offset; 
i < e11
;
i += 29
) {
437
11
          if (
(section.content[i] == 0) && 11
(section.content[i + 1] == 0)2
) {
438
2
            size = i + 2 - offset;
439
2
            break;
440
2
          }
441
11
        }
442
2
        break;
443
48
      case atomizeCFI:
444
48
        // Break section up into dwarf unwind CFIs (FDE or CIE).
445
48
        size = read32(&section.content[offset], isBig) + 4;
446
48
        if (
offset+size > section.content.size()48
) {
447
0
          return llvm::make_error<GenericError>(Twine("Section ")
448
0
                                                + section.segmentName
449
0
                                                + "/" + section.sectionName
450
0
                                                + " is malformed.  Size of CFI "
451
0
                                                "starting at offset ("
452
0
                                                + Twine(offset)
453
0
                                                + ") is past end of section.");
454
0
        }
455
48
        break;
456
28
      case atomizeCU:
457
28
        // Break section up into compact unwind entries.
458
28
        size = is64 ? 
3226
:
202
;
459
28
        break;
460
4
      case atomizeCFString:
461
4
        // Break section up into NS/CFString objects.
462
4
        size = is64 ? 
322
:
162
;
463
4
        break;
464
0
      case atomizeAtSymbols:
465
0
        break;
466
128
      }
467
128
      
if (128
size == 0128
) {
468
0
        return llvm::make_error<GenericError>(Twine("Section ")
469
0
                                              + section.segmentName
470
0
                                              + "/" + section.sectionName
471
0
                                              + " is malformed.  The last atom "
472
0
                                              "is not zero terminated.");
473
0
      }
474
128
      
if (128
customSectionName128
) {
475
8
        // Mach-O needs a segment and section name.  Concatenate those two
476
8
        // with a / separator (e.g. "seg/sect") to fit into the lld model
477
8
        // of just a section name.
478
8
        std::string segSectName = section.segmentName.str()
479
8
                                  + "/" + section.sectionName.str();
480
8
        file.addDefinedAtomInCustomSection(StringRef(), scope, atomType,
481
8
                                           merge, false, false, offset,
482
8
                                           size, segSectName, true, &section);
483
128
      } else {
484
120
        file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size,
485
120
                            false, false, copyRefs, &section);
486
120
      }
487
128
      offset += size;
488
128
    }
489
74
  }
490
74
  return llvm::Error::success();
491
301
}
492
493
const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile,
494
238
                                          uint64_t address) {
495
382
  for (const Section &s : normalizedFile.sections) {
496
382
    uint64_t sAddr = s.address;
497
382
    if (
(sAddr <= address) && 382
(address < sAddr+s.content.size())382
) {
498
238
      return &s;
499
238
    }
500
0
  }
501
0
  return nullptr;
502
0
}
503
504
const MachODefinedAtom *
505
findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file,
506
56
                        uint64_t addr, Reference::Addend &addend) {
507
56
  const Section *sect = nullptr;
508
56
  sect = findSectionCoveringAddress(normalizedFile, addr);
509
56
  if (!sect)
510
0
    return nullptr;
511
56
512
56
  uint32_t offsetInTarget;
513
56
  uint64_t offsetInSect = addr - sect->address;
514
56
  auto atom =
515
56
      file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
516
56
  addend = offsetInTarget;
517
56
  return atom;
518
56
}
519
520
// Walks all relocations for a section in a normalized .o file and
521
// creates corresponding lld::Reference objects.
522
llvm::Error convertRelocs(const Section &section,
523
                          const NormalizedFile &normalizedFile,
524
                          bool scatterable,
525
                          MachOFile &file,
526
307
                          ArchHandler &handler) {
527
307
  // Utility function for ArchHandler to find atom by its address.
528
307
  auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr,
529
307
                         const lld::Atom **atom, Reference::Addend *addend)
530
268
                         -> llvm::Error {
531
268
    if (sectIndex > normalizedFile.sections.size())
532
0
      return llvm::make_error<GenericError>(Twine("out of range section "
533
0
                                     "index (") + Twine(sectIndex) + ")");
534
268
    const Section *sect = nullptr;
535
268
    if (
sectIndex == 0268
) {
536
154
      sect = findSectionCoveringAddress(normalizedFile, addr);
537
154
      if (!sect)
538
0
        return llvm::make_error<GenericError>(Twine("address (" + Twine(addr)
539
0
                                       + ") is not in any section"));
540
114
    } else {
541
114
      sect = &normalizedFile.sections[sectIndex-1];
542
114
    }
543
268
    uint32_t offsetInTarget;
544
268
    uint64_t offsetInSect = addr - sect->address;
545
268
    *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
546
268
    *addend = offsetInTarget;
547
268
    return llvm::Error::success();
548
268
  };
549
307
550
307
  // Utility function for ArchHandler to find atom by its symbol index.
551
307
  auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result)
552
291
                           -> llvm::Error {
553
291
    // Find symbol from index.
554
291
    const Symbol *sym = nullptr;
555
291
    uint32_t numStabs  = normalizedFile.stabsSymbols.size();
556
291
    uint32_t numLocal  = normalizedFile.localSymbols.size();
557
291
    uint32_t numGlobal = normalizedFile.globalSymbols.size();
558
291
    uint32_t numUndef  = normalizedFile.undefinedSymbols.size();
559
291
    assert(symbolIndex >= numStabs && "Searched for stab via atomBySymbol?");
560
291
    if (
symbolIndex < numStabs+numLocal291
) {
561
76
      sym = &normalizedFile.localSymbols[symbolIndex-numStabs];
562
291
    } else 
if (215
symbolIndex < numStabs+numLocal+numGlobal215
) {
563
26
      sym = &normalizedFile.globalSymbols[symbolIndex-numStabs-numLocal];
564
215
    } else 
if (189
symbolIndex < numStabs+numLocal+numGlobal+numUndef189
) {
565
189
      sym = &normalizedFile.undefinedSymbols[symbolIndex-numStabs-numLocal-
566
189
                                             numGlobal];
567
189
    } else {
568
0
      return llvm::make_error<GenericError>(Twine("symbol index (")
569
0
                                     + Twine(symbolIndex) + ") out of range");
570
0
    }
571
291
572
291
    // Find atom from symbol.
573
291
    
if (291
(sym->type & N_TYPE) == N_SECT291
) {
574
102
      if (sym->sect > normalizedFile.sections.size())
575
0
        return llvm::make_error<GenericError>(Twine("symbol section index (")
576
0
                                        + Twine(sym->sect) + ") out of range ");
577
102
      const Section &symSection = normalizedFile.sections[sym->sect-1];
578
102
      uint64_t targetOffsetInSect = sym->value - symSection.address;
579
102
      MachODefinedAtom *target = file.findAtomCoveringAddress(symSection,
580
102
                                                            targetOffsetInSect);
581
102
      if (
target102
) {
582
102
        *result = target;
583
102
        return llvm::Error::success();
584
102
      }
585
0
      return llvm::make_error<GenericError>("no atom found for defined symbol");
586
189
    } else 
if (189
(sym->type & N_TYPE) == N_UNDF189
) {
587
189
      const lld::Atom *target = file.findUndefAtom(sym->name);
588
189
      if (
target189
) {
589
189
        *result = target;
590
189
        return llvm::Error::success();
591
189
      }
592
0
      return llvm::make_error<GenericError>("no undefined atom found for sym");
593
0
    } else {
594
0
      // Search undefs
595
0
      return llvm::make_error<GenericError>("no atom found for symbol");
596
0
    }
597
0
  };
598
307
599
307
  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
600
307
  // Use old-school iterator so that paired relocations can be grouped.
601
307
  for (auto it=section.relocations.begin(), e=section.relocations.end();
602
781
                                                                
it != e781
;
++it474
) {
603
475
    const Relocation &reloc = *it;
604
475
    // Find atom this relocation is in.
605
475
    if (reloc.offset > section.content.size())
606
0
      return llvm::make_error<GenericError>(
607
0
                                    Twine("r_address (") + Twine(reloc.offset)
608
0
                                    + ") is larger than section size ("
609
0
                                    + Twine(section.content.size()) + ")");
610
475
    uint32_t offsetInAtom;
611
475
    MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section,
612
475
                                                            reloc.offset,
613
475
                                                            &offsetInAtom);
614
475
    assert(inAtom && "r_address in range, should have found atom");
615
475
    uint64_t fixupAddress = section.address + reloc.offset;
616
475
617
475
    const lld::Atom *target = nullptr;
618
475
    Reference::Addend addend = 0;
619
475
    Reference::KindValue kind;
620
475
    if (
handler.isPairedReloc(reloc)475
) {
621
157
      // Handle paired relocations together.
622
157
      const Relocation &reloc2 = *++it;
623
157
      auto relocErr = handler.getPairReferenceInfo(
624
157
          reloc, reloc2, inAtom, offsetInAtom, fixupAddress, isBig, scatterable,
625
157
          atomByAddr, atomBySymbol, &kind, &target, &addend);
626
157
      if (
relocErr157
) {
627
1
        return handleErrors(std::move(relocErr),
628
1
                            [&](std::unique_ptr<GenericError> GE) {
629
1
          return llvm::make_error<GenericError>(
630
1
            Twine("bad relocation (") + GE->getMessage()
631
1
             + ") in section "
632
1
             + section.segmentName + "/" + section.sectionName
633
1
             + " (r1_address=" + Twine::utohexstr(reloc.offset)
634
1
             + ", r1_type=" + Twine(reloc.type)
635
1
             + ", r1_extern=" + Twine(reloc.isExtern)
636
1
             + ", r1_length=" + Twine((int)reloc.length)
637
1
             + ", r1_pcrel=" + Twine(reloc.pcRel)
638
1
             + (!reloc.scattered ? (Twine(", r1_symbolnum=")
639
1
                                    + Twine(reloc.symbol))
640
0
                                 : (Twine(", r1_scattered=1, r1_value=")
641
0
                                    + Twine(reloc.value)))
642
1
             + ")"
643
1
             + ", (r2_address=" + Twine::utohexstr(reloc2.offset)
644
1
             + ", r2_type=" + Twine(reloc2.type)
645
1
             + ", r2_extern=" + Twine(reloc2.isExtern)
646
1
             + ", r2_length=" + Twine((int)reloc2.length)
647
1
             + ", r2_pcrel=" + Twine(reloc2.pcRel)
648
1
             + (!reloc2.scattered ? (Twine(", r2_symbolnum=")
649
1
                                     + Twine(reloc2.symbol))
650
0
                                  : (Twine(", r2_scattered=1, r2_value=")
651
0
                                     + Twine(reloc2.value)))
652
1
             + ")" );
653
1
          });
654
1
      }
655
475
    }
656
318
    else {
657
318
      // Use ArchHandler to convert relocation record into information
658
318
      // needed to instantiate an lld::Reference object.
659
318
      auto relocErr = handler.getReferenceInfo(
660
318
          reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr,
661
318
          atomBySymbol, &kind, &target, &addend);
662
318
      if (
relocErr318
) {
663
0
        return handleErrors(std::move(relocErr),
664
0
                            [&](std::unique_ptr<GenericError> GE) {
665
0
          return llvm::make_error<GenericError>(
666
0
            Twine("bad relocation (") + GE->getMessage()
667
0
             + ") in section "
668
0
             + section.segmentName + "/" + section.sectionName
669
0
             + " (r_address=" + Twine::utohexstr(reloc.offset)
670
0
             + ", r_type=" + Twine(reloc.type)
671
0
             + ", r_extern=" + Twine(reloc.isExtern)
672
0
             + ", r_length=" + Twine((int)reloc.length)
673
0
             + ", r_pcrel=" + Twine(reloc.pcRel)
674
0
             + (!reloc.scattered ? (Twine(", r_symbolnum=") + Twine(reloc.symbol))
675
0
                                 : (Twine(", r_scattered=1, r_value=")
676
0
                                    + Twine(reloc.value)))
677
0
             + ")" );
678
0
          });
679
0
      }
680
474
    }
681
474
    // Instantiate an lld::Reference object and add to its atom.
682
474
    inAtom->addReference(Reference::KindNamespace::mach_o,
683
474
                         handler.kindArch(),
684
474
                         kind, offsetInAtom, target, addend);
685
474
  }
686
307
687
306
  return llvm::Error::success();
688
307
}
689
690
641
bool isDebugInfoSection(const Section &section) {
691
641
  if ((section.attributes & S_ATTR_DEBUG) == 0)
692
603
    return false;
693
38
  return section.segmentName.equals("__DWARF");
694
38
}
695
696
0
/// Linearly searches the defined atoms of \p file for one whose name equals
/// \p name.  Returns the first match, or nullptr when there is none.
static const Atom* findDefinedAtomByName(MachOFile &file, Twine name) {
  // Materialize the Twine once so it is not re-rendered for every atom.
  const std::string wanted = name.str();
  for (auto *candidate : file.defined()) {
    if (candidate->name() == wanted)
      return candidate;
  }
  return nullptr;
}
703
704
2
/// Copies \p str into storage obtained from \p alloc, appends a terminating
/// NUL, and returns a StringRef built from the resulting C string.
static StringRef copyDebugString(StringRef str, BumpPtrAllocator &alloc) {
  const size_t length = str.size();
  // One extra byte for the NUL terminator.
  char *buffer = alloc.Allocate<char>(length + 1);
  memcpy(buffer, str.data(), length);
  buffer[length] = '\0';
  return buffer;
}
710
711
/// Converts the stabs symbols of \p normalizedFile into a StabsDebugInfo
/// attached to \p file.
///
/// The symbols are walked with a two-state machine:
///   * start      - outside an N_BNSYM/N_ENSYM pair.  Accepts N_BNSYM, N_SO,
///                  N_OSO and N_GSYM; anything else is an error.
///   * inBeginEnd - between N_BNSYM and N_ENSYM.  Every stab is tied to the
///                  atom found for the N_BNSYM; accepts N_FUN and N_ENSYM.
///
/// \param copyRefs when true, stab strings are copied into a BumpPtrAllocator
///        whose ownership is handed to the file's debug info object.
/// \returns success, or a GenericError describing the offending stab.
llvm::Error parseStabs(MachOFile &file,
                       const NormalizedFile &normalizedFile,
                       bool copyRefs) {

  if (normalizedFile.stabsSymbols.empty())
    return llvm::Error::success();

  // FIXME: Kill this off when we can move to sane yaml parsing.
  std::unique_ptr<BumpPtrAllocator> allocator;
  if (copyRefs)
    allocator = llvm::make_unique<BumpPtrAllocator>();

  // Yields the string to store in a stab: a private copy when copyRefs is
  // set, otherwise the original reference.
  auto stabString = [&](StringRef str) -> StringRef {
    return copyRefs ? copyDebugString(str, *allocator) : str;
  };

  enum { start, inBeginEnd } state = start;

  const Atom *currentAtom = nullptr;
  StabsDebugInfo::StabsList stabsList;
  for (const auto &stabSym : normalizedFile.stabsSymbols) {
    Stab stab(nullptr, stabSym.type, stabSym.sect, stabSym.desc,
              stabSym.value, stabSym.name);
    switch (state) {
    case start:
      switch (static_cast<StabType>(stabSym.type)) {
      case N_BNSYM: {
        state = inBeginEnd;
        // Zero-initialize so the value is well defined even if the lookup
        // fails without writing to it.
        Reference::Addend addend = 0;
        currentAtom = findAtomCoveringAddress(normalizedFile, file,
                                              stabSym.value, addend);
        // Check for a missing atom first: the addend is only meaningful
        // when an atom was actually found.
        if (!currentAtom) {
          // FIXME: ld64 just issues a warning here - should we match that?
          return llvm::make_error<GenericError>(
                   "can't find atom for stabs BNSYM at " +
                   Twine::utohexstr(stabSym.value) + " in " + file.path());
        }
        if (addend != 0)
          return llvm::make_error<GenericError>(
                   "Non-zero addend for BNSYM '" + stabSym.name + "' in " +
                   file.path());
        stab.atom = currentAtom;
        break;
      }
      case N_SO:
      case N_OSO:
        // Not associated with an atom, just copy.
        stab.str = stabString(stabSym.name);
        break;
      case N_GSYM: {
        // A name of the form "sym:..." refers to the atom "_sym"; a name
        // without a colon is looked up verbatim.
        auto colonIdx = stabSym.name.find(':');
        if (colonIdx != StringRef::npos) {
          StringRef name = stabSym.name.substr(0, colonIdx);
          currentAtom = findDefinedAtomByName(file, "_" + name);
        } else {
          currentAtom = findDefinedAtomByName(file, stabSym.name);
        }
        stab.atom = currentAtom;
        if (stab.atom == nullptr)
          return llvm::make_error<GenericError>(
                   "can't find atom for N_GSYM stabs '" + stabSym.name +
                   "' in " + file.path());
        stab.str = stabString(stabSym.name);
        break;
      }
      case N_FUN:
        return llvm::make_error<GenericError>(
                 "old-style N_FUN stab '" + stabSym.name + "' unsupported");
      default:
        return llvm::make_error<GenericError>(
                 "unrecognized stab symbol '" + stabSym.name + "'");
      }
      break;
    case inBeginEnd:
      stab.atom = currentAtom;
      switch (static_cast<StabType>(stabSym.type)) {
      case N_ENSYM:
        state = start;
        currentAtom = nullptr;
        break;
      case N_FUN:
        // Just copy the string.
        stab.str = stabString(stabSym.name);
        break;
      default:
        return llvm::make_error<GenericError>(
                 "unrecognized stab symbol '" + stabSym.name + "'");
      }
    }
    // Trace output only; keep it behind the debug macro so ordinary links
    // stay quiet.
    DEBUG(llvm::dbgs() << "Adding to stabsList: " << stab << "\n");
    stabsList.push_back(stab);
  }

  file.setDebugInfo(llvm::make_unique<StabsDebugInfo>(std::move(stabsList)));

  // FIXME: Kill this off when we fix YAML memory ownership.
  file.debugInfo()->setAllocator(std::move(allocator));

  return llvm::Error::success();
}
823
824
/// Builds a DataExtractor over the content bytes of \p S, using the
/// endianness and pointer size (8 or 4 bytes) of the file's architecture.
static llvm::DataExtractor
dataExtractorFromSection(const NormalizedFile &normalizedFile,
                         const Section &S) {
  const uint8_t addressSize =
      MachOLinkingContext::is64Bit(normalizedFile.arch) ? 8 : 4;
  const bool isLittleEndian =
      !MachOLinkingContext::isBigEndian(normalizedFile.arch);
  const char *bytes = reinterpret_cast<const char*>(S.content.data());
  return llvm::DataExtractor(StringRef(bytes, S.content.size()),
                             isLittleEndian, addressSize);
}
833
834
// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
835
//        inspection" code if possible.
836
static uint32_t getCUAbbrevOffset(llvm::DataExtractor abbrevData,
837
1
                                  uint64_t abbrCode) {
838
1
  uint64_t curCode;
839
1
  uint32_t offset = 0;
840
1
  while (
(curCode = abbrevData.getULEB128(&offset)) != abbrCode1
) {
841
0
    // Tag
842
0
    abbrevData.getULEB128(&offset);
843
0
    // DW_CHILDREN
844
0
    abbrevData.getU8(&offset);
845
0
    // Attributes
846
0
    while (abbrevData.getULEB128(&offset) | abbrevData.getULEB128(&offset))
847
0
      ;
848
0
  }
849
1
  return offset;
850
1
}
851
852
// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
853
//        inspection" code if possible.
854
/// Reads a string attribute value from \p infoData at \p infoOffset.
///
/// DW_FORM_string values are read inline from \p infoData.  DW_FORM_strp
/// values are read as a 32-bit offset into \p stringsSection.  Any other form
/// yields a GenericError.  \p infoOffset is advanced past the consumed value.
static Expected<const char *>
getIndexedString(const NormalizedFile &normalizedFile,
                 llvm::dwarf::Form form, llvm::DataExtractor infoData,
                 uint32_t &infoOffset, const Section &stringsSection) {
  switch (form) {
  case llvm::dwarf::DW_FORM_string:
    return infoData.getCStr(&infoOffset);
  case llvm::dwarf::DW_FORM_strp: {
    uint32_t offsetInStrings = infoData.getU32(&infoOffset);
    llvm::DataExtractor stringTable =
      dataExtractorFromSection(normalizedFile, stringsSection);
    return stringTable.getCStr(&offsetInStrings);
  }
  default:
    return llvm::make_error<GenericError>(
        "string field encoded without DW_FORM_strp");
  }
}
868
869
// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
870
//        inspection" code if possible.
871
/// Reads the first compile unit header and its top-level DIE from \p info and
/// extracts the DW_AT_name and DW_AT_comp_dir attributes.
///
/// \param info    the __debug_info section.
/// \param abbrev  the __debug_abbrev section.
/// \param strings the __debug_str section used for DW_FORM_strp values.
/// \param path    file path, used only in error messages.
/// \returns the translation unit's name and path, or a GenericError when the
///          header is malformed, the DWARF version is outside [2, 4], or the
///          top-level DIE is not a DW_TAG_compile_unit.
static llvm::Expected<TranslationUnitSource>
readCompUnit(const NormalizedFile &normalizedFile,
             const Section &info,
             const Section &abbrev,
             const Section &strings,
             StringRef path) {
  // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
  //        inspection" code if possible.
  uint32_t offset = 0;
  llvm::dwarf::DwarfFormat Format = llvm::dwarf::DwarfFormat::DWARF32;
  auto infoData = dataExtractorFromSection(normalizedFile, info);
  uint32_t length = infoData.getU32(&offset);
  if (length == 0xffffffff) {
    // Escape value: the real unit length follows as a 64-bit field.
    Format = llvm::dwarf::DwarfFormat::DWARF64;
    infoData.getU64(&offset);
  }
  else if (length > 0xffffff00)
    return llvm::make_error<GenericError>("Malformed DWARF in " + path);

  uint16_t version = infoData.getU16(&offset);

  if (version < 2 || version > 4)
    return llvm::make_error<GenericError>("Unsupported DWARF version in " +
                                          path);

  infoData.getU32(&offset); // Abbrev offset (should be zero)
  uint8_t addrSize = infoData.getU8(&offset);

  uint32_t abbrCode = infoData.getULEB128(&offset);
  auto abbrevData = dataExtractorFromSection(normalizedFile, abbrev);
  uint32_t abbrevOffset = getCUAbbrevOffset(abbrevData, abbrCode);
  uint64_t tag = abbrevData.getULEB128(&abbrevOffset);
  if (tag != llvm::dwarf::DW_TAG_compile_unit)
    return llvm::make_error<GenericError>("top level DIE is not a compile unit");
  // DW_CHILDREN
  abbrevData.getU8(&abbrevOffset);
  llvm::DWARFFormParams formParams = {version, addrSize, Format};
  TranslationUnitSource tu;
  for (;;) {
    // Read the attribute name and then its form in separate statements.
    // Both reads advance abbrevOffset, so combining them as operands of a
    // single '|' expression would leave their order unspecified and could
    // swap the two values.
    const uint32_t name = abbrevData.getULEB128(&abbrevOffset);
    const llvm::dwarf::Form form = static_cast<llvm::dwarf::Form>(
        abbrevData.getULEB128(&abbrevOffset));
    // A (0, 0) pair terminates the attribute list.
    if (name == 0 && form == 0)
      break;

    switch (name) {
    case llvm::dwarf::DW_AT_name: {
      if (auto eName = getIndexedString(normalizedFile, form, infoData, offset,
                                        strings))
        tu.name = *eName;
      else
        return eName.takeError();
      break;
    }
    case llvm::dwarf::DW_AT_comp_dir: {
      if (auto eName = getIndexedString(normalizedFile, form, infoData, offset,
                                        strings))
        tu.path = *eName;
      else
        return eName.takeError();
      break;
    }
    default:
      // Not an attribute we care about: step over its value.
      llvm::DWARFFormValue::skipValue(form, infoData, &offset, formParams);
    }
  }
  return tu;
}
938
939
/// Attaches debug information to \p file.
///
/// If the normalized file has a __DWARF/__debug_info section, the first
/// compile unit is read (using __debug_abbrev and __debug_str) and stored as
/// DwarfDebugInfo.  If there is no __debug_info section, stabs symbols are
/// parsed instead.
///
/// \param copyRefs when true, the translation unit strings are copied into a
///        BumpPtrAllocator owned by the file's debug info object.
/// \returns success, or a GenericError for a truncated __debug_info section,
///          a missing companion section, or malformed DWARF.
llvm::Error parseDebugInfo(MachOFile &file,
                           const NormalizedFile &normalizedFile, bool copyRefs) {

  // Find the interesting debug info sections.
  const Section *debugInfo = nullptr;
  const Section *debugAbbrev = nullptr;
  const Section *debugStrings = nullptr;

  for (auto &s : normalizedFile.sections) {
    if (s.segmentName == "__DWARF") {
      if (s.sectionName == "__debug_info")
        debugInfo = &s;
      else if (s.sectionName == "__debug_abbrev")
        debugAbbrev = &s;
      else if (s.sectionName == "__debug_str")
        debugStrings = &s;
    }
  }

  if (!debugInfo)
    return parseStabs(file, normalizedFile, copyRefs);

  if (debugInfo->content.size() == 0)
    return llvm::Error::success();

  if (debugInfo->content.size() < 12)
    return llvm::make_error<GenericError>("Malformed __debug_info section in " +
                                          file.path() + ": too small");

  if (!debugAbbrev)
    return llvm::make_error<GenericError>("Missing __debug_abbrev section in " +
                                          file.path());

  // readCompUnit() takes the string section by reference, so it must exist
  // before it can be passed along.
  if (!debugStrings)
    return llvm::make_error<GenericError>("Missing __debug_str section in " +
                                          file.path());

  auto tuOrErr = readCompUnit(normalizedFile, *debugInfo, *debugAbbrev,
                              *debugStrings, file.path());
  if (!tuOrErr)
    return tuOrErr.takeError();

  // FIXME: Kill off allocator and code under 'copyRefs' when we fix YAML
  //        memory ownership.
  std::unique_ptr<BumpPtrAllocator> allocator;
  if (copyRefs) {
    allocator = llvm::make_unique<BumpPtrAllocator>();
    tuOrErr->name = copyDebugString(tuOrErr->name, *allocator);
    tuOrErr->path = copyDebugString(tuOrErr->path, *allocator);
  }
  file.setDebugInfo(llvm::make_unique<DwarfDebugInfo>(std::move(*tuOrErr)));
  if (copyRefs)
    file.debugInfo()->setAllocator(std::move(allocator));

  return llvm::Error::success();
}
990
991
34
static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) {
992
34
  if (is64)
993
30
    return read64(addr, isBig);
994
4
995
4
  int32_t res = read32(addr, isBig);
996
4
  return res;
997
4
}
998
999
/// --- Augmentation String Processing ---
1000
1001
/// Information gathered from a CIE's augmentation string.  Offsets are
/// relative to the start of the augmentation data; ~0U means "not set".
struct CIEInfo {
  // The augmentation string started with 'z'.
  bool _augmentationDataPresent{false};
  // The augmentation string contained "eh".
  bool _mayHaveEH{false};
  // Offset recorded for the 'L' augmentation character.
  uint32_t _offsetOfLSDA{~0U};
  // Offset recorded for the 'P' augmentation character.
  uint32_t _offsetOfPersonality{~0U};
  // Offset recorded for the 'R' augmentation character.
  uint32_t _offsetOfFDEPointerEncoding{~0U};
  // Total augmentation data length implied by the augmentation string.
  uint32_t _augmentationDataLength{~0U};
};
1009
1010
// Maps each CIE atom to the information parsed from its augmentation string.
using CIEInfoMap = llvm::DenseMap<const MachODefinedAtom*, CIEInfo>;
1011
1012
/// Parses the NUL-terminated CIE augmentation string at \p augStr and records
/// what it finds in \p cieInfo.
///
/// An empty string is accepted and leaves \p cieInfo untouched.  Otherwise
/// the string must start with 'z'; the characters 'L', 'P' and 'R' record the
/// offset of their data within the augmentation data, and "eh" sets
/// _mayHaveEH.  Other characters are skipped.
///
/// \param len receives the length of the string including its terminator.
/// \returns success, or a GenericError for a malformed string.
static llvm::Error processAugmentationString(const uint8_t *augStr,
                                             CIEInfo &cieInfo,
                                             unsigned &len) {

  const uint8_t leading = augStr[0];
  if (leading == '\0') {
    len = 1;
    return llvm::Error::success();
  }

  if (leading != 'z')
    return llvm::make_error<GenericError>("expected 'z' at start of "
                                          "augmentation string");

  cieInfo._augmentationDataPresent = true;

  uint32_t dataOffset = 0;
  uint64_t pos = 1;
  while (augStr[pos] != '\0') {
    switch (augStr[pos]) {
    case 'L':
      cieInfo._offsetOfLSDA = dataOffset;
      // This adds a single byte to the augmentation data.
      dataOffset += 1;
      pos += 1;
      break;
    case 'P':
      cieInfo._offsetOfPersonality = dataOffset;
      // This adds a single byte to the augmentation data for the encoding,
      // then a number of bytes for the pointer data.
      // FIXME: We are assuming 4 is correct here for the pointer size as we
      // always currently use delta32ToGOT.
      dataOffset += 5;
      pos += 1;
      break;
    case 'R':
      cieInfo._offsetOfFDEPointerEncoding = dataOffset;
      // This adds a single byte to the augmentation data.
      dataOffset += 1;
      pos += 1;
      break;
    case 'e':
      if (augStr[pos + 1] != 'h')
        return llvm::make_error<GenericError>("expected 'eh' in "
                                              "augmentation string");
      cieInfo._mayHaveEH = true;
      pos += 2;
      break;
    default:
      // Unrecognized character: step over it.
      pos += 1;
      break;
    }
  }

  cieInfo._augmentationDataLength = dataOffset;

  // Account for the terminating NUL.
  len = pos + 1;
  return llvm::Error::success();
}
1070
1071
/// Parses the CIE held by \p atom, records its augmentation info in
/// \p cieInfos, and makes sure the personality function (if any) is
/// referenced.
///
/// When the augmentation string declares a personality ('P'):
///   * if the atom already has a reference, it must be the only one and must
///     sit at the personality function field;
///   * otherwise a reference is synthesized from the 32-bit delta stored in
///     the field.
/// When no personality is declared the atom must carry no references.
///
/// \param ehFrameSection the __eh_frame section containing the atom.
/// \param offset         offset of the atom within that section.
/// \returns success, or a GenericError for a malformed CIE.
static llvm::Error processCIE(const NormalizedFile &normalizedFile,
                              MachOFile &file,
                              mach_o::ArchHandler &handler,
                              const Section *ehFrameSection,
                              MachODefinedAtom *atom,
                              uint64_t offset,
                              CIEInfoMap &cieInfos) {
  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
  const uint8_t *frameData = atom->rawContent().data();

  CIEInfo cieInfo;

  // A length of 0xffffffff means a 64-bit extended length field follows.
  uint32_t size = read32(frameData, isBig);
  uint64_t cieIDField = size == 0xffffffffU
                          ? sizeof(uint32_t) + sizeof(uint64_t)
                          : sizeof(uint32_t);
  uint64_t versionField = cieIDField + sizeof(uint32_t);
  uint64_t augmentationStringField = versionField + sizeof(uint8_t);

  unsigned augmentationStringLength = 0;
  if (auto err = processAugmentationString(frameData + augmentationStringField,
                                           cieInfo, augmentationStringLength))
    return err;

  if (cieInfo._offsetOfPersonality != ~0U) {
    // If we have augmentation data for the personality function, then we may
    // need to implicitly generate its relocation.

    // Parse the EH Data field which is pointer sized.
    uint64_t EHDataField = augmentationStringField + augmentationStringLength;
    const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
    unsigned EHDataFieldSize = (cieInfo._mayHaveEH ? (is64 ? 8 : 4) : 0);

    // Parse Code Align Factor which is a ULEB128.
    uint64_t CodeAlignField = EHDataField + EHDataFieldSize;
    unsigned lengthFieldSize = 0;
    llvm::decodeULEB128(frameData + CodeAlignField, &lengthFieldSize);

    // Parse Data Align Factor which is a SLEB128.
    uint64_t DataAlignField = CodeAlignField + lengthFieldSize;
    llvm::decodeSLEB128(frameData + DataAlignField, &lengthFieldSize);

    // Parse Return Address Register which is a byte.
    uint64_t ReturnAddressField = DataAlignField + lengthFieldSize;

    // Parse the augmentation length which is a ULEB128.
    uint64_t AugmentationLengthField = ReturnAddressField + 1;
    uint64_t AugmentationLength =
      llvm::decodeULEB128(frameData + AugmentationLengthField,
                          &lengthFieldSize);

    if (AugmentationLength != cieInfo._augmentationDataLength)
      return llvm::make_error<GenericError>("CIE augmentation data length "
                                            "mismatch");

    // Get the start address of the augmentation data.
    uint64_t AugmentationDataField = AugmentationLengthField + lengthFieldSize;

    // Parse the personality function from the augmentation data.
    uint64_t PersonalityField =
      AugmentationDataField + cieInfo._offsetOfPersonality;

    // Parse the personality encoding.
    // FIXME: Verify that this is a 32-bit pcrel offset.
    uint64_t PersonalityFunctionField = PersonalityField + 1;

    if (atom->begin() != atom->end()) {
      // If we have an explicit relocation, then make sure it matches this
      // offset as this is where we'd expect it to be applied to.
      DefinedAtom::reference_iterator CurrentRef = atom->begin();
      if (CurrentRef->offsetInAtom() != PersonalityFunctionField)
        return llvm::make_error<GenericError>("CIE personality reloc at "
                                              "wrong offset");

      if (++CurrentRef != atom->end())
        return llvm::make_error<GenericError>("CIE contains too many relocs");
    } else {
      // Implicitly generate the personality function reloc.  It's assumed to
      // be a delta32 offset to a GOT entry.
      // FIXME: Parse the encoding and check this.
      int32_t funcDelta = read32(frameData + PersonalityFunctionField, isBig);
      uint64_t funcAddress = ehFrameSection->address + offset +
                             PersonalityFunctionField;
      funcAddress += funcDelta;

      // Zero-initialize the addend so a well-defined value is passed on even
      // if the lookup does not write to it.
      Reference::Addend addend = 0;
      const MachODefinedAtom *func =
          findAtomCoveringAddress(normalizedFile, file, funcAddress, addend);
      atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
                         handler.unwindRefToPersonalityFunctionKind(),
                         PersonalityFunctionField, func, addend);
    }
  } else if (atom->begin() != atom->end()) {
    // Otherwise, we expect there to be no relocations in this atom as the only
    // relocation would have been to the personality function.
    return llvm::make_error<GenericError>("unexpected relocation in CIE");
  }


  cieInfos[atom] = std::move(cieInfo);

  return llvm::Error::success();
}
1175
1176
/// Parses the FDE held by \p atom and makes sure it has references to its
/// parent CIE, to the function it describes and, when the CIE declares one,
/// to its LSDA.  References the compiler already emitted are verified;
/// missing ones are synthesized from the values stored in the FDE.
///
/// \param ehFrameSection the __eh_frame section containing the atom.
/// \param offset         offset of the atom within that section.
/// \param cieInfos       augmentation info of the CIEs processed so far.
/// \returns success, or a GenericError when the FDE's CIE field does not
///          lead to a CIE that has already been processed.
static llvm::Error processFDE(const NormalizedFile &normalizedFile,
                              MachOFile &file,
                              mach_o::ArchHandler &handler,
                              const Section *ehFrameSection,
                              MachODefinedAtom *atom,
                              uint64_t offset,
                              const CIEInfoMap &cieInfos) {

  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
  const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);

  // Compiler wasn't lazy and actually told us what it meant.
  // Unfortunately, the compiler may not have generated references for all of
  // [cie, func, lsda] and so we still need to parse the FDE and add references
  // for any the compiler didn't generate.
  if (atom->begin() != atom->end())
    atom->sortReferences();

  DefinedAtom::reference_iterator CurrentRef = atom->begin();

  // This helper returns the reference (if one exists) at the offset we are
  // currently processing.  It automatically increments the ref iterator if we
  // do return a ref, and reports a fatal error if we pass over a ref without
  // consuming it.
  auto currentRefGetter = [&CurrentRef,
                           &atom](uint64_t Offset)->const Reference* {
    // If there are no more refs found, then we are done.
    if (CurrentRef == atom->end())
      return nullptr;

    const Reference *Ref = *CurrentRef;

    // If we haven't reached the offset for this reference, then return that
    // we don't yet have a reference to process.
    if (Offset < Ref->offsetInAtom())
      return nullptr;

    // If the offset is equal, then we want to process this ref.
    if (Offset == Ref->offsetInAtom()) {
      ++CurrentRef;
      return Ref;
    }

    // The current ref is at an offset which is earlier than the current
    // offset, then we failed to consume it when we should have.  In this case
    // report a fatal error.
    llvm::report_fatal_error("Skipped reference when processing FDE");
  };

  // Helper to either get the reference at this current location, and verify
  // that it is of the expected type, or add a reference of that type.
  // Returns the reference target.
  auto verifyOrAddReference = [&](uint64_t targetAddress,
                                  Reference::KindValue refKind,
                                  uint64_t refAddress,
                                  bool allowsAddend)->const Atom* {
    if (auto *ref = currentRefGetter(refAddress)) {
      // The compiler already emitted a relocation for the CIE ref.  This should
      // have been converted to the correct type of reference in
      // get[Pair]ReferenceInfo().
      assert(ref->kindValue() == refKind &&
             "Incorrect EHFrame reference kind");
      return ref->target();
    }
    // Zero-initialize the addend so it has a well-defined value even if the
    // lookup does not write to it.
    Reference::Addend addend = 0;
    auto *target = findAtomCoveringAddress(normalizedFile, file,
                                           targetAddress, addend);
    atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
                       refKind, refAddress, target, addend);

    if (!allowsAddend)
      assert(!addend && "EHFrame reference cannot have addend");
    return target;
  };

  const uint8_t *frameData = atom->rawContent().data();

  // A length of 0xffffffff means a 64-bit extended length field follows.
  uint32_t size = read32(frameData, isBig);
  uint64_t cieFieldInFDE = size == 0xffffffffU
    ? sizeof(uint32_t) + sizeof(uint64_t)
    : sizeof(uint32_t);

  // Linker needs to fixup a reference from the FDE to its parent CIE (a
  // 32-bit byte offset backwards in the __eh_frame section).
  uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig);
  uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE;
  cieAddress -= cieDelta;

  auto *cieRefTarget = verifyOrAddReference(cieAddress,
                                            handler.unwindRefToCIEKind(),
                                            cieFieldInFDE, false);

  // The target may be missing or may not be a CIE at all when the input is
  // malformed.  Diagnose that at run time instead of relying on asserts,
  // which are compiled out of release builds.
  const MachODefinedAtom *cie =
      dyn_cast_or_null<MachODefinedAtom>(cieRefTarget);
  if (!cie || cie->contentType() != DefinedAtom::typeCFI)
    return llvm::make_error<GenericError>(
        "FDE's CIE field does not point at the start of a CIE");

  auto cieInfoIt = cieInfos.find(cie);
  if (cieInfoIt == cieInfos.end())
    return llvm::make_error<GenericError>(
        "FDE refers to a CIE that has not been processed");
  const CIEInfo &cieInfo = cieInfoIt->second;

  // Linker needs to fixup reference from the FDE to the function it's
  // describing. FIXME: there are actually different ways to do this, and the
  // particular method used is specified in the CIE's augmentation fields
  // (hopefully)
  uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t);

  int64_t functionFromFDE = readSPtr(is64, isBig,
                                     frameData + rangeFieldInFDE);
  uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE;
  rangeStart += functionFromFDE;

  verifyOrAddReference(rangeStart,
                       handler.unwindRefToFunctionKind(),
                       rangeFieldInFDE, true);

  // Handle the augmentation data if there is any.
  if (cieInfo._augmentationDataPresent) {
    // First process the augmentation data length field.  It follows the two
    // pointer-sized range fields.
    uint64_t augmentationDataLengthFieldInFDE =
      rangeFieldInFDE + 2 * (is64 ? sizeof(uint64_t) : sizeof(uint32_t));
    unsigned lengthFieldSize = 0;
    uint64_t augmentationDataLength =
      llvm::decodeULEB128(frameData + augmentationDataLengthFieldInFDE,
                          &lengthFieldSize);

    if (cieInfo._offsetOfLSDA != ~0U && augmentationDataLength > 0) {

      // Look at the augmentation data field.
      uint64_t augmentationDataFieldInFDE =
        augmentationDataLengthFieldInFDE + lengthFieldSize;

      int64_t lsdaFromFDE = readSPtr(is64, isBig,
                                     frameData + augmentationDataFieldInFDE);
      uint64_t lsdaStart =
        ehFrameSection->address + offset + augmentationDataFieldInFDE +
        lsdaFromFDE;

      verifyOrAddReference(lsdaStart,
                           handler.unwindRefToFunctionKind(),
                           augmentationDataFieldInFDE, true);
    }
  }

  return llvm::Error::success();
}
1319
1320
/// Walks every atom of the __TEXT/__eh_frame section (if present) and adds
/// the references its CIEs and FDEs need.  Processing stops doing work after
/// the first error, which is then returned.
llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile,
                                 MachOFile &file,
                                 mach_o::ArchHandler &handler) {

  // Locate the first __TEXT/__eh_frame section.
  const Section *ehFrameSection = nullptr;
  for (auto &candidate : normalizedFile.sections) {
    if (candidate.segmentName != "__TEXT")
      continue;
    if (candidate.sectionName != "__eh_frame")
      continue;
    ehFrameSection = &candidate;
    break;
  }

  // No __eh_frame so nothing to do.
  if (ehFrameSection == nullptr)
    return llvm::Error::success();

  llvm::Error ehFrameErr = llvm::Error::success();
  CIEInfoMap cieInfos;

  auto visitAtom = [&](MachODefinedAtom *atom, uint64_t offset) -> void {
    assert(atom->contentType() == DefinedAtom::typeCFI);

    // Bail out if we've encountered an error.
    if (ehFrameErr)
      return;

    const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
    if (!ArchHandler::isDwarfCIE(isBig, atom)) {
      ehFrameErr = processFDE(normalizedFile, file, handler, ehFrameSection,
                              atom, offset, cieInfos);
      return;
    }
    ehFrameErr = processCIE(normalizedFile, file, handler, ehFrameSection,
                            atom, offset, cieInfos);
  };
  file.eachAtomInSection(*ehFrameSection, visitAtom);

  return ehFrameErr;
}
1358
1359
/// Validates the objc_image_info structure stored in \p sect and records the
/// Objective-C constraint and Swift version it declares on \p file.
///
/// The section must be exactly 8 bytes, have version 0 and must not request
/// garbage collection; otherwise a GenericError is returned.
llvm::Error parseObjCImageInfo(const Section &sect,
                               const NormalizedFile &normalizedFile,
                               MachOFile &file) {

  //  struct objc_image_info  {
  //    uint32_t  version;  // initially 0
  //    uint32_t  flags;
  //  };

  // All diagnostics share the same "<segment>/<section> in file <path>" lead.
  auto reject = [&](const char *reason) -> llvm::Error {
    return llvm::make_error<GenericError>(sect.segmentName + "/" +
                                          sect.sectionName +
                                          " in file " + file.path() +
                                          reason);
  };

  ArrayRef<uint8_t> bytes = sect.content;
  if (bytes.size() != 8)
    return reject(" should be 8 bytes in size");

  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
  const uint32_t version = read32(bytes.data(), isBig);
  if (version != 0)
    return reject(" should have version=0");

  const uint32_t flags = read32(bytes.data() + 4, isBig);
  const uint32_t gcMask = MachOLinkingContext::objc_supports_gc |
                          MachOLinkingContext::objc_gc_only;
  if ((flags & gcMask) != 0)
    return reject(" uses GC.  This is not supported");

  if ((flags & MachOLinkingContext::objc_retainReleaseForSimulator) == 0)
    file.setObjcConstraint(MachOLinkingContext::objc_retainRelease);
  else
    file.setObjcConstraint(MachOLinkingContext::objc_retainReleaseForSimulator);

  // The Swift version lives in bits 8-15 of the flags word.
  file.setSwiftVersion((flags >> 8) & 0xFF);

  return llvm::Error::success();
}
1400
1401
/// Converts normalized mach-o file into an lld::File and lld::Atoms.
1402
llvm::Expected<std::unique_ptr<lld::File>>
1403
objectToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1404
154
              bool copyRefs) {
1405
154
  std::unique_ptr<MachOFile> file(new MachOFile(path));
1406
154
  if (auto ec = normalizedObjectToAtoms(file.get(), normalizedFile, copyRefs))
1407
4
    return std::move(ec);
1408
150
  return std::unique_ptr<File>(std::move(file));
1409
150
}
1410
1411
/// Builds an lld::File describing a normalized mach-o dylib.
///
/// A fresh MachODylibFile is created for \p path and filled in by
/// normalizedDylibToAtoms().
///
/// \param normalizedFile in-memory normalized mach-o to convert.
/// \param path           path given to the newly created MachODylibFile.
/// \param copyRefs       forwarded unchanged to normalizedDylibToAtoms().
/// \returns the populated file, or the error produced while converting.
llvm::Expected<std::unique_ptr<lld::File>>
dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path,
             bool copyRefs) {
  // Instantiate SharedLibraryFile object.
  std::unique_ptr<MachODylibFile> dylibFile(new MachODylibFile(path));
  llvm::Error err =
      normalizedDylibToAtoms(dylibFile.get(), normalizedFile, copyRefs);
  if (err)
    return std::move(err);
  // Hand ownership back through the base-class pointer.
  return std::unique_ptr<File>(std::move(dylibFile));
}
1420
1421
} // anonymous namespace
1422
1423
namespace normalized {
1424
1425
310
static bool isObjCImageInfo(const Section &sect) {
1426
0
  return (sect.segmentName == "__OBJC" && sect.sectionName == "__image_info") ||
1427
310
    
(sect.segmentName == "__DATA" && 310
sect.sectionName == "__objc_imageinfo"75
);
1428
310
}
1429
1430
/// Populates \p file with the atoms and references described by
/// \p normalizedFile.
///
/// The work is done in this order:
///   1. Each section is turned into atoms. Debug-info sections are skipped
///      here, and the ObjC image-info section is parsed for its flags instead
///      of producing atoms.
///   2. Undefined symbols become undefined atoms or tentative definitions.
///   3. Relocations are converted to References, followed by arch-specific
///      additional references and __eh_frame references.
///   4. LC_DATA_IN_CODE entries are recorded as transition References.
///   5. File-level attributes are cached, references are sorted, and
///      parseDebugInfo() is run.
///
/// \param file           file object that receives the atoms; must be non-null.
/// \param normalizedFile in-memory normalized mach-o to convert.
/// \param copyRefs       forwarded to the atom-creation helpers.
/// \returns llvm::Error::success(), or the first error hit by any step.
llvm::Error
normalizedObjectToAtoms(MachOFile *file,
                        const NormalizedFile &normalizedFile,
                        bool copyRefs) {
  DEBUG(llvm::dbgs() << "******** Normalizing file to atoms: "
                    << file->path() << "\n");
  bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0);

  // Create atoms from each section.
  for (auto &sect : normalizedFile.sections) {

    // If this is a debug-info section parse it specially.
    if (isDebugInfoSection(sect))
      continue;

    // If the file contains an objc_image_info struct, then we should parse the
    // ObjC flags and Swift version.
    if (isObjCImageInfo(sect)) {
      if (auto ec = parseObjCImageInfo(sect, normalizedFile, *file))
        return ec;
      // We then skip adding atoms for this section as we use the ObjCPass to
      // re-emit this data after it has been aggregated for all files.
      continue;
    }

    bool customSectionName;
    DefinedAtom::ContentType atomType = atomTypeFromSection(sect,
                                                            customSectionName);
    if (auto ec =  processSection(atomType, sect, customSectionName,
                                  normalizedFile, *file, scatterable, copyRefs))
      return ec;
  }
  // Create atoms from undefined symbols.
  for (auto &sym : normalizedFile.undefinedSymbols) {
    // Undefined symbols with n_value != 0 are actually tentative definitions.
    if (sym.value == Hex64(0)) {
      file->addUndefinedAtom(sym.name, copyRefs);
    } else {
      // The alignment exponent of a common symbol lives in the low four bits
      // of the high byte of n_desc (GET_COMM_ALIGN in <mach-o/nlist.h>). Mask
      // it so unrelated high bits cannot produce an out-of-range shift.
      const unsigned alignExponent = (sym.desc >> 8) & 0x0F;
      file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value,
                                DefinedAtom::Alignment(1 << alignExponent),
                                copyRefs);
    }
  }

  // Convert mach-o relocations to References
  std::unique_ptr<mach_o::ArchHandler> handler
                                     = ArchHandler::create(normalizedFile.arch);
  for (auto &sect : normalizedFile.sections) {
    if (isDebugInfoSection(sect))
      continue;
    if (llvm::Error ec = convertRelocs(sect, normalizedFile, scatterable,
                                       *file, *handler))
      return ec;
  }

  // Add additional arch-specific References
  file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void {
    handler->addAdditionalReferences(*atom);
  });

  // Each __eh_frame section needs references to both __text (the function we're
  // providing unwind info for) and itself (FDE -> CIE). These aren't
  // represented in the relocations on some architectures, so we have to add
  // them back in manually there.
  if (auto ec = addEHFrameReferences(normalizedFile, *file, *handler))
    return ec;

  // Process mach-o data-in-code regions array. That information is encoded in
  // atoms as References at each transition point.
  unsigned nextIndex = 0;
  for (const DataInCode &entry : normalizedFile.dataInCode) {
    ++nextIndex;
    const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset);
    if (!s) {
      return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE address ("
                                                  + Twine(entry.offset)
                                                  + ") is not in any section"));
    }
    uint64_t offsetInSect = entry.offset - s->address;
    // Initialized so a failed lookup can never leave it indeterminate.
    uint32_t offsetInAtom = 0;
    MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect,
                                                           &offsetInAtom);
    // A malformed entry may point into a section that produced no atom
    // covering this offset; report it instead of dereferencing a null atom.
    if (!atom) {
      return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE address ("
                                                  + Twine(entry.offset)
                                                  + ") is not in any atom"));
    }
    if (offsetInAtom + entry.length > atom->size()) {
      return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE entry "
                                                  "(offset="
                                                  + Twine(entry.offset)
                                                  + ", length="
                                                  + Twine(entry.length)
                                                  + ") crosses atom boundary."));
    }
    // Add reference that marks start of data-in-code.
    atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
                       handler->dataInCodeTransitionStart(*atom),
                       offsetInAtom, atom, entry.kind);

    // Peek at next entry, if it starts where this one ends, skip ending ref.
    if (nextIndex < normalizedFile.dataInCode.size()) {
      const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex];
      if (nextEntry.offset == (entry.offset + entry.length))
        continue;
    }

    // If data goes to end of function, skip ending ref.
    if ((offsetInAtom + entry.length) == atom->size())
      continue;

    // Add reference that marks end of data-in-code.
    atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
                       handler->dataInCodeTransitionEnd(*atom),
                       offsetInAtom+entry.length, atom, 0);
  }

  // Cache some attributes on the file for use later.
  file->setFlags(normalizedFile.flags);
  file->setArch(normalizedFile.arch);
  file->setOS(normalizedFile.os);
  file->setMinVersion(normalizedFile.minOSverson);
  file->setMinVersionLoadCommandKind(normalizedFile.minOSVersionKind);

  // Sort references in each atom to their canonical order.
  // NOTE(review): this relies on every defined atom in the file being a
  // SimpleDefinedAtom; a static_cast would be the safer spelling - confirm
  // the class hierarchy before changing it.
  for (const DefinedAtom* defAtom : file->defined()) {
    reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences();
  }

  if (auto err = parseDebugInfo(*file, normalizedFile, copyRefs))
    return err;

  return llvm::Error::success();
}
1559
1560
/// Copies the dylib-level information of \p normalizedFile into \p file.
///
/// This records the install name and the compatibility/current versions,
/// registers every exported symbol, and registers each dependent dylib whose
/// load command is LC_REEXPORT_DYLIB as a re-export.
///
/// \param file           dylib file object to fill in; must be non-null.
/// \param normalizedFile in-memory normalized mach-o to read from.
/// \param copyRefs       passed to addExportedSymbol() for symbols taken from
///                       the global symbol table.
/// \returns always llvm::Error::success().
llvm::Error
normalizedDylibToAtoms(MachODylibFile *file,
                       const NormalizedFile &normalizedFile,
                       bool copyRefs) {
  file->setInstallName(normalizedFile.installName);
  file->setCompatVersion(normalizedFile.compatVersion);
  file->setCurrentVersion(normalizedFile.currentVersion);

  // Tell MachODylibFile object about all symbols it exports.
  if (normalizedFile.exportInfo.empty()) {
    // No exports trie: fall back to the traditional symbol table.
    for (const auto &global : normalizedFile.globalSymbols) {
      assert((global.scope & N_EXT) && "only expect external symbols here");
      const bool isWeakDef = (global.desc & N_WEAK_DEF) != 0;
      file->addExportedSymbol(global.name, isWeakDef, copyRefs);
    }
  } else {
    // If exports trie exists, use it instead of traditional symbol table.
    for (const Export &exported : normalizedFile.exportInfo) {
      const bool isWeakDef =
          (exported.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION) != 0;
      // StringRefs from export iterator are ephemeral, so force copy.
      file->addExportedSymbol(exported.name, isWeakDef, true);
    }
  }

  // Tell MachODylibFile object about all dylibs it re-exports.
  for (const DependentDylib &dylib : normalizedFile.dependentDylibs) {
    if (dylib.kind != llvm::MachO::LC_REEXPORT_DYLIB)
      continue;
    file->addReExportedDylib(dylib.path);
  }
  return llvm::Error::success();
}
1590
1591
void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType,
1592
                                          StringRef &segmentName,
1593
                                          StringRef &sectionName,
1594
                                          SectionType &sectionType,
1595
                                          SectionAttr &sectionAttrs,
1596
138
                                          bool &relocsToDefinedCanBeImplicit) {
1597
138
1598
138
  for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ;
1599
1.26k
                                 
p->atomType != DefinedAtom::typeUnknown1.26k
;
++p1.12k
) {
1600
1.26k
    if (p->atomType != atomType)
1601
1.12k
      continue;
1602
138
    // Wild carded entries are ignored for reverse lookups.
1603
138
    
if (138
p->segmentName.empty() || 138
p->sectionName.empty()138
)
1604
0
      continue;
1605
138
    segmentName = p->segmentName;
1606
138
    sectionName = p->sectionName;
1607
138
    sectionType = p->sectionType;
1608
138
    sectionAttrs = 0;
1609
138
    relocsToDefinedCanBeImplicit = false;
1610
138
    if (atomType == DefinedAtom::typeCode)
1611
60
      sectionAttrs = S_ATTR_PURE_INSTRUCTIONS;
1612
138
    if (atomType == DefinedAtom::typeCFI)
1613
10
      relocsToDefinedCanBeImplicit = true;
1614
1.26k
    return;
1615
1.26k
  }
1616
0
  
llvm_unreachable0
("content type not yet supported");
1617
0
}
1618
1619
/// Converts a normalized mach-o file into an lld::File, choosing the
/// conversion by file type.
///
/// MH_DYLIB and MH_DYLIB_STUB are handled by dylibToAtoms(); MH_OBJECT is
/// handled by objectToAtoms(). Any other file type is treated as unreachable.
///
/// \param normalizedFile in-memory normalized mach-o to convert.
/// \param path           path forwarded to the chosen converter.
/// \param copyRefs       forwarded to the chosen converter.
/// \returns the converted file, or the error reported by the converter.
llvm::Expected<std::unique_ptr<lld::File>>
normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
                  bool copyRefs) {
  const auto kind = normalizedFile.fileType;
  if (kind == MH_DYLIB || kind == MH_DYLIB_STUB)
    return dylibToAtoms(normalizedFile, path, copyRefs);
  if (kind == MH_OBJECT)
    return objectToAtoms(normalizedFile, path, copyRefs);
  llvm_unreachable("unhandled MachO file type!");
}
1632
1633
} // namespace normalized
1634
} // namespace mach_o
1635
} // namespace lld