/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===// |
2 | | // |
3 | | // The LLVM Linker |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | |
10 | | /// |
11 | | /// \file Converts from in-memory normalized mach-o to in-memory Atoms. |
12 | | /// |
13 | | /// +------------+ |
14 | | /// | normalized | |
15 | | /// +------------+ |
16 | | /// | |
17 | | /// | |
18 | | /// v |
19 | | /// +-------+ |
20 | | /// | Atoms | |
21 | | /// +-------+ |
22 | | |
23 | | #include "ArchHandler.h" |
24 | | #include "Atoms.h" |
25 | | #include "File.h" |
26 | | #include "MachONormalizedFile.h" |
27 | | #include "MachONormalizedFileBinaryUtils.h" |
28 | | #include "lld/Core/Error.h" |
29 | | #include "lld/Core/LLVM.h" |
30 | | #include "llvm/BinaryFormat/Dwarf.h" |
31 | | #include "llvm/BinaryFormat/MachO.h" |
32 | | #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" |
33 | | #include "llvm/Support/DataExtractor.h" |
34 | | #include "llvm/Support/Debug.h" |
35 | | #include "llvm/Support/Error.h" |
36 | | #include "llvm/Support/Format.h" |
37 | | #include "llvm/Support/LEB128.h" |
38 | | #include "llvm/Support/raw_ostream.h" |
39 | | |
40 | | using namespace llvm::MachO; |
41 | | using namespace lld::mach_o::normalized; |
42 | | |
43 | | #define DEBUG_TYPE "normalized-file-to-atoms" |
44 | | |
45 | | namespace lld { |
46 | | namespace mach_o { |
47 | | |
48 | | |
49 | | namespace { // anonymous |
50 | | |
51 | | |
// Builds one MachORelocatableSectionToAtomType initializer. The last argument
// is the bare DefinedAtom::ContentType enumerator name.
#define ENTRY(seg, sect, type, atomType) \
  {seg, sect, type, DefinedAtom::atomType }

/// One row of the table mapping a mach-o section to an atom ContentType.
struct MachORelocatableSectionToAtomType {
  StringRef segmentName;               // Empty string acts as a wildcard.
  StringRef sectionName;               // Empty string acts as a wildcard.
  SectionType sectionType;             // Must equal the section's type.
  DefinedAtom::ContentType atomType;   // Result when the row matches.
};

// Lookup table scanned in order by atomTypeFromSection(), which returns the
// first matching row.  Rows with empty names are therefore placed after the
// specific rows of the same section type.  The final typeUnknown row is the
// terminator for that scan and is never returned as a match.
const MachORelocatableSectionToAtomType sectsToAtomType[] = {
  ENTRY("__TEXT", "__text", S_REGULAR, typeCode),
  // NOTE(review): same segment/section/type key as the row above, so the
  // first-match scan never reaches this row.  atomFromSymbol() selects
  // typeResolver from the N_SYMBOL_RESOLVER symbol flag instead.
  ENTRY("__TEXT", "__text", S_REGULAR, typeResolver),
  ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString),
  ENTRY("", "", S_CSTRING_LITERALS, typeCString),
  ENTRY("__TEXT", "__ustring", S_REGULAR, typeUTF16String),
  ENTRY("__TEXT", "__const", S_REGULAR, typeConstant),
  ENTRY("__TEXT", "__const_coal", S_COALESCED, typeConstant),
  ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI),
  ENTRY("__TEXT", "__eh_frame", S_REGULAR, typeCFI),
  ENTRY("__TEXT", "__literal4", S_4BYTE_LITERALS, typeLiteral4),
  ENTRY("__TEXT", "__literal8", S_8BYTE_LITERALS, typeLiteral8),
  ENTRY("__TEXT", "__literal16", S_16BYTE_LITERALS, typeLiteral16),
  ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA),
  ENTRY("__DATA", "__data", S_REGULAR, typeData),
  ENTRY("__DATA", "__datacoal_nt", S_COALESCED, typeData),
  ENTRY("__DATA", "__const", S_REGULAR, typeConstData),
  ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString),
  ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS,
        typeInitializerPtr),
  ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS,
        typeTerminatorPtr),
  ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS,
        typeGOT),
  ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill),
  ENTRY("", "", S_NON_LAZY_SYMBOL_POINTERS,
        typeGOT),
  ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples),
  ENTRY("__DATA", "__thread_vars", S_THREAD_LOCAL_VARIABLES,
        typeThunkTLV),
  ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, typeTLVInitialData),
  ENTRY("__DATA", "__thread_bss", S_THREAD_LOCAL_ZEROFILL,
        typeTLVInitialZeroFill),
  ENTRY("__DATA", "__objc_imageinfo", S_REGULAR, typeObjCImageInfo),
  ENTRY("__DATA", "__objc_catlist", S_REGULAR, typeObjC2CategoryList),
  ENTRY("", "", S_INTERPOSING, typeInterposingTuples),
  ENTRY("__LD", "__compact_unwind", S_REGULAR,
        typeCompactUnwindInfo),
  // Terminator row: stops the scan in atomTypeFromSection().
  ENTRY("", "", S_REGULAR, typeUnknown)
};
#undef ENTRY
103 | | |
104 | | |
105 | | /// Figures out ContentType of a mach-o section. |
106 | | DefinedAtom::ContentType atomTypeFromSection(const Section §ion, |
107 | 301 | bool &customSectionName) { |
108 | 301 | // First look for match of name and type. Empty names in table are wildcards. |
109 | 301 | customSectionName = false; |
110 | 301 | for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; |
111 | 2.49k | p->atomType != DefinedAtom::typeUnknown2.49k ; ++p2.18k ) { |
112 | 2.48k | if (p->sectionType != section.type) |
113 | 1.50k | continue; |
114 | 977 | if (977 !p->segmentName.equals(section.segmentName) && 977 !p->segmentName.empty()578 ) |
115 | 571 | continue; |
116 | 406 | if (406 !p->sectionName.equals(section.sectionName) && 406 !p->sectionName.empty()122 ) |
117 | 115 | continue; |
118 | 291 | customSectionName = p->segmentName.empty() && 291 p->sectionName.empty()7 ; |
119 | 2.48k | return p->atomType; |
120 | 2.48k | } |
121 | 301 | // Look for code denoted by section attributes |
122 | 10 | if (10 section.attributes & S_ATTR_PURE_INSTRUCTIONS10 ) |
123 | 2 | return DefinedAtom::typeCode; |
124 | 8 | |
125 | 8 | return DefinedAtom::typeUnknown; |
126 | 8 | } |
127 | | |
/// Strategy processSection() uses to split a section's content into atoms.
enum AtomizeModel {
  atomizeAtSymbols,    // Handed to processSymboledSection(): split at symbols.
  atomizeFixedSize,    // Each atom is the type's sizeMultiple bytes.
  atomizePointerSize,  // Each atom is 8 bytes for 64-bit archs, otherwise 4.
  atomizeUTF8,         // Each atom ends after the next zero byte.
  atomizeUTF16,        // Each atom ends after the next pair of zero bytes.
  atomizeCFI,          // Each atom is the 32-bit length at its start plus 4.
  atomizeCU,           // Each atom is 32 bytes for 64-bit archs, otherwise 20.
  atomizeCFString      // Each atom is 32 bytes for 64-bit archs, otherwise 16.
};
138 | | |
139 | | /// Returns info on how to atomize a section of the specified ContentType. |
140 | | void sectionParseInfo(DefinedAtom::ContentType atomType, |
141 | | unsigned int &sizeMultiple, |
142 | | DefinedAtom::Scope &scope, |
143 | | DefinedAtom::Merge &merge, |
144 | 301 | AtomizeModel &atomizeModel) { |
145 | 301 | struct ParseInfo { |
146 | 301 | DefinedAtom::ContentType atomType; |
147 | 301 | unsigned int sizeMultiple; |
148 | 301 | DefinedAtom::Scope scope; |
149 | 301 | DefinedAtom::Merge merge; |
150 | 301 | AtomizeModel atomizeModel; |
151 | 301 | }; |
152 | 301 | |
153 | 301 | #define ENTRY(type, size, scope, merge, model) \ |
154 | 5.41k | {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model } |
155 | 301 | |
156 | 301 | static const ParseInfo parseInfo[] = { |
157 | 301 | ENTRY(typeCode, 1, scopeGlobal, mergeNo, |
158 | 301 | atomizeAtSymbols), |
159 | 301 | ENTRY(typeData, 1, scopeGlobal, mergeNo, |
160 | 301 | atomizeAtSymbols), |
161 | 301 | ENTRY(typeConstData, 1, scopeGlobal, mergeNo, |
162 | 301 | atomizeAtSymbols), |
163 | 301 | ENTRY(typeZeroFill, 1, scopeGlobal, mergeNo, |
164 | 301 | atomizeAtSymbols), |
165 | 301 | ENTRY(typeConstant, 1, scopeGlobal, mergeNo, |
166 | 301 | atomizeAtSymbols), |
167 | 301 | ENTRY(typeCString, 1, scopeLinkageUnit, mergeByContent, |
168 | 301 | atomizeUTF8), |
169 | 301 | ENTRY(typeUTF16String, 1, scopeLinkageUnit, mergeByContent, |
170 | 301 | atomizeUTF16), |
171 | 301 | ENTRY(typeCFI, 4, scopeTranslationUnit, mergeNo, |
172 | 301 | atomizeCFI), |
173 | 301 | ENTRY(typeLiteral4, 4, scopeLinkageUnit, mergeByContent, |
174 | 301 | atomizeFixedSize), |
175 | 301 | ENTRY(typeLiteral8, 8, scopeLinkageUnit, mergeByContent, |
176 | 301 | atomizeFixedSize), |
177 | 301 | ENTRY(typeLiteral16, 16, scopeLinkageUnit, mergeByContent, |
178 | 301 | atomizeFixedSize), |
179 | 301 | ENTRY(typeCFString, 4, scopeLinkageUnit, mergeByContent, |
180 | 301 | atomizeCFString), |
181 | 301 | ENTRY(typeInitializerPtr, 4, scopeTranslationUnit, mergeNo, |
182 | 301 | atomizePointerSize), |
183 | 301 | ENTRY(typeTerminatorPtr, 4, scopeTranslationUnit, mergeNo, |
184 | 301 | atomizePointerSize), |
185 | 301 | ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo, |
186 | 301 | atomizeCU), |
187 | 301 | ENTRY(typeGOT, 4, scopeLinkageUnit, mergeByContent, |
188 | 301 | atomizePointerSize), |
189 | 301 | ENTRY(typeObjC2CategoryList, 4, scopeTranslationUnit, mergeByContent, |
190 | 301 | atomizePointerSize), |
191 | 301 | ENTRY(typeUnknown, 1, scopeGlobal, mergeNo, |
192 | 301 | atomizeAtSymbols) |
193 | 301 | }; |
194 | 301 | #undef ENTRY |
195 | 301 | const int tableLen = sizeof(parseInfo) / sizeof(ParseInfo); |
196 | 1.46k | for (int i=0; i < tableLen1.46k ; ++i1.16k ) { |
197 | 1.45k | if (parseInfo[i].atomType == atomType1.45k ) { |
198 | 285 | sizeMultiple = parseInfo[i].sizeMultiple; |
199 | 285 | scope = parseInfo[i].scope; |
200 | 285 | merge = parseInfo[i].merge; |
201 | 285 | atomizeModel = parseInfo[i].atomizeModel; |
202 | 285 | return; |
203 | 285 | } |
204 | 1.45k | } |
205 | 301 | |
206 | 301 | // Unknown type is atomized by symbols. |
207 | 16 | sizeMultiple = 1; |
208 | 16 | scope = DefinedAtom::scopeGlobal; |
209 | 16 | merge = DefinedAtom::mergeNo; |
210 | 16 | atomizeModel = atomizeAtSymbols; |
211 | 16 | } |
212 | | |
213 | | |
214 | 443 | Atom::Scope atomScope(uint8_t scope) { |
215 | 443 | switch (scope) { |
216 | 272 | case N_EXT: |
217 | 272 | return Atom::scopeGlobal; |
218 | 42 | case N_PEXT: |
219 | 42 | case N_PEXT | N_EXT: |
220 | 42 | return Atom::scopeLinkageUnit; |
221 | 129 | case 0: |
222 | 129 | return Atom::scopeTranslationUnit; |
223 | 0 | } |
224 | 0 | llvm_unreachable0 ("unknown scope value!"); |
225 | 0 | } |
226 | | |
227 | | void appendSymbolsInSection(const std::vector<Symbol> &inSymbols, |
228 | | uint32_t sectionIndex, |
229 | 454 | SmallVector<const Symbol *, 64> &outSyms) { |
230 | 640 | for (const Symbol &sym : inSymbols) { |
231 | 640 | // Only look at definition symbols. |
232 | 640 | if ((sym.type & N_TYPE) != N_SECT) |
233 | 0 | continue; |
234 | 640 | if (640 sym.sect != sectionIndex640 ) |
235 | 281 | continue; |
236 | 359 | outSyms.push_back(&sym); |
237 | 359 | } |
238 | 454 | } |
239 | | |
240 | | void atomFromSymbol(DefinedAtom::ContentType atomType, const Section §ion, |
241 | | MachOFile &file, uint64_t symbolAddr, StringRef symbolName, |
242 | | uint16_t symbolDescFlags, Atom::Scope symbolScope, |
243 | 367 | uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) { |
244 | 367 | // Mach-O symbol table does have size in it. Instead the size is the |
245 | 367 | // difference between this and the next symbol. |
246 | 367 | uint64_t size = nextSymbolAddr - symbolAddr; |
247 | 367 | uint64_t offset = symbolAddr - section.address; |
248 | 355 | bool noDeadStrip = (symbolDescFlags & N_NO_DEAD_STRIP) || !scatterable; |
249 | 367 | if (isZeroFillSection(section.type)367 ) { |
250 | 3 | file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size, |
251 | 3 | noDeadStrip, copyRefs, §ion); |
252 | 367 | } else { |
253 | 364 | DefinedAtom::Merge merge = (symbolDescFlags & N_WEAK_DEF) |
254 | 364 | ? DefinedAtom::mergeAsWeak6 : DefinedAtom::mergeNo358 ; |
255 | 364 | bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF); |
256 | 364 | if (atomType == DefinedAtom::typeUnknown364 ) { |
257 | 15 | // Mach-O needs a segment and section name. Concatentate those two |
258 | 15 | // with a / separator (e.g. "seg/sect") to fit into the lld model |
259 | 15 | // of just a section name. |
260 | 15 | std::string segSectName = section.segmentName.str() |
261 | 15 | + "/" + section.sectionName.str(); |
262 | 15 | file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType, |
263 | 15 | merge, thumb, noDeadStrip, offset, |
264 | 15 | size, segSectName, true, §ion); |
265 | 364 | } else { |
266 | 349 | if ((atomType == lld::DefinedAtom::typeCode) && |
267 | 349 | (symbolDescFlags & N_SYMBOL_RESOLVER)272 ) { |
268 | 2 | atomType = lld::DefinedAtom::typeResolver; |
269 | 2 | } |
270 | 349 | file.addDefinedAtom(symbolName, symbolScope, atomType, merge, |
271 | 349 | offset, size, thumb, noDeadStrip, copyRefs, §ion); |
272 | 349 | } |
273 | 364 | } |
274 | 367 | } |
275 | | |
276 | | llvm::Error processSymboledSection(DefinedAtom::ContentType atomType, |
277 | | const Section §ion, |
278 | | const NormalizedFile &normalizedFile, |
279 | | MachOFile &file, bool scatterable, |
280 | 227 | bool copyRefs) { |
281 | 227 | // Find section's index. |
282 | 227 | uint32_t sectIndex = 1; |
283 | 309 | for (auto § : normalizedFile.sections) { |
284 | 309 | if (§ == §ion) |
285 | 227 | break; |
286 | 82 | ++sectIndex; |
287 | 82 | } |
288 | 227 | |
289 | 227 | // Find all symbols in this section. |
290 | 227 | SmallVector<const Symbol *, 64> symbols; |
291 | 227 | appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols); |
292 | 227 | appendSymbolsInSection(normalizedFile.localSymbols, sectIndex, symbols); |
293 | 227 | |
294 | 227 | // Sort symbols. |
295 | 227 | std::sort(symbols.begin(), symbols.end(), |
296 | 205 | [](const Symbol *lhs, const Symbol *rhs) -> bool { |
297 | 205 | if (lhs == rhs) |
298 | 0 | return false; |
299 | 205 | // First by address. |
300 | 205 | uint64_t lhsAddr = lhs->value; |
301 | 205 | uint64_t rhsAddr = rhs->value; |
302 | 205 | if (lhsAddr != rhsAddr) |
303 | 160 | return lhsAddr < rhsAddr; |
304 | 45 | // If same address, one is an alias so sort by scope. |
305 | 45 | Atom::Scope lScope = atomScope(lhs->scope); |
306 | 45 | Atom::Scope rScope = atomScope(rhs->scope); |
307 | 45 | if (lScope != rScope) |
308 | 34 | return lScope < rScope; |
309 | 11 | // If same address and scope, see if one might be better as |
310 | 11 | // the alias. |
311 | 11 | bool lPrivate = (lhs->name.front() == 'l'); |
312 | 11 | bool rPrivate = (rhs->name.front() == 'l'); |
313 | 11 | if (lPrivate != rPrivate) |
314 | 4 | return lPrivate; |
315 | 7 | // If same address and scope, sort by name. |
316 | 7 | return lhs->name < rhs->name; |
317 | 7 | }); |
318 | 227 | |
319 | 227 | // Debug logging of symbols. |
320 | 227 | //for (const Symbol *sym : symbols) |
321 | 227 | // llvm::errs() << " sym: " |
322 | 227 | // << llvm::format("0x%08llx ", (uint64_t)sym->value) |
323 | 227 | // << ", " << sym->name << "\n"; |
324 | 227 | |
325 | 227 | // If section has no symbols and no content, there are no atoms. |
326 | 227 | if (symbols.empty() && 227 section.content.empty()11 ) |
327 | 3 | return llvm::Error::success(); |
328 | 224 | |
329 | 224 | if (224 symbols.empty()224 ) { |
330 | 8 | // Section has no symbols, put all content in one anoymous atom. |
331 | 8 | atomFromSymbol(atomType, section, file, section.address, StringRef(), |
332 | 8 | 0, Atom::scopeTranslationUnit, |
333 | 8 | section.address + section.content.size(), |
334 | 8 | scatterable, copyRefs); |
335 | 8 | } |
336 | 216 | else if (216 symbols.front()->value != section.address216 ) { |
337 | 11 | // Section has anonymous content before first symbol. |
338 | 11 | atomFromSymbol(atomType, section, file, section.address, StringRef(), |
339 | 11 | 0, Atom::scopeTranslationUnit, symbols.front()->value, |
340 | 11 | scatterable, copyRefs); |
341 | 11 | } |
342 | 224 | |
343 | 224 | const Symbol *lastSym = nullptr; |
344 | 359 | for (const Symbol *sym : symbols) { |
345 | 359 | if (lastSym != nullptr359 ) { |
346 | 143 | // Ignore any assembler added "ltmpNNN" symbol at start of section |
347 | 143 | // if there is another symbol at the start. |
348 | 143 | if ((lastSym->value != sym->value) |
349 | 19 | || lastSym->value != section.address |
350 | 143 | || !lastSym->name.startswith("ltmp")11 ) { |
351 | 132 | atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, |
352 | 132 | lastSym->desc, atomScope(lastSym->scope), sym->value, |
353 | 132 | scatterable, copyRefs); |
354 | 132 | } |
355 | 143 | } |
356 | 359 | lastSym = sym; |
357 | 359 | } |
358 | 224 | if (lastSym != nullptr224 ) { |
359 | 216 | atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, |
360 | 216 | lastSym->desc, atomScope(lastSym->scope), |
361 | 216 | section.address + section.content.size(), |
362 | 216 | scatterable, copyRefs); |
363 | 216 | } |
364 | 224 | |
365 | 224 | // If object built without .subsections_via_symbols, add reference chain. |
366 | 224 | if (!scatterable224 ) { |
367 | 21 | MachODefinedAtom *prevAtom = nullptr; |
368 | 21 | file.eachAtomInSection(section, |
369 | 27 | [&](MachODefinedAtom *atom, uint64_t offset)->void { |
370 | 27 | if (prevAtom) |
371 | 6 | prevAtom->addReference(Reference::KindNamespace::all, |
372 | 6 | Reference::KindArch::all, |
373 | 6 | Reference::kindLayoutAfter, 0, atom, 0); |
374 | 27 | prevAtom = atom; |
375 | 27 | }); |
376 | 21 | } |
377 | 227 | |
378 | 227 | return llvm::Error::success(); |
379 | 227 | } |
380 | | |
381 | | llvm::Error processSection(DefinedAtom::ContentType atomType, |
382 | | const Section §ion, |
383 | | bool customSectionName, |
384 | | const NormalizedFile &normalizedFile, |
385 | | MachOFile &file, bool scatterable, |
386 | 301 | bool copyRefs) { |
387 | 301 | const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); |
388 | 301 | const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); |
389 | 301 | |
390 | 301 | // Get info on how to atomize section. |
391 | 301 | unsigned int sizeMultiple; |
392 | 301 | DefinedAtom::Scope scope; |
393 | 301 | DefinedAtom::Merge merge; |
394 | 301 | AtomizeModel atomizeModel; |
395 | 301 | sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel); |
396 | 301 | |
397 | 301 | // Validate section size. |
398 | 301 | if ((section.content.size() % sizeMultiple) != 0) |
399 | 0 | return llvm::make_error<GenericError>(Twine("Section ") |
400 | 0 | + section.segmentName |
401 | 0 | + "/" + section.sectionName |
402 | 0 | + " has size (" |
403 | 0 | + Twine(section.content.size()) |
404 | 0 | + ") which is not a multiple of " |
405 | 0 | + Twine(sizeMultiple)); |
406 | 301 | |
407 | 301 | if (301 atomizeModel == atomizeAtSymbols301 ) { |
408 | 227 | // Break section up into atoms each with a fixed size. |
409 | 227 | return processSymboledSection(atomType, section, normalizedFile, file, |
410 | 227 | scatterable, copyRefs); |
411 | 0 | } else { |
412 | 74 | unsigned int size; |
413 | 202 | for (unsigned int offset = 0, e = section.content.size(); offset != e202 ;) { |
414 | 128 | switch (atomizeModel) { |
415 | 6 | case atomizeFixedSize: |
416 | 6 | // Break section up into atoms each with a fixed size. |
417 | 6 | size = sizeMultiple; |
418 | 6 | break; |
419 | 14 | case atomizePointerSize: |
420 | 14 | // Break section up into atoms each the size of a pointer. |
421 | 14 | size = is64 ? 87 : 47 ; |
422 | 14 | break; |
423 | 26 | case atomizeUTF8: |
424 | 26 | // Break section up into zero terminated c-strings. |
425 | 26 | size = 0; |
426 | 176 | for (unsigned int i = offset; i < e176 ; ++i150 ) { |
427 | 176 | if (section.content[i] == 0176 ) { |
428 | 26 | size = i + 1 - offset; |
429 | 26 | break; |
430 | 26 | } |
431 | 176 | } |
432 | 26 | break; |
433 | 2 | case atomizeUTF16: |
434 | 2 | // Break section up into zero terminated UTF16 strings. |
435 | 2 | size = 0; |
436 | 11 | for (unsigned int i = offset; i < e11 ; i += 29 ) { |
437 | 11 | if ((section.content[i] == 0) && 11 (section.content[i + 1] == 0)2 ) { |
438 | 2 | size = i + 2 - offset; |
439 | 2 | break; |
440 | 2 | } |
441 | 11 | } |
442 | 2 | break; |
443 | 48 | case atomizeCFI: |
444 | 48 | // Break section up into dwarf unwind CFIs (FDE or CIE). |
445 | 48 | size = read32(§ion.content[offset], isBig) + 4; |
446 | 48 | if (offset+size > section.content.size()48 ) { |
447 | 0 | return llvm::make_error<GenericError>(Twine("Section ") |
448 | 0 | + section.segmentName |
449 | 0 | + "/" + section.sectionName |
450 | 0 | + " is malformed. Size of CFI " |
451 | 0 | "starting at offset (" |
452 | 0 | + Twine(offset) |
453 | 0 | + ") is past end of section."); |
454 | 0 | } |
455 | 48 | break; |
456 | 28 | case atomizeCU: |
457 | 28 | // Break section up into compact unwind entries. |
458 | 28 | size = is64 ? 3226 : 202 ; |
459 | 28 | break; |
460 | 4 | case atomizeCFString: |
461 | 4 | // Break section up into NS/CFString objects. |
462 | 4 | size = is64 ? 322 : 162 ; |
463 | 4 | break; |
464 | 0 | case atomizeAtSymbols: |
465 | 0 | break; |
466 | 128 | } |
467 | 128 | if (128 size == 0128 ) { |
468 | 0 | return llvm::make_error<GenericError>(Twine("Section ") |
469 | 0 | + section.segmentName |
470 | 0 | + "/" + section.sectionName |
471 | 0 | + " is malformed. The last atom " |
472 | 0 | "is not zero terminated."); |
473 | 0 | } |
474 | 128 | if (128 customSectionName128 ) { |
475 | 8 | // Mach-O needs a segment and section name. Concatentate those two |
476 | 8 | // with a / separator (e.g. "seg/sect") to fit into the lld model |
477 | 8 | // of just a section name. |
478 | 8 | std::string segSectName = section.segmentName.str() |
479 | 8 | + "/" + section.sectionName.str(); |
480 | 8 | file.addDefinedAtomInCustomSection(StringRef(), scope, atomType, |
481 | 8 | merge, false, false, offset, |
482 | 8 | size, segSectName, true, §ion); |
483 | 128 | } else { |
484 | 120 | file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size, |
485 | 120 | false, false, copyRefs, §ion); |
486 | 120 | } |
487 | 128 | offset += size; |
488 | 128 | } |
489 | 74 | } |
490 | 74 | return llvm::Error::success(); |
491 | 301 | } |
492 | | |
493 | | const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile, |
494 | 238 | uint64_t address) { |
495 | 382 | for (const Section &s : normalizedFile.sections) { |
496 | 382 | uint64_t sAddr = s.address; |
497 | 382 | if ((sAddr <= address) && 382 (address < sAddr+s.content.size())382 ) { |
498 | 238 | return &s; |
499 | 238 | } |
500 | 0 | } |
501 | 0 | return nullptr; |
502 | 0 | } |
503 | | |
504 | | const MachODefinedAtom * |
505 | | findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file, |
506 | 56 | uint64_t addr, Reference::Addend &addend) { |
507 | 56 | const Section *sect = nullptr; |
508 | 56 | sect = findSectionCoveringAddress(normalizedFile, addr); |
509 | 56 | if (!sect) |
510 | 0 | return nullptr; |
511 | 56 | |
512 | 56 | uint32_t offsetInTarget; |
513 | 56 | uint64_t offsetInSect = addr - sect->address; |
514 | 56 | auto atom = |
515 | 56 | file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); |
516 | 56 | addend = offsetInTarget; |
517 | 56 | return atom; |
518 | 56 | } |
519 | | |
520 | | // Walks all relocations for a section in a normalized .o file and |
521 | | // creates corresponding lld::Reference objects. |
522 | | llvm::Error convertRelocs(const Section §ion, |
523 | | const NormalizedFile &normalizedFile, |
524 | | bool scatterable, |
525 | | MachOFile &file, |
526 | 307 | ArchHandler &handler) { |
527 | 307 | // Utility function for ArchHandler to find atom by its address. |
528 | 307 | auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr, |
529 | 307 | const lld::Atom **atom, Reference::Addend *addend) |
530 | 268 | -> llvm::Error { |
531 | 268 | if (sectIndex > normalizedFile.sections.size()) |
532 | 0 | return llvm::make_error<GenericError>(Twine("out of range section " |
533 | 0 | "index (") + Twine(sectIndex) + ")"); |
534 | 268 | const Section *sect = nullptr; |
535 | 268 | if (sectIndex == 0268 ) { |
536 | 154 | sect = findSectionCoveringAddress(normalizedFile, addr); |
537 | 154 | if (!sect) |
538 | 0 | return llvm::make_error<GenericError>(Twine("address (" + Twine(addr) |
539 | 0 | + ") is not in any section")); |
540 | 114 | } else { |
541 | 114 | sect = &normalizedFile.sections[sectIndex-1]; |
542 | 114 | } |
543 | 268 | uint32_t offsetInTarget; |
544 | 268 | uint64_t offsetInSect = addr - sect->address; |
545 | 268 | *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); |
546 | 268 | *addend = offsetInTarget; |
547 | 268 | return llvm::Error::success(); |
548 | 268 | }; |
549 | 307 | |
550 | 307 | // Utility function for ArchHandler to find atom by its symbol index. |
551 | 307 | auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result) |
552 | 291 | -> llvm::Error { |
553 | 291 | // Find symbol from index. |
554 | 291 | const Symbol *sym = nullptr; |
555 | 291 | uint32_t numStabs = normalizedFile.stabsSymbols.size(); |
556 | 291 | uint32_t numLocal = normalizedFile.localSymbols.size(); |
557 | 291 | uint32_t numGlobal = normalizedFile.globalSymbols.size(); |
558 | 291 | uint32_t numUndef = normalizedFile.undefinedSymbols.size(); |
559 | 291 | assert(symbolIndex >= numStabs && "Searched for stab via atomBySymbol?"); |
560 | 291 | if (symbolIndex < numStabs+numLocal291 ) { |
561 | 76 | sym = &normalizedFile.localSymbols[symbolIndex-numStabs]; |
562 | 291 | } else if (215 symbolIndex < numStabs+numLocal+numGlobal215 ) { |
563 | 26 | sym = &normalizedFile.globalSymbols[symbolIndex-numStabs-numLocal]; |
564 | 215 | } else if (189 symbolIndex < numStabs+numLocal+numGlobal+numUndef189 ) { |
565 | 189 | sym = &normalizedFile.undefinedSymbols[symbolIndex-numStabs-numLocal- |
566 | 189 | numGlobal]; |
567 | 189 | } else { |
568 | 0 | return llvm::make_error<GenericError>(Twine("symbol index (") |
569 | 0 | + Twine(symbolIndex) + ") out of range"); |
570 | 0 | } |
571 | 291 | |
572 | 291 | // Find atom from symbol. |
573 | 291 | if (291 (sym->type & N_TYPE) == N_SECT291 ) { |
574 | 102 | if (sym->sect > normalizedFile.sections.size()) |
575 | 0 | return llvm::make_error<GenericError>(Twine("symbol section index (") |
576 | 0 | + Twine(sym->sect) + ") out of range "); |
577 | 102 | const Section &symSection = normalizedFile.sections[sym->sect-1]; |
578 | 102 | uint64_t targetOffsetInSect = sym->value - symSection.address; |
579 | 102 | MachODefinedAtom *target = file.findAtomCoveringAddress(symSection, |
580 | 102 | targetOffsetInSect); |
581 | 102 | if (target102 ) { |
582 | 102 | *result = target; |
583 | 102 | return llvm::Error::success(); |
584 | 102 | } |
585 | 0 | return llvm::make_error<GenericError>("no atom found for defined symbol"); |
586 | 189 | } else if (189 (sym->type & N_TYPE) == N_UNDF189 ) { |
587 | 189 | const lld::Atom *target = file.findUndefAtom(sym->name); |
588 | 189 | if (target189 ) { |
589 | 189 | *result = target; |
590 | 189 | return llvm::Error::success(); |
591 | 189 | } |
592 | 0 | return llvm::make_error<GenericError>("no undefined atom found for sym"); |
593 | 0 | } else { |
594 | 0 | // Search undefs |
595 | 0 | return llvm::make_error<GenericError>("no atom found for symbol"); |
596 | 0 | } |
597 | 0 | }; |
598 | 307 | |
599 | 307 | const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); |
600 | 307 | // Use old-school iterator so that paired relocations can be grouped. |
601 | 307 | for (auto it=section.relocations.begin(), e=section.relocations.end(); |
602 | 781 | it != e781 ; ++it474 ) { |
603 | 475 | const Relocation &reloc = *it; |
604 | 475 | // Find atom this relocation is in. |
605 | 475 | if (reloc.offset > section.content.size()) |
606 | 0 | return llvm::make_error<GenericError>( |
607 | 0 | Twine("r_address (") + Twine(reloc.offset) |
608 | 0 | + ") is larger than section size (" |
609 | 0 | + Twine(section.content.size()) + ")"); |
610 | 475 | uint32_t offsetInAtom; |
611 | 475 | MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section, |
612 | 475 | reloc.offset, |
613 | 475 | &offsetInAtom); |
614 | 475 | assert(inAtom && "r_address in range, should have found atom"); |
615 | 475 | uint64_t fixupAddress = section.address + reloc.offset; |
616 | 475 | |
617 | 475 | const lld::Atom *target = nullptr; |
618 | 475 | Reference::Addend addend = 0; |
619 | 475 | Reference::KindValue kind; |
620 | 475 | if (handler.isPairedReloc(reloc)475 ) { |
621 | 157 | // Handle paired relocations together. |
622 | 157 | const Relocation &reloc2 = *++it; |
623 | 157 | auto relocErr = handler.getPairReferenceInfo( |
624 | 157 | reloc, reloc2, inAtom, offsetInAtom, fixupAddress, isBig, scatterable, |
625 | 157 | atomByAddr, atomBySymbol, &kind, &target, &addend); |
626 | 157 | if (relocErr157 ) { |
627 | 1 | return handleErrors(std::move(relocErr), |
628 | 1 | [&](std::unique_ptr<GenericError> GE) { |
629 | 1 | return llvm::make_error<GenericError>( |
630 | 1 | Twine("bad relocation (") + GE->getMessage() |
631 | 1 | + ") in section " |
632 | 1 | + section.segmentName + "/" + section.sectionName |
633 | 1 | + " (r1_address=" + Twine::utohexstr(reloc.offset) |
634 | 1 | + ", r1_type=" + Twine(reloc.type) |
635 | 1 | + ", r1_extern=" + Twine(reloc.isExtern) |
636 | 1 | + ", r1_length=" + Twine((int)reloc.length) |
637 | 1 | + ", r1_pcrel=" + Twine(reloc.pcRel) |
638 | 1 | + (!reloc.scattered ? (Twine(", r1_symbolnum=") |
639 | 1 | + Twine(reloc.symbol)) |
640 | 0 | : (Twine(", r1_scattered=1, r1_value=") |
641 | 0 | + Twine(reloc.value))) |
642 | 1 | + ")" |
643 | 1 | + ", (r2_address=" + Twine::utohexstr(reloc2.offset) |
644 | 1 | + ", r2_type=" + Twine(reloc2.type) |
645 | 1 | + ", r2_extern=" + Twine(reloc2.isExtern) |
646 | 1 | + ", r2_length=" + Twine((int)reloc2.length) |
647 | 1 | + ", r2_pcrel=" + Twine(reloc2.pcRel) |
648 | 1 | + (!reloc2.scattered ? (Twine(", r2_symbolnum=") |
649 | 1 | + Twine(reloc2.symbol)) |
650 | 0 | : (Twine(", r2_scattered=1, r2_value=") |
651 | 0 | + Twine(reloc2.value))) |
652 | 1 | + ")" ); |
653 | 1 | }); |
654 | 1 | } |
655 | 475 | } |
656 | 318 | else { |
657 | 318 | // Use ArchHandler to convert relocation record into information |
658 | 318 | // needed to instantiate an lld::Reference object. |
659 | 318 | auto relocErr = handler.getReferenceInfo( |
660 | 318 | reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr, |
661 | 318 | atomBySymbol, &kind, &target, &addend); |
662 | 318 | if (relocErr318 ) { |
663 | 0 | return handleErrors(std::move(relocErr), |
664 | 0 | [&](std::unique_ptr<GenericError> GE) { |
665 | 0 | return llvm::make_error<GenericError>( |
666 | 0 | Twine("bad relocation (") + GE->getMessage() |
667 | 0 | + ") in section " |
668 | 0 | + section.segmentName + "/" + section.sectionName |
669 | 0 | + " (r_address=" + Twine::utohexstr(reloc.offset) |
670 | 0 | + ", r_type=" + Twine(reloc.type) |
671 | 0 | + ", r_extern=" + Twine(reloc.isExtern) |
672 | 0 | + ", r_length=" + Twine((int)reloc.length) |
673 | 0 | + ", r_pcrel=" + Twine(reloc.pcRel) |
674 | 0 | + (!reloc.scattered ? (Twine(", r_symbolnum=") + Twine(reloc.symbol)) |
675 | 0 | : (Twine(", r_scattered=1, r_value=") |
676 | 0 | + Twine(reloc.value))) |
677 | 0 | + ")" ); |
678 | 0 | }); |
679 | 0 | } |
680 | 474 | } |
681 | 474 | // Instantiate an lld::Reference object and add to its atom. |
682 | 474 | inAtom->addReference(Reference::KindNamespace::mach_o, |
683 | 474 | handler.kindArch(), |
684 | 474 | kind, offsetInAtom, target, addend); |
685 | 474 | } |
686 | 307 | |
687 | 306 | return llvm::Error::success(); |
688 | 307 | } |
689 | | |
690 | 641 | bool isDebugInfoSection(const Section §ion) { |
691 | 641 | if ((section.attributes & S_ATTR_DEBUG) == 0) |
692 | 603 | return false; |
693 | 38 | return section.segmentName.equals("__DWARF"); |
694 | 38 | } |
695 | | |
696 | 0 | static const Atom* findDefinedAtomByName(MachOFile &file, Twine name) { |
697 | 0 | std::string strName = name.str(); |
698 | 0 | for (auto *atom : file.defined()) |
699 | 0 | if (0 atom->name() == strName0 ) |
700 | 0 | return atom; |
701 | 0 | return nullptr; |
702 | 0 | } |
703 | | |
704 | 2 | static StringRef copyDebugString(StringRef str, BumpPtrAllocator &alloc) { |
705 | 2 | char *strCopy = alloc.Allocate<char>(str.size() + 1); |
706 | 2 | memcpy(strCopy, str.data(), str.size()); |
707 | 2 | strCopy[str.size()] = '\0'; |
708 | 2 | return strCopy; |
709 | 2 | } |
710 | | |
711 | | llvm::Error parseStabs(MachOFile &file, |
712 | | const NormalizedFile &normalizedFile, |
713 | 176 | bool copyRefs) { |
714 | 176 | |
715 | 176 | if (normalizedFile.stabsSymbols.empty()) |
716 | 176 | return llvm::Error::success(); |
717 | 0 |
|
718 | 0 | // FIXME: Kill this off when we can move to sane yaml parsing. |
719 | 0 | std::unique_ptr<BumpPtrAllocator> allocator; |
720 | 0 | if (copyRefs) |
721 | 0 | allocator = llvm::make_unique<BumpPtrAllocator>(); |
722 | 0 |
|
723 | 0 | enum { start, inBeginEnd } state = start; |
724 | 0 |
|
725 | 0 | const Atom *currentAtom = nullptr; |
726 | 0 | uint64_t currentAtomAddress = 0; |
727 | 0 | StabsDebugInfo::StabsList stabsList; |
728 | 0 | for (const auto &stabSym : normalizedFile.stabsSymbols) { |
729 | 0 | Stab stab(nullptr, stabSym.type, stabSym.sect, stabSym.desc, |
730 | 0 | stabSym.value, stabSym.name); |
731 | 0 | switch (state) { |
732 | 0 | case start: |
733 | 0 | switch (static_cast<StabType>(stabSym.type)) { |
734 | 0 | case N_BNSYM: |
735 | 0 | state = inBeginEnd; |
736 | 0 | currentAtomAddress = stabSym.value; |
737 | 0 | Reference::Addend addend; |
738 | 0 | currentAtom = findAtomCoveringAddress(normalizedFile, file, |
739 | 0 | currentAtomAddress, addend); |
740 | 0 | if (addend != 0) |
741 | 0 | return llvm::make_error<GenericError>( |
742 | 0 | "Non-zero addend for BNSYM '" + stabSym.name + "' in " + |
743 | 0 | file.path()); |
744 | 0 | if (0 currentAtom0 ) |
745 | 0 | stab.atom = currentAtom; |
746 | 0 | else { |
747 | 0 | // FIXME: ld64 just issues a warning here - should we match that? |
748 | 0 | return llvm::make_error<GenericError>( |
749 | 0 | "can't find atom for stabs BNSYM at " + |
750 | 0 | Twine::utohexstr(stabSym.value) + " in " + file.path()); |
751 | 0 | } |
752 | 0 | break; |
753 | 0 | case N_SO: |
754 | 0 | case N_OSO: |
755 | 0 | // Not associated with an atom, just copy. |
756 | 0 | if (copyRefs) |
757 | 0 | stab.str = copyDebugString(stabSym.name, *allocator); |
758 | 0 | else |
759 | 0 | stab.str = stabSym.name; |
760 | 0 | break; |
761 | 0 | case N_GSYM: { |
762 | 0 | auto colonIdx = stabSym.name.find(':'); |
763 | 0 | if (colonIdx != StringRef::npos0 ) { |
764 | 0 | StringRef name = stabSym.name.substr(0, colonIdx); |
765 | 0 | currentAtom = findDefinedAtomByName(file, "_" + name); |
766 | 0 | stab.atom = currentAtom; |
767 | 0 | if (copyRefs) |
768 | 0 | stab.str = copyDebugString(stabSym.name, *allocator); |
769 | 0 | else |
770 | 0 | stab.str = stabSym.name; |
771 | 0 | } else { |
772 | 0 | currentAtom = findDefinedAtomByName(file, stabSym.name); |
773 | 0 | stab.atom = currentAtom; |
774 | 0 | if (copyRefs) |
775 | 0 | stab.str = copyDebugString(stabSym.name, *allocator); |
776 | 0 | else |
777 | 0 | stab.str = stabSym.name; |
778 | 0 | } |
779 | 0 | if (stab.atom == nullptr) |
780 | 0 | return llvm::make_error<GenericError>( |
781 | 0 | "can't find atom for N_GSYM stabs" + stabSym.name + |
782 | 0 | " in " + file.path()); |
783 | 0 | break; |
784 | 0 | } |
785 | 0 | case N_FUN: |
786 | 0 | return llvm::make_error<GenericError>( |
787 | 0 | "old-style N_FUN stab '" + stabSym.name + "' unsupported"); |
788 | 0 | default: |
789 | 0 | return llvm::make_error<GenericError>( |
790 | 0 | "unrecognized stab symbol '" + stabSym.name + "'"); |
791 | 0 | } |
792 | 0 | break; |
793 | 0 | case inBeginEnd: |
794 | 0 | stab.atom = currentAtom; |
795 | 0 | switch (static_cast<StabType>(stabSym.type)) { |
796 | 0 | case N_ENSYM: |
797 | 0 | state = start; |
798 | 0 | currentAtom = nullptr; |
799 | 0 | break; |
800 | 0 | case N_FUN: |
801 | 0 | // Just copy the string. |
802 | 0 | if (copyRefs) |
803 | 0 | stab.str = copyDebugString(stabSym.name, *allocator); |
804 | 0 | else |
805 | 0 | stab.str = stabSym.name; |
806 | 0 | break; |
807 | 0 | default: |
808 | 0 | return llvm::make_error<GenericError>( |
809 | 0 | "unrecognized stab symbol '" + stabSym.name + "'"); |
810 | 0 | } |
811 | 0 | } |
812 | 0 | llvm::dbgs() << "Adding to stabsList: " << stab << "\n"; |
813 | 0 | stabsList.push_back(stab); |
814 | 0 | } |
815 | 0 |
|
816 | 0 | file.setDebugInfo(llvm::make_unique<StabsDebugInfo>(std::move(stabsList))); |
817 | 0 |
|
818 | 0 | // FIXME: Kill this off when we fix YAML memory ownership. |
819 | 0 | file.debugInfo()->setAllocator(std::move(allocator)); |
820 | 0 |
|
821 | 0 | return llvm::Error::success(); |
822 | 176 | } |
823 | | |
824 | | static llvm::DataExtractor |
825 | | dataExtractorFromSection(const NormalizedFile &normalizedFile, |
826 | 4 | const Section &S) { |
827 | 4 | const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); |
828 | 4 | const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); |
829 | 4 | StringRef SecData(reinterpret_cast<const char*>(S.content.data()), |
830 | 4 | S.content.size()); |
831 | 4 | return llvm::DataExtractor(SecData, !isBig, is64 ? 84 : 40 ); |
832 | 4 | } |
833 | | |
834 | | // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE |
835 | | // inspection" code if possible. |
836 | | static uint32_t getCUAbbrevOffset(llvm::DataExtractor abbrevData, |
837 | 1 | uint64_t abbrCode) { |
838 | 1 | uint64_t curCode; |
839 | 1 | uint32_t offset = 0; |
840 | 1 | while ((curCode = abbrevData.getULEB128(&offset)) != abbrCode1 ) { |
841 | 0 | // Tag |
842 | 0 | abbrevData.getULEB128(&offset); |
843 | 0 | // DW_CHILDREN |
844 | 0 | abbrevData.getU8(&offset); |
845 | 0 | // Attributes |
846 | 0 | while (abbrevData.getULEB128(&offset) | abbrevData.getULEB128(&offset)) |
847 | 0 | ; |
848 | 0 | } |
849 | 1 | return offset; |
850 | 1 | } |
851 | | |
852 | | // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE |
853 | | // inspection" code if possible. |
854 | | static Expected<const char *> |
855 | | getIndexedString(const NormalizedFile &normalizedFile, |
856 | | llvm::dwarf::Form form, llvm::DataExtractor infoData, |
857 | 2 | uint32_t &infoOffset, const Section &stringsSection) { |
858 | 2 | if (form == llvm::dwarf::DW_FORM_string) |
859 | 0 | return infoData.getCStr(&infoOffset); |
860 | 2 | if (2 form != llvm::dwarf::DW_FORM_strp2 ) |
861 | 0 | return llvm::make_error<GenericError>( |
862 | 0 | "string field encoded without DW_FORM_strp"); |
863 | 2 | uint32_t stringOffset = infoData.getU32(&infoOffset); |
864 | 2 | llvm::DataExtractor stringsData = |
865 | 2 | dataExtractorFromSection(normalizedFile, stringsSection); |
866 | 2 | return stringsData.getCStr(&stringOffset); |
867 | 2 | } |
868 | | |
869 | | // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE |
870 | | // inspection" code if possible. |
871 | | static llvm::Expected<TranslationUnitSource> |
872 | | readCompUnit(const NormalizedFile &normalizedFile, |
873 | | const Section &info, |
874 | | const Section &abbrev, |
875 | | const Section &strings, |
876 | 1 | StringRef path) { |
877 | 1 | // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE |
878 | 1 | // inspection" code if possible. |
879 | 1 | uint32_t offset = 0; |
880 | 1 | llvm::dwarf::DwarfFormat Format = llvm::dwarf::DwarfFormat::DWARF32; |
881 | 1 | auto infoData = dataExtractorFromSection(normalizedFile, info); |
882 | 1 | uint32_t length = infoData.getU32(&offset); |
883 | 1 | if (length == 0xffffffff1 ) { |
884 | 0 | Format = llvm::dwarf::DwarfFormat::DWARF64; |
885 | 0 | infoData.getU64(&offset); |
886 | 0 | } |
887 | 1 | else if (1 length > 0xffffff001 ) |
888 | 0 | return llvm::make_error<GenericError>("Malformed DWARF in " + path); |
889 | 1 | |
890 | 1 | uint16_t version = infoData.getU16(&offset); |
891 | 1 | |
892 | 1 | if (version < 2 || 1 version > 41 ) |
893 | 0 | return llvm::make_error<GenericError>("Unsupported DWARF version in " + |
894 | 0 | path); |
895 | 1 | |
896 | 1 | infoData.getU32(&offset); // Abbrev offset (should be zero) |
897 | 1 | uint8_t addrSize = infoData.getU8(&offset); |
898 | 1 | |
899 | 1 | uint32_t abbrCode = infoData.getULEB128(&offset); |
900 | 1 | auto abbrevData = dataExtractorFromSection(normalizedFile, abbrev); |
901 | 1 | uint32_t abbrevOffset = getCUAbbrevOffset(abbrevData, abbrCode); |
902 | 1 | uint64_t tag = abbrevData.getULEB128(&abbrevOffset); |
903 | 1 | if (tag != llvm::dwarf::DW_TAG_compile_unit) |
904 | 0 | return llvm::make_error<GenericError>("top level DIE is not a compile unit"); |
905 | 1 | // DW_CHILDREN |
906 | 1 | abbrevData.getU8(&abbrevOffset); |
907 | 1 | uint32_t name; |
908 | 1 | llvm::dwarf::Form form; |
909 | 1 | llvm::DWARFFormParams formParams = {version, addrSize, Format}; |
910 | 1 | TranslationUnitSource tu; |
911 | 8 | while ((name = abbrevData.getULEB128(&abbrevOffset)) | |
912 | 8 | (form = static_cast<llvm::dwarf::Form>( |
913 | 8 | abbrevData.getULEB128(&abbrevOffset))) && |
914 | 8 | (name != 0 || 7 form != 00 )) { |
915 | 7 | switch (name) { |
916 | 1 | case llvm::dwarf::DW_AT_name: { |
917 | 1 | if (auto eName = getIndexedString(normalizedFile, form, infoData, offset, |
918 | 1 | strings)) |
919 | 1 | tu.name = *eName; |
920 | 1 | else |
921 | 0 | return eName.takeError(); |
922 | 1 | break; |
923 | 1 | } |
924 | 1 | case llvm::dwarf::DW_AT_comp_dir: { |
925 | 1 | if (auto eName = getIndexedString(normalizedFile, form, infoData, offset, |
926 | 1 | strings)) |
927 | 1 | tu.path = *eName; |
928 | 1 | else |
929 | 0 | return eName.takeError(); |
930 | 1 | break; |
931 | 1 | } |
932 | 5 | default: |
933 | 5 | llvm::DWARFFormValue::skipValue(form, infoData, &offset, formParams); |
934 | 7 | } |
935 | 7 | } |
936 | 1 | return tu; |
937 | 1 | } |
938 | | |
939 | | llvm::Error parseDebugInfo(MachOFile &file, |
940 | 177 | const NormalizedFile &normalizedFile, bool copyRefs) { |
941 | 177 | |
942 | 177 | // Find the interesting debug info sections. |
943 | 177 | const Section *debugInfo = nullptr; |
944 | 177 | const Section *debugAbbrev = nullptr; |
945 | 177 | const Section *debugStrings = nullptr; |
946 | 177 | |
947 | 317 | for (auto &s : normalizedFile.sections) { |
948 | 317 | if (s.segmentName == "__DWARF"317 ) { |
949 | 12 | if (s.sectionName == "__debug_info") |
950 | 1 | debugInfo = &s; |
951 | 11 | else if (11 s.sectionName == "__debug_abbrev"11 ) |
952 | 1 | debugAbbrev = &s; |
953 | 10 | else if (10 s.sectionName == "__debug_str"10 ) |
954 | 1 | debugStrings = &s; |
955 | 12 | } |
956 | 317 | } |
957 | 177 | |
958 | 177 | if (!debugInfo) |
959 | 176 | return parseStabs(file, normalizedFile, copyRefs); |
960 | 1 | |
961 | 1 | if (1 debugInfo->content.size() == 01 ) |
962 | 0 | return llvm::Error::success(); |
963 | 1 | |
964 | 1 | if (1 debugInfo->content.size() < 121 ) |
965 | 0 | return llvm::make_error<GenericError>("Malformed __debug_info section in " + |
966 | 0 | file.path() + ": too small"); |
967 | 1 | |
968 | 1 | if (1 !debugAbbrev1 ) |
969 | 0 | return llvm::make_error<GenericError>("Missing __dwarf_abbrev section in " + |
970 | 0 | file.path()); |
971 | 1 | |
972 | 1 | if (auto 1 tuOrErr1 = readCompUnit(normalizedFile, *debugInfo, *debugAbbrev, |
973 | 1 | *debugStrings, file.path())) { |
974 | 1 | // FIXME: Kill of allocator and code under 'copyRefs' when we fix YAML |
975 | 1 | // memory ownership. |
976 | 1 | std::unique_ptr<BumpPtrAllocator> allocator; |
977 | 1 | if (copyRefs1 ) { |
978 | 1 | allocator = llvm::make_unique<BumpPtrAllocator>(); |
979 | 1 | tuOrErr->name = copyDebugString(tuOrErr->name, *allocator); |
980 | 1 | tuOrErr->path = copyDebugString(tuOrErr->path, *allocator); |
981 | 1 | } |
982 | 1 | file.setDebugInfo(llvm::make_unique<DwarfDebugInfo>(std::move(*tuOrErr))); |
983 | 1 | if (copyRefs) |
984 | 1 | file.debugInfo()->setAllocator(std::move(allocator)); |
985 | 1 | } else |
986 | 0 | return tuOrErr.takeError(); |
987 | 1 | |
988 | 1 | return llvm::Error::success(); |
989 | 1 | } |
990 | | |
991 | 34 | static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) { |
992 | 34 | if (is64) |
993 | 30 | return read64(addr, isBig); |
994 | 4 | |
995 | 4 | int32_t res = read32(addr, isBig); |
996 | 4 | return res; |
997 | 4 | } |
998 | | |
999 | | /// --- Augmentation String Processing --- |
1000 | | |
1001 | | struct CIEInfo { |
1002 | | bool _augmentationDataPresent = false; |
1003 | | bool _mayHaveEH = false; |
1004 | | uint32_t _offsetOfLSDA = ~0U; |
1005 | | uint32_t _offsetOfPersonality = ~0U; |
1006 | | uint32_t _offsetOfFDEPointerEncoding = ~0U; |
1007 | | uint32_t _augmentationDataLength = ~0U; |
1008 | | }; |
1009 | | |
1010 | | typedef llvm::DenseMap<const MachODefinedAtom*, CIEInfo> CIEInfoMap; |
1011 | | |
1012 | | static llvm::Error processAugmentationString(const uint8_t *augStr, |
1013 | | CIEInfo &cieInfo, |
1014 | 22 | unsigned &len) { |
1015 | 22 | |
1016 | 22 | if (augStr[0] == '\0'22 ) { |
1017 | 0 | len = 1; |
1018 | 0 | return llvm::Error::success(); |
1019 | 0 | } |
1020 | 22 | |
1021 | 22 | if (22 augStr[0] != 'z'22 ) |
1022 | 0 | return llvm::make_error<GenericError>("expected 'z' at start of " |
1023 | 0 | "augmentation string"); |
1024 | 22 | |
1025 | 22 | cieInfo._augmentationDataPresent = true; |
1026 | 22 | uint64_t idx = 1; |
1027 | 22 | |
1028 | 22 | uint32_t offsetInAugmentationData = 0; |
1029 | 58 | while (augStr[idx] != '\0'58 ) { |
1030 | 36 | if (augStr[idx] == 'L'36 ) { |
1031 | 7 | cieInfo._offsetOfLSDA = offsetInAugmentationData; |
1032 | 7 | // This adds a single byte to the augmentation data. |
1033 | 7 | ++offsetInAugmentationData; |
1034 | 7 | ++idx; |
1035 | 7 | continue; |
1036 | 7 | } |
1037 | 29 | if (29 augStr[idx] == 'P'29 ) { |
1038 | 7 | cieInfo._offsetOfPersonality = offsetInAugmentationData; |
1039 | 7 | // This adds a single byte to the augmentation data for the encoding, |
1040 | 7 | // then a number of bytes for the pointer data. |
1041 | 7 | // FIXME: We are assuming 4 is correct here for the pointer size as we |
1042 | 7 | // always currently use delta32ToGOT. |
1043 | 7 | offsetInAugmentationData += 5; |
1044 | 7 | ++idx; |
1045 | 7 | continue; |
1046 | 7 | } |
1047 | 22 | if (22 augStr[idx] == 'R'22 ) { |
1048 | 22 | cieInfo._offsetOfFDEPointerEncoding = offsetInAugmentationData; |
1049 | 22 | // This adds a single byte to the augmentation data. |
1050 | 22 | ++offsetInAugmentationData; |
1051 | 22 | ++idx; |
1052 | 22 | continue; |
1053 | 22 | } |
1054 | 0 | if (0 augStr[idx] == 'e'0 ) { |
1055 | 0 | if (augStr[idx + 1] != 'h') |
1056 | 0 | return llvm::make_error<GenericError>("expected 'eh' in " |
1057 | 0 | "augmentation string"); |
1058 | 0 | cieInfo._mayHaveEH = true; |
1059 | 0 | idx += 2; |
1060 | 0 | continue; |
1061 | 0 | } |
1062 | 0 | ++idx; |
1063 | 0 | } |
1064 | 22 | |
1065 | 22 | cieInfo._augmentationDataLength = offsetInAugmentationData; |
1066 | 22 | |
1067 | 22 | len = idx + 1; |
1068 | 22 | return llvm::Error::success(); |
1069 | 22 | } |
1070 | | |
1071 | | static llvm::Error processCIE(const NormalizedFile &normalizedFile, |
1072 | | MachOFile &file, |
1073 | | mach_o::ArchHandler &handler, |
1074 | | const Section *ehFrameSection, |
1075 | | MachODefinedAtom *atom, |
1076 | | uint64_t offset, |
1077 | 22 | CIEInfoMap &cieInfos) { |
1078 | 22 | const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); |
1079 | 22 | const uint8_t *frameData = atom->rawContent().data(); |
1080 | 22 | |
1081 | 22 | CIEInfo cieInfo; |
1082 | 22 | |
1083 | 22 | uint32_t size = read32(frameData, isBig); |
1084 | 22 | uint64_t cieIDField = size == 0xffffffffU |
1085 | 0 | ? sizeof(uint32_t) + sizeof(uint64_t) |
1086 | 22 | : sizeof(uint32_t); |
1087 | 22 | uint64_t versionField = cieIDField + sizeof(uint32_t); |
1088 | 22 | uint64_t augmentationStringField = versionField + sizeof(uint8_t); |
1089 | 22 | |
1090 | 22 | unsigned augmentationStringLength = 0; |
1091 | 22 | if (auto err = processAugmentationString(frameData + augmentationStringField, |
1092 | 22 | cieInfo, augmentationStringLength)) |
1093 | 0 | return err; |
1094 | 22 | |
1095 | 22 | if (22 cieInfo._offsetOfPersonality != ~0U22 ) { |
1096 | 7 | // If we have augmentation data for the personality function, then we may |
1097 | 7 | // need to implicitly generate its relocation. |
1098 | 7 | |
1099 | 7 | // Parse the EH Data field which is pointer sized. |
1100 | 7 | uint64_t EHDataField = augmentationStringField + augmentationStringLength; |
1101 | 7 | const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); |
1102 | 7 | unsigned EHDataFieldSize = (cieInfo._mayHaveEH ? (is64 ? 0 80 : 40 ) : 07 ); |
1103 | 7 | |
1104 | 7 | // Parse Code Align Factor which is a ULEB128. |
1105 | 7 | uint64_t CodeAlignField = EHDataField + EHDataFieldSize; |
1106 | 7 | unsigned lengthFieldSize = 0; |
1107 | 7 | llvm::decodeULEB128(frameData + CodeAlignField, &lengthFieldSize); |
1108 | 7 | |
1109 | 7 | // Parse Data Align Factor which is a SLEB128. |
1110 | 7 | uint64_t DataAlignField = CodeAlignField + lengthFieldSize; |
1111 | 7 | llvm::decodeSLEB128(frameData + DataAlignField, &lengthFieldSize); |
1112 | 7 | |
1113 | 7 | // Parse Return Address Register which is a byte. |
1114 | 7 | uint64_t ReturnAddressField = DataAlignField + lengthFieldSize; |
1115 | 7 | |
1116 | 7 | // Parse the augmentation length which is a ULEB128. |
1117 | 7 | uint64_t AugmentationLengthField = ReturnAddressField + 1; |
1118 | 7 | uint64_t AugmentationLength = |
1119 | 7 | llvm::decodeULEB128(frameData + AugmentationLengthField, |
1120 | 7 | &lengthFieldSize); |
1121 | 7 | |
1122 | 7 | if (AugmentationLength != cieInfo._augmentationDataLength) |
1123 | 0 | return llvm::make_error<GenericError>("CIE augmentation data length " |
1124 | 0 | "mismatch"); |
1125 | 7 | |
1126 | 7 | // Get the start address of the augmentation data. |
1127 | 7 | uint64_t AugmentationDataField = AugmentationLengthField + lengthFieldSize; |
1128 | 7 | |
1129 | 7 | // Parse the personality function from the augmentation data. |
1130 | 7 | uint64_t PersonalityField = |
1131 | 7 | AugmentationDataField + cieInfo._offsetOfPersonality; |
1132 | 7 | |
1133 | 7 | // Parse the personality encoding. |
1134 | 7 | // FIXME: Verify that this is a 32-bit pcrel offset. |
1135 | 7 | uint64_t PersonalityFunctionField = PersonalityField + 1; |
1136 | 7 | |
1137 | 7 | if (atom->begin() != atom->end()7 ) { |
1138 | 3 | // If we have an explicit relocation, then make sure it matches this |
1139 | 3 | // offset as this is where we'd expect it to be applied to. |
1140 | 3 | DefinedAtom::reference_iterator CurrentRef = atom->begin(); |
1141 | 3 | if (CurrentRef->offsetInAtom() != PersonalityFunctionField) |
1142 | 0 | return llvm::make_error<GenericError>("CIE personality reloc at " |
1143 | 0 | "wrong offset"); |
1144 | 3 | |
1145 | 3 | if (3 ++CurrentRef != atom->end()3 ) |
1146 | 0 | return llvm::make_error<GenericError>("CIE contains too many relocs"); |
1147 | 4 | } else { |
1148 | 4 | // Implicitly generate the personality function reloc. It's assumed to |
1149 | 4 | // be a delta32 offset to a GOT entry. |
1150 | 4 | // FIXME: Parse the encoding and check this. |
1151 | 4 | int32_t funcDelta = read32(frameData + PersonalityFunctionField, isBig); |
1152 | 4 | uint64_t funcAddress = ehFrameSection->address + offset + |
1153 | 4 | PersonalityFunctionField; |
1154 | 4 | funcAddress += funcDelta; |
1155 | 4 | |
1156 | 4 | const MachODefinedAtom *func = nullptr; |
1157 | 4 | Reference::Addend addend; |
1158 | 4 | func = findAtomCoveringAddress(normalizedFile, file, funcAddress, |
1159 | 4 | addend); |
1160 | 4 | atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), |
1161 | 4 | handler.unwindRefToPersonalityFunctionKind(), |
1162 | 4 | PersonalityFunctionField, func, addend); |
1163 | 4 | } |
1164 | 22 | } else if (15 atom->begin() != atom->end()15 ) { |
1165 | 0 | // Otherwise, we expect there to be no relocations in this atom as the only |
1166 | 0 | // relocation would have been to the personality function. |
1167 | 0 | return llvm::make_error<GenericError>("unexpected relocation in CIE"); |
1168 | 0 | } |
1169 | 22 | |
1170 | 22 | |
1171 | 22 | cieInfos[atom] = std::move(cieInfo); |
1172 | 22 | |
1173 | 22 | return llvm::Error::success(); |
1174 | 22 | } |
1175 | | |
1176 | | static llvm::Error processFDE(const NormalizedFile &normalizedFile, |
1177 | | MachOFile &file, |
1178 | | mach_o::ArchHandler &handler, |
1179 | | const Section *ehFrameSection, |
1180 | | MachODefinedAtom *atom, |
1181 | | uint64_t offset, |
1182 | 26 | const CIEInfoMap &cieInfos) { |
1183 | 26 | |
1184 | 26 | const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); |
1185 | 26 | const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); |
1186 | 26 | |
1187 | 26 | // Compiler wasn't lazy and actually told us what it meant. |
1188 | 26 | // Unfortunately, the compiler may not have generated references for all of |
1189 | 26 | // [cie, func, lsda] and so we still need to parse the FDE and add references |
1190 | 26 | // for any the compiler didn't generate. |
1191 | 26 | if (atom->begin() != atom->end()) |
1192 | 4 | atom->sortReferences(); |
1193 | 26 | |
1194 | 26 | DefinedAtom::reference_iterator CurrentRef = atom->begin(); |
1195 | 26 | |
1196 | 26 | // This helper returns the reference (if one exists) at the offset we are |
1197 | 26 | // currently processing. It automatically increments the ref iterator if we |
1198 | 26 | // do return a ref, and throws an error if we pass over a ref without |
1199 | 26 | // comsuming it. |
1200 | 26 | auto currentRefGetter = [&CurrentRef, |
1201 | 60 | &atom](uint64_t Offset)->const Reference* { |
1202 | 60 | // If there are no more refs found, then we are done. |
1203 | 60 | if (CurrentRef == atom->end()) |
1204 | 50 | return nullptr; |
1205 | 10 | |
1206 | 10 | const Reference *Ref = *CurrentRef; |
1207 | 10 | |
1208 | 10 | // If we haven't reached the offset for this reference, then return that |
1209 | 10 | // we don't yet have a reference to process. |
1210 | 10 | if (Offset < Ref->offsetInAtom()) |
1211 | 2 | return nullptr; |
1212 | 8 | |
1213 | 8 | // If the offset is equal, then we want to process this ref. |
1214 | 8 | if (8 Offset == Ref->offsetInAtom()8 ) { |
1215 | 8 | ++CurrentRef; |
1216 | 8 | return Ref; |
1217 | 8 | } |
1218 | 0 |
|
1219 | 0 | // The current ref is at an offset which is earlier than the current |
1220 | 0 | // offset, then we failed to consume it when we should have. In this case |
1221 | 0 | // throw an error. |
1222 | 0 | llvm::report_fatal_error("Skipped reference when processing FDE"); |
1223 | 0 | }; |
1224 | 26 | |
1225 | 26 | // Helper to either get the reference at this current location, and verify |
1226 | 26 | // that it is of the expected type, or add a reference of that type. |
1227 | 26 | // Returns the reference target. |
1228 | 26 | auto verifyOrAddReference = [&](uint64_t targetAddress, |
1229 | 26 | Reference::KindValue refKind, |
1230 | 26 | uint64_t refAddress, |
1231 | 60 | bool allowsAddend)->const Atom* { |
1232 | 60 | if (auto *ref60 = currentRefGetter(refAddress)) { |
1233 | 8 | // The compiler already emitted a relocation for the CIE ref. This should |
1234 | 8 | // have been converted to the correct type of reference in |
1235 | 8 | // get[Pair]ReferenceInfo(). |
1236 | 8 | assert(ref->kindValue() == refKind && |
1237 | 8 | "Incorrect EHFrame reference kind"); |
1238 | 8 | return ref->target(); |
1239 | 8 | } |
1240 | 52 | Reference::Addend addend; |
1241 | 52 | auto *target = findAtomCoveringAddress(normalizedFile, file, |
1242 | 52 | targetAddress, addend); |
1243 | 52 | atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), |
1244 | 52 | refKind, refAddress, target, addend); |
1245 | 52 | |
1246 | 52 | if (!allowsAddend) |
1247 | 60 | assert(!addend && "EHFrame reference cannot have addend"); |
1248 | 60 | return target; |
1249 | 60 | }; |
1250 | 26 | |
1251 | 26 | const uint8_t *startFrameData = atom->rawContent().data(); |
1252 | 26 | const uint8_t *frameData = startFrameData; |
1253 | 26 | |
1254 | 26 | uint32_t size = read32(frameData, isBig); |
1255 | 26 | uint64_t cieFieldInFDE = size == 0xffffffffU |
1256 | 0 | ? sizeof(uint32_t) + sizeof(uint64_t) |
1257 | 26 | : sizeof(uint32_t); |
1258 | 26 | |
1259 | 26 | // Linker needs to fixup a reference from the FDE to its parent CIE (a |
1260 | 26 | // 32-bit byte offset backwards in the __eh_frame section). |
1261 | 26 | uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig); |
1262 | 26 | uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE; |
1263 | 26 | cieAddress -= cieDelta; |
1264 | 26 | |
1265 | 26 | auto *cieRefTarget = verifyOrAddReference(cieAddress, |
1266 | 26 | handler.unwindRefToCIEKind(), |
1267 | 26 | cieFieldInFDE, false); |
1268 | 26 | const MachODefinedAtom *cie = dyn_cast<MachODefinedAtom>(cieRefTarget); |
1269 | 26 | assert(cie && cie->contentType() == DefinedAtom::typeCFI && |
1270 | 26 | "FDE's CIE field does not point at the start of a CIE."); |
1271 | 26 | |
1272 | 26 | const CIEInfo &cieInfo = cieInfos.find(cie)->second; |
1273 | 26 | |
1274 | 26 | // Linker needs to fixup reference from the FDE to the function it's |
1275 | 26 | // describing. FIXME: there are actually different ways to do this, and the |
1276 | 26 | // particular method used is specified in the CIE's augmentation fields |
1277 | 26 | // (hopefully) |
1278 | 26 | uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t); |
1279 | 26 | |
1280 | 26 | int64_t functionFromFDE = readSPtr(is64, isBig, |
1281 | 26 | frameData + rangeFieldInFDE); |
1282 | 26 | uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE; |
1283 | 26 | rangeStart += functionFromFDE; |
1284 | 26 | |
1285 | 26 | verifyOrAddReference(rangeStart, |
1286 | 26 | handler.unwindRefToFunctionKind(), |
1287 | 26 | rangeFieldInFDE, true); |
1288 | 26 | |
1289 | 26 | // Handle the augmentation data if there is any. |
1290 | 26 | if (cieInfo._augmentationDataPresent26 ) { |
1291 | 26 | // First process the augmentation data length field. |
1292 | 26 | uint64_t augmentationDataLengthFieldInFDE = |
1293 | 26 | rangeFieldInFDE + 2 * (is64 ? sizeof(uint64_t)22 : sizeof(uint32_t)4 ); |
1294 | 26 | unsigned lengthFieldSize = 0; |
1295 | 26 | uint64_t augmentationDataLength = |
1296 | 26 | llvm::decodeULEB128(frameData + augmentationDataLengthFieldInFDE, |
1297 | 26 | &lengthFieldSize); |
1298 | 26 | |
1299 | 26 | if (cieInfo._offsetOfLSDA != ~0U && 26 augmentationDataLength > 08 ) { |
1300 | 8 | |
1301 | 8 | // Look at the augmentation data field. |
1302 | 8 | uint64_t augmentationDataFieldInFDE = |
1303 | 8 | augmentationDataLengthFieldInFDE + lengthFieldSize; |
1304 | 8 | |
1305 | 8 | int64_t lsdaFromFDE = readSPtr(is64, isBig, |
1306 | 8 | frameData + augmentationDataFieldInFDE); |
1307 | 8 | uint64_t lsdaStart = |
1308 | 8 | ehFrameSection->address + offset + augmentationDataFieldInFDE + |
1309 | 8 | lsdaFromFDE; |
1310 | 8 | |
1311 | 8 | verifyOrAddReference(lsdaStart, |
1312 | 8 | handler.unwindRefToFunctionKind(), |
1313 | 8 | augmentationDataFieldInFDE, true); |
1314 | 8 | } |
1315 | 26 | } |
1316 | 26 | |
1317 | 26 | return llvm::Error::success(); |
1318 | 26 | } |
1319 | | |
1320 | | llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile, |
1321 | | MachOFile &file, |
1322 | 177 | mach_o::ArchHandler &handler) { |
1323 | 177 | |
1324 | 177 | const Section *ehFrameSection = nullptr; |
1325 | 177 | for (auto §ion : normalizedFile.sections) |
1326 | 311 | if (311 section.segmentName == "__TEXT" && |
1327 | 311 | section.sectionName == "__eh_frame"208 ) { |
1328 | 19 | ehFrameSection = §ion; |
1329 | 19 | break; |
1330 | 19 | } |
1331 | 177 | |
1332 | 177 | // No __eh_frame so nothing to do. |
1333 | 177 | if (!ehFrameSection) |
1334 | 158 | return llvm::Error::success(); |
1335 | 19 | |
1336 | 19 | llvm::Error ehFrameErr = llvm::Error::success(); |
1337 | 19 | CIEInfoMap cieInfos; |
1338 | 19 | |
1339 | 19 | file.eachAtomInSection(*ehFrameSection, |
1340 | 48 | [&](MachODefinedAtom *atom, uint64_t offset) -> void { |
1341 | 48 | assert(atom->contentType() == DefinedAtom::typeCFI); |
1342 | 48 | |
1343 | 48 | // Bail out if we've encountered an error. |
1344 | 48 | if (ehFrameErr) |
1345 | 0 | return; |
1346 | 48 | |
1347 | 48 | const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); |
1348 | 48 | if (ArchHandler::isDwarfCIE(isBig, atom)) |
1349 | 22 | ehFrameErr = processCIE(normalizedFile, file, handler, ehFrameSection, |
1350 | 22 | atom, offset, cieInfos); |
1351 | 48 | else |
1352 | 26 | ehFrameErr = processFDE(normalizedFile, file, handler, ehFrameSection, |
1353 | 26 | atom, offset, cieInfos); |
1354 | 48 | }); |
1355 | 177 | |
1356 | 177 | return ehFrameErr; |
1357 | 177 | } |
1358 | | |
1359 | | llvm::Error parseObjCImageInfo(const Section §, |
1360 | | const NormalizedFile &normalizedFile, |
1361 | 9 | MachOFile &file) { |
1362 | 9 | |
1363 | 9 | // struct objc_image_info { |
1364 | 9 | // uint32_t version; // initially 0 |
1365 | 9 | // uint32_t flags; |
1366 | 9 | // }; |
1367 | 9 | |
1368 | 9 | ArrayRef<uint8_t> content = sect.content; |
1369 | 9 | if (content.size() != 8) |
1370 | 1 | return llvm::make_error<GenericError>(sect.segmentName + "/" + |
1371 | 1 | sect.sectionName + |
1372 | 1 | " in file " + file.path() + |
1373 | 1 | " should be 8 bytes in size"); |
1374 | 8 | |
1375 | 8 | const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); |
1376 | 8 | uint32_t version = read32(content.data(), isBig); |
1377 | 8 | if (version) |
1378 | 1 | return llvm::make_error<GenericError>(sect.segmentName + "/" + |
1379 | 1 | sect.sectionName + |
1380 | 1 | " in file " + file.path() + |
1381 | 1 | " should have version=0"); |
1382 | 7 | |
1383 | 7 | uint32_t flags = read32(content.data() + 4, isBig); |
1384 | 7 | if (flags & (MachOLinkingContext::objc_supports_gc | |
1385 | 7 | MachOLinkingContext::objc_gc_only)) |
1386 | 1 | return llvm::make_error<GenericError>(sect.segmentName + "/" + |
1387 | 1 | sect.sectionName + |
1388 | 1 | " in file " + file.path() + |
1389 | 1 | " uses GC. This is not supported"); |
1390 | 6 | |
1391 | 6 | if (6 flags & MachOLinkingContext::objc_retainReleaseForSimulator6 ) |
1392 | 3 | file.setObjcConstraint(MachOLinkingContext::objc_retainReleaseForSimulator); |
1393 | 6 | else |
1394 | 3 | file.setObjcConstraint(MachOLinkingContext::objc_retainRelease); |
1395 | 9 | |
1396 | 9 | file.setSwiftVersion((flags >> 8) & 0xFF); |
1397 | 9 | |
1398 | 9 | return llvm::Error::success(); |
1399 | 9 | } |
1400 | | |
1401 | | /// Converts normalized mach-o file into an lld::File and lld::Atoms. |
1402 | | llvm::Expected<std::unique_ptr<lld::File>> |
1403 | | objectToAtoms(const NormalizedFile &normalizedFile, StringRef path, |
1404 | 154 | bool copyRefs) { |
1405 | 154 | std::unique_ptr<MachOFile> file(new MachOFile(path)); |
1406 | 154 | if (auto ec = normalizedObjectToAtoms(file.get(), normalizedFile, copyRefs)) |
1407 | 4 | return std::move(ec); |
1408 | 150 | return std::unique_ptr<File>(std::move(file)); |
1409 | 150 | } |
1410 | | |
1411 | | llvm::Expected<std::unique_ptr<lld::File>> |
1412 | | dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path, |
1413 | 112 | bool copyRefs) { |
1414 | 112 | // Instantiate SharedLibraryFile object. |
1415 | 112 | std::unique_ptr<MachODylibFile> file(new MachODylibFile(path)); |
1416 | 112 | if (auto ec = normalizedDylibToAtoms(file.get(), normalizedFile, copyRefs)) |
1417 | 0 | return std::move(ec); |
1418 | 112 | return std::unique_ptr<File>(std::move(file)); |
1419 | 112 | } |
1420 | | |
1421 | | } // anonymous namespace |
1422 | | |
1423 | | namespace normalized { |
1424 | | |
1425 | 310 | static bool isObjCImageInfo(const Section §) { |
1426 | 0 | return (sect.segmentName == "__OBJC" && sect.sectionName == "__image_info") || |
1427 | 310 | (sect.segmentName == "__DATA" && 310 sect.sectionName == "__objc_imageinfo"75 ); |
1428 | 310 | } |
1429 | | |
1430 | | llvm::Error |
1431 | | normalizedObjectToAtoms(MachOFile *file, |
1432 | | const NormalizedFile &normalizedFile, |
1433 | 181 | bool copyRefs) { |
1434 | 181 | DEBUG(llvm::dbgs() << "******** Normalizing file to atoms: " |
1435 | 181 | << file->path() << "\n"); |
1436 | 181 | bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0); |
1437 | 181 | |
1438 | 181 | // Create atoms from each section. |
1439 | 322 | for (auto § : normalizedFile.sections) { |
1440 | 322 | |
1441 | 322 | // If this is a debug-info section parse it specially. |
1442 | 322 | if (isDebugInfoSection(sect)) |
1443 | 12 | continue; |
1444 | 310 | |
1445 | 310 | // If the file contains an objc_image_info struct, then we should parse the |
1446 | 310 | // ObjC flags and Swift version. |
1447 | 310 | if (310 isObjCImageInfo(sect)310 ) { |
1448 | 9 | if (auto ec = parseObjCImageInfo(sect, normalizedFile, *file)) |
1449 | 3 | return ec; |
1450 | 6 | // We then skip adding atoms for this section as we use the ObjCPass to |
1451 | 6 | // re-emit this data after it has been aggregated for all files. |
1452 | 6 | continue; |
1453 | 6 | } |
1454 | 301 | |
1455 | 301 | bool customSectionName; |
1456 | 301 | DefinedAtom::ContentType atomType = atomTypeFromSection(sect, |
1457 | 301 | customSectionName); |
1458 | 301 | if (auto ec = processSection(atomType, sect, customSectionName, |
1459 | 301 | normalizedFile, *file, scatterable, copyRefs)) |
1460 | 0 | return ec; |
1461 | 178 | } |
1462 | 178 | // Create atoms from undefined symbols. |
1463 | 178 | for (auto &sym : normalizedFile.undefinedSymbols) 178 { |
1464 | 115 | // Undefinded symbols with n_value != 0 are actually tentative definitions. |
1465 | 115 | if (sym.value == Hex64(0)115 ) { |
1466 | 110 | file->addUndefinedAtom(sym.name, copyRefs); |
1467 | 115 | } else { |
1468 | 5 | file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value, |
1469 | 5 | DefinedAtom::Alignment(1 << (sym.desc >> 8)), |
1470 | 5 | copyRefs); |
1471 | 5 | } |
1472 | 115 | } |
1473 | 178 | |
1474 | 178 | // Convert mach-o relocations to References |
1475 | 178 | std::unique_ptr<mach_o::ArchHandler> handler |
1476 | 178 | = ArchHandler::create(normalizedFile.arch); |
1477 | 319 | for (auto § : normalizedFile.sections) { |
1478 | 319 | if (isDebugInfoSection(sect)) |
1479 | 12 | continue; |
1480 | 307 | if (llvm::Error 307 ec307 = convertRelocs(sect, normalizedFile, scatterable, |
1481 | 307 | *file, *handler)) |
1482 | 1 | return ec; |
1483 | 177 | } |
1484 | 177 | |
1485 | 177 | // Add additional arch-specific References |
1486 | 177 | file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void 177 { |
1487 | 491 | handler->addAdditionalReferences(*atom); |
1488 | 491 | }); |
1489 | 177 | |
1490 | 177 | // Each __eh_frame section needs references to both __text (the function we're |
1491 | 177 | // providing unwind info for) and itself (FDE -> CIE). These aren't |
1492 | 177 | // represented in the relocations on some architectures, so we have to add |
1493 | 177 | // them back in manually there. |
1494 | 177 | if (auto ec = addEHFrameReferences(normalizedFile, *file, *handler)) |
1495 | 0 | return ec; |
1496 | 177 | |
1497 | 177 | // Process mach-o data-in-code regions array. That information is encoded in |
1498 | 177 | // atoms as References at each transition point. |
1499 | 177 | unsigned nextIndex = 0; |
1500 | 28 | for (const DataInCode &entry : normalizedFile.dataInCode) { |
1501 | 28 | ++nextIndex; |
1502 | 28 | const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset); |
1503 | 28 | if (!s28 ) { |
1504 | 0 | return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE address (" |
1505 | 0 | + Twine(entry.offset) |
1506 | 0 | + ") is not in any section")); |
1507 | 0 | } |
1508 | 28 | uint64_t offsetInSect = entry.offset - s->address; |
1509 | 28 | uint32_t offsetInAtom; |
1510 | 28 | MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect, |
1511 | 28 | &offsetInAtom); |
1512 | 28 | if (offsetInAtom + entry.length > atom->size()28 ) { |
1513 | 0 | return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE entry " |
1514 | 0 | "(offset=" |
1515 | 0 | + Twine(entry.offset) |
1516 | 0 | + ", length=" |
1517 | 0 | + Twine(entry.length) |
1518 | 0 | + ") crosses atom boundary.")); |
1519 | 0 | } |
1520 | 28 | // Add reference that marks start of data-in-code. |
1521 | 28 | atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(), |
1522 | 28 | handler->dataInCodeTransitionStart(*atom), |
1523 | 28 | offsetInAtom, atom, entry.kind); |
1524 | 28 | |
1525 | 28 | // Peek at next entry, if it starts where this one ends, skip ending ref. |
1526 | 28 | if (nextIndex < normalizedFile.dataInCode.size()28 ) { |
1527 | 23 | const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex]; |
1528 | 23 | if (nextEntry.offset == (entry.offset + entry.length)) |
1529 | 18 | continue; |
1530 | 10 | } |
1531 | 10 | |
1532 | 10 | // If data goes to end of function, skip ending ref. |
1533 | 10 | if (10 (offsetInAtom + entry.length) == atom->size()10 ) |
1534 | 2 | continue; |
1535 | 8 | |
1536 | 8 | // Add reference that marks end of data-in-code. |
1537 | 8 | atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(), |
1538 | 8 | handler->dataInCodeTransitionEnd(*atom), |
1539 | 8 | offsetInAtom+entry.length, atom, 0); |
1540 | 8 | } |
1541 | 177 | |
1542 | 177 | // Cache some attributes on the file for use later. |
1543 | 177 | file->setFlags(normalizedFile.flags); |
1544 | 177 | file->setArch(normalizedFile.arch); |
1545 | 177 | file->setOS(normalizedFile.os); |
1546 | 177 | file->setMinVersion(normalizedFile.minOSverson); |
1547 | 177 | file->setMinVersionLoadCommandKind(normalizedFile.minOSVersionKind); |
1548 | 177 | |
1549 | 177 | // Sort references in each atom to their canonical order. |
1550 | 496 | for (const DefinedAtom* defAtom : file->defined()) { |
1551 | 496 | reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences(); |
1552 | 496 | } |
1553 | 177 | |
1554 | 177 | if (auto err = parseDebugInfo(*file, normalizedFile, copyRefs)) |
1555 | 0 | return err; |
1556 | 177 | |
1557 | 177 | return llvm::Error::success(); |
1558 | 177 | } |
1559 | | |
1560 | | llvm::Error |
1561 | | normalizedDylibToAtoms(MachODylibFile *file, |
1562 | | const NormalizedFile &normalizedFile, |
1563 | 116 | bool copyRefs) { |
1564 | 116 | file->setInstallName(normalizedFile.installName); |
1565 | 116 | file->setCompatVersion(normalizedFile.compatVersion); |
1566 | 116 | file->setCurrentVersion(normalizedFile.currentVersion); |
1567 | 116 | |
1568 | 116 | // Tell MachODylibFile object about all symbols it exports. |
1569 | 116 | if (!normalizedFile.exportInfo.empty()116 ) { |
1570 | 109 | // If exports trie exists, use it instead of traditional symbol table. |
1571 | 146 | for (const Export &exp : normalizedFile.exportInfo) { |
1572 | 146 | bool weakDef = (exp.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION); |
1573 | 146 | // StringRefs from export iterator are ephemeral, so force copy. |
1574 | 146 | file->addExportedSymbol(exp.name, weakDef, true); |
1575 | 146 | } |
1576 | 116 | } else { |
1577 | 15 | for (auto &sym : normalizedFile.globalSymbols) { |
1578 | 15 | assert((sym.scope & N_EXT) && "only expect external symbols here"); |
1579 | 15 | bool weakDef = (sym.desc & N_WEAK_DEF); |
1580 | 15 | file->addExportedSymbol(sym.name, weakDef, copyRefs); |
1581 | 15 | } |
1582 | 7 | } |
1583 | 116 | // Tell MachODylibFile object about all dylibs it re-exports. |
1584 | 5 | for (const DependentDylib &dep : normalizedFile.dependentDylibs) { |
1585 | 5 | if (dep.kind == llvm::MachO::LC_REEXPORT_DYLIB) |
1586 | 1 | file->addReExportedDylib(dep.path); |
1587 | 5 | } |
1588 | 116 | return llvm::Error::success(); |
1589 | 116 | } |
1590 | | |
1591 | | void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType, |
1592 | | StringRef &segmentName, |
1593 | | StringRef §ionName, |
1594 | | SectionType §ionType, |
1595 | | SectionAttr §ionAttrs, |
1596 | 138 | bool &relocsToDefinedCanBeImplicit) { |
1597 | 138 | |
1598 | 138 | for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; |
1599 | 1.26k | p->atomType != DefinedAtom::typeUnknown1.26k ; ++p1.12k ) { |
1600 | 1.26k | if (p->atomType != atomType) |
1601 | 1.12k | continue; |
1602 | 138 | // Wild carded entries are ignored for reverse lookups. |
1603 | 138 | if (138 p->segmentName.empty() || 138 p->sectionName.empty()138 ) |
1604 | 0 | continue; |
1605 | 138 | segmentName = p->segmentName; |
1606 | 138 | sectionName = p->sectionName; |
1607 | 138 | sectionType = p->sectionType; |
1608 | 138 | sectionAttrs = 0; |
1609 | 138 | relocsToDefinedCanBeImplicit = false; |
1610 | 138 | if (atomType == DefinedAtom::typeCode) |
1611 | 60 | sectionAttrs = S_ATTR_PURE_INSTRUCTIONS; |
1612 | 138 | if (atomType == DefinedAtom::typeCFI) |
1613 | 10 | relocsToDefinedCanBeImplicit = true; |
1614 | 1.26k | return; |
1615 | 1.26k | } |
1616 | 0 | llvm_unreachable0 ("content type not yet supported"); |
1617 | 0 | } |
1618 | | |
1619 | | llvm::Expected<std::unique_ptr<lld::File>> |
1620 | | normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path, |
1621 | 266 | bool copyRefs) { |
1622 | 266 | switch (normalizedFile.fileType) { |
1623 | 112 | case MH_DYLIB: |
1624 | 112 | case MH_DYLIB_STUB: |
1625 | 112 | return dylibToAtoms(normalizedFile, path, copyRefs); |
1626 | 154 | case MH_OBJECT: |
1627 | 154 | return objectToAtoms(normalizedFile, path, copyRefs); |
1628 | 0 | default: |
1629 | 0 | llvm_unreachable("unhandled MachO file type!"); |
1630 | 0 | } |
1631 | 0 | } |
1632 | | |
1633 | | } // namespace normalized |
1634 | | } // namespace mach_o |
1635 | | } // namespace lld |