Skip to content

Commit a3d95a5

Browse files
int3 authored and smeenai committed
[lld-macho] Add basic symbol table output
This diff implements basic support for writing a symbol table. Attributes are loosely supported for extern symbols and not at all for other types. Immediate future work will involve implementing section merging. Initial version by Kellie Medlin <kelliem@fb.com>. Differential Revision: https://reviews.llvm.org/D76742
1 parent e4872d7 commit a3d95a5

File tree

4 files changed

+176
-2
lines changed

4 files changed

+176
-2
lines changed

lld/MachO/SyntheticSections.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "SyntheticSections.h"
1010
#include "InputFiles.h"
1111
#include "OutputSegment.h"
12+
#include "SymbolTable.h"
1213
#include "Symbols.h"
1314
#include "Writer.h"
1415

@@ -128,6 +129,59 @@ void BindingSection::writeTo(uint8_t *buf) {
128129
memcpy(buf, contents.data(), contents.size());
129130
}
130131

132+
// Builds the symbol table section and binds it to the string table that will
// hold the names of its symbols. The section is tagged with the link-edit
// segment name and the symbol-table section name.
SymtabSection::SymtabSection(StringTableSection &stringTableSection)
    : stringTableSection(stringTableSection) {
  name = section_names::symbolTable;
  segname = segment_names::linkEdit;
}
137+
138+
size_t SymtabSection::getSize() const {
139+
return symbols.size() * sizeof(nlist_64);
140+
}
141+
142+
void SymtabSection::finalizeContents() {
143+
// TODO: We should filter out some symbols.
144+
for (Symbol *sym : symtab->getSymbols())
145+
symbols.push_back({sym, stringTableSection.addString(sym->getName())});
146+
}
147+
148+
void SymtabSection::writeTo(uint8_t *buf) {
149+
auto *nList = reinterpret_cast<nlist_64 *>(buf);
150+
for (const SymtabEntry &entry : symbols) {
151+
// TODO support other symbol types
152+
// TODO populate n_desc
153+
if (auto defined = dyn_cast<Defined>(entry.sym)) {
154+
nList->n_strx = entry.strx;
155+
nList->n_type = N_EXT | N_SECT;
156+
nList->n_sect = defined->isec->sectionIndex;
157+
// For the N_SECT symbol type, n_value is the address of the symbol
158+
nList->n_value = defined->value + defined->isec->addr;
159+
}
160+
161+
++nList;
162+
}
163+
}
164+
165+
// Tags the string table with the link-edit segment name and the string-table
// section name.
StringTableSection::StringTableSection() {
  name = section_names::stringTable;
  segname = segment_names::linkEdit;
}
169+
170+
// Appends str to the table and returns the offset at which it will start.
// The running size grows by the string's length plus one byte for its
// terminator.
uint32_t StringTableSection::addString(StringRef str) {
  const uint32_t startOffset = size;
  size += str.size() + 1; // account for null terminator
  strings.push_back(str);
  return startOffset;
}
176+
177+
void StringTableSection::writeTo(uint8_t *buf) {
178+
uint32_t off = 0;
179+
for (StringRef str : strings) {
180+
memcpy(buf + off, str.data(), str.size());
181+
off += str.size() + 1; // account for null terminator
182+
}
183+
}
184+
131185
InStruct in;
132186

133187
} // namespace macho

lld/MachO/SyntheticSections.h

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ namespace section_names {
2323
constexpr const char *pageZero = "__pagezero";
2424
constexpr const char *header = "__mach_header";
2525
constexpr const char *binding = "__binding";
26+
constexpr const char *symbolTable = "__symbol_table";
27+
constexpr const char *stringTable = "__string_table";
2628

2729
} // namespace section_names
2830

@@ -93,6 +95,49 @@ class BindingSection : public InputSection {
9395
SmallVector<char, 128> contents;
9496
};
9597

98+
// Stores the strings referenced by the symbol table.
99+
class StringTableSection : public InputSection {
100+
public:
101+
StringTableSection();
102+
// Returns the start offset of the added string.
103+
uint32_t addString(StringRef);
104+
size_t getSize() const override { return size; }
105+
// Like other sections in __LINKEDIT, the string table section is special: its
106+
// offsets are recorded in the LC_SYMTAB load command, instead of in section
107+
// headers.
108+
bool isHidden() const override { return true; }
109+
void writeTo(uint8_t *buf) override;
110+
111+
private:
112+
// An n_strx value of 0 always indicates the empty string, so we must locate
113+
// our non-empty string values at positive offsets in the string table.
114+
// Therefore we insert a dummy value at position zero.
115+
std::vector<StringRef> strings{"\0"};
116+
size_t size = 1;
117+
};
118+
119+
struct SymtabEntry {
120+
Symbol *sym;
121+
size_t strx;
122+
};
123+
124+
class SymtabSection : public InputSection {
125+
public:
126+
SymtabSection(StringTableSection &);
127+
void finalizeContents();
128+
size_t getNumSymbols() const { return symbols.size(); }
129+
size_t getSize() const override;
130+
// Like other sections in __LINKEDIT, the symtab section is special: its
131+
// offsets are recorded in the LC_SYMTAB load command, instead of in section
132+
// headers.
133+
bool isHidden() const override { return true; }
134+
void writeTo(uint8_t *buf) override;
135+
136+
private:
137+
StringTableSection &stringTableSection;
138+
std::vector<SymtabEntry> symbols;
139+
};
140+
96141
struct InStruct {
97142
GotSection *got = nullptr;
98143
};

lld/MachO/Writer.cpp

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ class Writer {
5252
uint64_t fileOff = 0;
5353
MachHeaderSection *headerSection = nullptr;
5454
BindingSection *bindingSection = nullptr;
55+
SymtabSection *symtabSection = nullptr;
56+
StringTableSection *stringTableSection = nullptr;
5557
};
5658

5759
// LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
@@ -163,13 +165,23 @@ class LCMain : public LoadCommand {
163165

164166
class LCSymtab : public LoadCommand {
165167
public:
168+
LCSymtab(SymtabSection *symtabSection, StringTableSection *stringTableSection)
169+
: symtabSection(symtabSection), stringTableSection(stringTableSection) {}
170+
166171
uint32_t getSize() const override { return sizeof(symtab_command); }
167172

168173
void writeTo(uint8_t *buf) const override {
169174
auto *c = reinterpret_cast<symtab_command *>(buf);
170175
c->cmd = LC_SYMTAB;
171176
c->cmdsize = getSize();
177+
c->symoff = symtabSection->getFileOffset();
178+
c->nsyms = symtabSection->getNumSymbols();
179+
c->stroff = stringTableSection->getFileOffset();
180+
c->strsize = stringTableSection->getFileSize();
172181
}
182+
183+
SymtabSection *symtabSection = nullptr;
184+
StringTableSection *stringTableSection = nullptr;
173185
};
174186

175187
class LCLoadDylib : public LoadCommand {
@@ -238,7 +250,12 @@ class SectionComparator {
238250
{defaultPosition, {}},
239251
// Make sure __LINKEDIT is the last segment (i.e. all its hidden
240252
// sections must be ordered after other sections).
241-
{segment_names::linkEdit, {section_names::binding}},
253+
{segment_names::linkEdit,
254+
{
255+
section_names::binding,
256+
section_names::symbolTable,
257+
section_names::stringTable,
258+
}},
242259
};
243260

244261
for (uint32_t i = 0, n = ordering.size(); i < n; ++i) {
@@ -294,7 +311,8 @@ void Writer::scanRelocations() {
294311
void Writer::createLoadCommands() {
295312
headerSection->addLoadCommand(make<LCDyldInfo>(bindingSection));
296313
headerSection->addLoadCommand(make<LCLoadDylinker>());
297-
headerSection->addLoadCommand(make<LCSymtab>());
314+
headerSection->addLoadCommand(
315+
make<LCSymtab>(symtabSection, stringTableSection));
298316
headerSection->addLoadCommand(make<LCDysymtab>());
299317
headerSection->addLoadCommand(make<LCMain>());
300318

@@ -323,6 +341,8 @@ void Writer::createLoadCommands() {
323341
void Writer::createHiddenSections() {
324342
headerSection = createInputSection<MachHeaderSection>();
325343
bindingSection = createInputSection<BindingSection>();
344+
stringTableSection = createInputSection<StringTableSection>();
345+
symtabSection = createInputSection<SymtabSection>(*stringTableSection);
326346
createInputSection<PageZeroSection>();
327347
}
328348

@@ -405,6 +425,7 @@ void Writer::run() {
405425

406426
// Fill __LINKEDIT contents.
407427
bindingSection->finalizeContents();
428+
symtabSection->finalizeContents();
408429

409430
// Now that __LINKEDIT is filled out, do a proper calculation of its
410431
// addresses and offsets. We don't have to recalculate the other segments

lld/test/MachO/symtab.s

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# REQUIRES: x86
2+
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
3+
# RUN: lld -flavor darwinnew -o %t %t.o
4+
# RUN: llvm-readobj -symbols %t | FileCheck %s
5+
6+
# CHECK: Symbols [
7+
# CHECK-NEXT: Symbol {
8+
# CHECK-NEXT: Name: _main
9+
# CHECK-NEXT: Extern
10+
# CHECK-NEXT: Type: Section (0xE)
11+
# CHECK-NEXT: Section: __text (0x1)
12+
# CHECK-NEXT: RefType:
13+
# CHECK-NEXT: Flags [ (0x0)
14+
# CHECK-NEXT: ]
15+
# CHECK-NEXT: Value:
16+
# CHECK-NEXT: }
17+
# CHECK-NEXT: Symbol {
18+
# CHECK-NEXT: Name: bar
19+
# CHECK-NEXT: Extern
20+
# CHECK-NEXT: Type: Section (0xE)
21+
# CHECK-NEXT: Section: __text (0x1)
22+
# CHECK-NEXT: RefType:
23+
# CHECK-NEXT: Flags [ (0x0)
24+
# CHECK-NEXT: ]
25+
# CHECK-NEXT: Value:
26+
# CHECK-NEXT: }
27+
# CHECK-NEXT: Symbol {
28+
# CHECK-NEXT: Name: foo
29+
# CHECK-NEXT: Extern
30+
# CHECK-NEXT: Type: Section (0xE)
31+
# CHECK-NEXT: Section: __data
32+
# CHECK-NEXT: RefType:
33+
# CHECK-NEXT: Flags [ (0x0)
34+
# CHECK-NEXT: ]
35+
# CHECK-NEXT: Value:
36+
# CHECK-NEXT: }
37+
# CHECK-NEXT: ]
38+
39+
.data
40+
.global foo
41+
foo:
42+
.asciz "Hello world!\n"
43+
44+
.text
45+
.global bar
46+
.global _main
47+
48+
_main:
49+
mov $0, %rax
50+
ret
51+
52+
bar:
53+
mov $2, %rax
54+
ret

0 commit comments

Comments
 (0)