-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[llvm-objcopy][COFF] Update .symidx values after stripping #153322
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
After deleting debug sections symbol indexes are shifted but WinCFGuard sections encode these indices into section data that is completely ignored. Update symbol indices as well.
@llvm/pr-subscribers-llvm-binary-utilities Author: Evgenii Kudriashov (e-kud) ChangesAfter deleting debug sections symbol indexes are shifted but WinCFGuard sections encode these indices into section data that is completely ignored. Update symbol indices as well. Full diff: https://github.com/llvm/llvm-project/pull/153322.diff 5 Files Affected:
diff --git a/llvm/lib/ObjCopy/COFF/COFFObject.cpp b/llvm/lib/ObjCopy/COFF/COFFObject.cpp
index 5fa13391c908f..fcb1bbfe91332 100644
--- a/llvm/lib/ObjCopy/COFF/COFFObject.cpp
+++ b/llvm/lib/ObjCopy/COFF/COFFObject.cpp
@@ -16,8 +16,11 @@ namespace coff {
using namespace object;
void Object::addSymbols(ArrayRef<Symbol> NewSymbols) {
+ size_t RawIndex = 0;
for (Symbol S : NewSymbols) {
S.UniqueId = NextSymbolUniqueId++;
+ S.OriginalRawIndex = RawIndex;
+ RawIndex += 1 + S.Sym.NumberOfAuxSymbols;
Symbols.emplace_back(S);
}
updateSymbols();
diff --git a/llvm/lib/ObjCopy/COFF/COFFObject.h b/llvm/lib/ObjCopy/COFF/COFFObject.h
index cdd1f17fc6055..9f88d45962513 100644
--- a/llvm/lib/ObjCopy/COFF/COFFObject.h
+++ b/llvm/lib/ObjCopy/COFF/COFFObject.h
@@ -89,6 +89,7 @@ struct Symbol {
std::optional<size_t> WeakTargetSymbolId;
size_t UniqueId;
size_t RawIndex;
+ size_t OriginalRawIndex;
bool Referenced;
};
diff --git a/llvm/lib/ObjCopy/COFF/COFFWriter.cpp b/llvm/lib/ObjCopy/COFF/COFFWriter.cpp
index 350c4aec572c9..4e3aa8201f0dc 100644
--- a/llvm/lib/ObjCopy/COFF/COFFWriter.cpp
+++ b/llvm/lib/ObjCopy/COFF/COFFWriter.cpp
@@ -12,6 +12,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/Object/COFF.h"
+#include "llvm/Support/CRC.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
#include <cstddef>
@@ -92,6 +93,63 @@ Error COFFWriter::finalizeSymbolContents() {
return Error::success();
}
+Error COFFWriter::finalizeCFGuardContents() {
+ DenseMap<size_t, size_t> SymIdMap;
+ bool NeedUpdate = false;
+ for (Symbol &Sym : Obj.getMutableSymbols()) {
+ NeedUpdate |= Sym.OriginalRawIndex == Sym.RawIndex;
+ SymIdMap[Sym.OriginalRawIndex] = Sym.RawIndex;
+ }
+
+ if (!NeedUpdate)
+ return Error::success();
+
+ for (auto &Sym : Obj.getMutableSymbols()) {
+ if (Sym.Name != ".gljmp$y" && Sym.Name != ".giats$y" &&
+ Sym.Name != ".gfids$y")
+ continue;
+
+ auto Sec = find_if(Obj.getMutableSections(),
+ [&Sym](Section &S) { return S.Name == Sym.Name; });
+
+ if (Sec == Obj.getMutableSections().end() ||
+ Sec->UniqueId != Sym.TargetSectionId)
+ return createStringError(object_error::invalid_symbol_index,
+ "symbol '%s' is missing its section",
+ Sym.Name.str().c_str());
+
+ if (Sym.Sym.NumberOfAuxSymbols != 1 ||
+ Sym.Sym.StorageClass != IMAGE_SYM_CLASS_STATIC)
+ return createStringError(object_error::invalid_symbol_index,
+ "symbol '%s' has unexpected section format",
+ Sym.Name.str().c_str());
+
+ ArrayRef<uint8_t> RawIds = Sec->getContents();
+ // Nothing to do and also CheckSum will be -1 instead of 0 if we recalculate
+ // it on empty input.
+ if (RawIds.size() == 0)
+ return Error::success();
+
+ // Create updated content
+ ArrayRef<uint32_t> Ids(reinterpret_cast<const uint32_t *>(RawIds.data()),
+ RawIds.size() / 4);
+ std::vector<uint32_t> NewIds;
+ for (auto Id : Ids)
+ NewIds.push_back(SymIdMap[Id]);
+ ArrayRef<uint8_t> NewRawIds(reinterpret_cast<uint8_t *>(NewIds.data()),
+ RawIds.size());
+ // Update check sum
+ JamCRC JC(/*Init=*/0);
+ JC.update(NewRawIds);
+ coff_aux_section_definition *SD =
+ reinterpret_cast<coff_aux_section_definition *>(Sym.AuxData[0].Opaque);
+ SD->CheckSum = JC.getCRC();
+ // Set new content
+ Sec->setOwnedContents(NewRawIds);
+ }
+ return Error::success();
+}
+
void COFFWriter::layoutSections() {
for (auto &S : Obj.getMutableSections()) {
if (S.Header.SizeOfRawData > 0)
@@ -183,6 +241,8 @@ Error COFFWriter::finalize(bool IsBigObj) {
return E;
if (Error E = finalizeSymbolContents())
return E;
+ if (Error E = finalizeCFGuardContents())
+ return E;
size_t SizeOfHeaders = 0;
FileAlignment = 1;
diff --git a/llvm/lib/ObjCopy/COFF/COFFWriter.h b/llvm/lib/ObjCopy/COFF/COFFWriter.h
index b7dca69e9a81a..557dbe4c01b9c 100644
--- a/llvm/lib/ObjCopy/COFF/COFFWriter.h
+++ b/llvm/lib/ObjCopy/COFF/COFFWriter.h
@@ -34,6 +34,7 @@ class COFFWriter {
template <class SymbolTy> std::pair<size_t, size_t> finalizeSymbolTable();
Error finalizeRelocTargets();
Error finalizeSymbolContents();
+ Error finalizeCFGuardContents();
void layoutSections();
Expected<size_t> finalizeStringTable();
diff --git a/llvm/test/tools/llvm-objcopy/COFF/strip-update-winguards.test b/llvm/test/tools/llvm-objcopy/COFF/strip-update-winguards.test
new file mode 100644
index 0000000000000..4dc1a821dbd6a
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/COFF/strip-update-winguards.test
@@ -0,0 +1,237 @@
+# RUN: yaml2obj %s -o %t.in.o
+
+# RUN: llvm-readobj -r -s -x '.gfids$y' -x '.giats$y' -x '.gljmp$y' %t.in.o | FileCheck %s --check-prefix=ORIG
+# RUN: llvm-objcopy --strip-debug %t.in.o %t.out.o
+# RUN: llvm-readobj -r -s -x '.gfids$y' -x '.giats$y' -x '.gljmp$y' %t.out.o | FileCheck %s --check-prefix=STRIP
+
+# ORIG: Relocations [
+# ORIG-NEXT: Section (1) .text {
+# ORIG-NEXT: 0x3 IMAGE_REL_AMD64_REL32 foo (14)
+# ORIG-NEXT: 0xA IMAGE_REL_AMD64_REL32 bar (15)
+# ORIG-NEXT: 0x11 IMAGE_REL_AMD64_REL32 baz (16)
+# ORIG-NEXT: 0x18 IMAGE_REL_AMD64_REL32 foobar (17)
+# ORIG-NEXT: }
+# ORIG-NEXT: ]
+# ORIG: Symbols [
+# ORIG: Name: .gfids$y
+# ORIG: Section: .gfids$y
+# ORIG: AuxSymbolCount: 1
+# ORIG: AuxSectionDef {
+# ORIG: Checksum: 0x459345AD
+# ORIG: }
+# ORIG: Name: .giats$y
+# ORIG: Section: .giats$y
+# ORIG: AuxSymbolCount: 1
+# ORIG: AuxSectionDef {
+# ORIG: Checksum: 0x31852256
+# ORIG: }
+# ORIG: Name: .gljmp$y
+# ORIG: Section: .gljmp$y
+# ORIG: AuxSymbolCount: 1
+# ORIG: AuxSectionDef {
+# ORIG: Checksum: 0xC608680B
+# ORIG: }
+# ORIG: ]
+# ORIG: Hex dump of section '.gfids$y':
+# ORIG-NEXT: 0x00000000 0e000000 10000000 ........
+# ORIG: Hex dump of section '.giats$y':
+# ORIG-NEXT: 0x00000000 0f000000 11000000 ........
+# ORIG: Hex dump of section '.gljmp$y':
+# ORIG-NEXT: 0x00000000 0e000000 0f000000 10000000 11000000 ................
+
+# STRIP: Relocations [
+# STRIP-NEXT: Section (1) .text {
+# STRIP-NEXT: 0x3 IMAGE_REL_AMD64_REL32 foo (12)
+# STRIP-NEXT: 0xA IMAGE_REL_AMD64_REL32 bar (13)
+# STRIP-NEXT: 0x11 IMAGE_REL_AMD64_REL32 baz (14)
+# STRIP-NEXT: 0x18 IMAGE_REL_AMD64_REL32 foobar (15)
+# STRIP-NEXT: }
+# STRIP-NEXT: ]
+# STRIP: Symbols [
+# STRIP: Name: .gfids$y
+# STRIP: Section: .gfids$y
+# STRIP: AuxSymbolCount: 1
+# STRIP: AuxSectionDef {
+# STRIP: Checksum: 0xB770627C
+# STRIP: }
+# STRIP: Name: .giats$y
+# STRIP: Section: .giats$y
+# STRIP: AuxSymbolCount: 1
+# STRIP: AuxSectionDef {
+# STRIP: Checksum: 0xC3660587
+# STRIP: }
+# STRIP: Name: .gljmp$y
+# STRIP: Section: .gljmp$y
+# STRIP: AuxSymbolCount: 1
+# STRIP: AuxSectionDef {
+# STRIP: Checksum: 0x7464D042
+# STRIP: }
+# STRIP: ]
+# STRIP: Hex dump of section '.gfids$y':
+# STRIP-NEXT: 0x00000000 0c000000 0e000000 ........
+# STRIP: Hex dump of section '.giats$y':
+# STRIP-NEXT: 0x00000000 0d000000 0f000000 ........
+# STRIP: Hex dump of section '.gljmp$y':
+# STRIP-NEXT: 0x00000000 0c000000 0d000000 0e000000 0f000000 ................
+
+--- !COFF
+header:
+ Machine: IMAGE_FILE_MACHINE_AMD64
+ Characteristics: [ ]
+sections:
+ - Name: .text
+ Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ]
+ Alignment: 4
+ SectionData: 488D0500000000488D0D00000000488D0500000000488D0500000000
+ SizeOfRawData: 28
+ Relocations:
+ - VirtualAddress: 3
+ SymbolName: foo
+ Type: IMAGE_REL_AMD64_REL32
+ - VirtualAddress: 10
+ SymbolName: bar
+ Type: IMAGE_REL_AMD64_REL32
+ - VirtualAddress: 17
+ SymbolName: baz
+ Type: IMAGE_REL_AMD64_REL32
+ - VirtualAddress: 24
+ SymbolName: foobar
+ Type: IMAGE_REL_AMD64_REL32
+ - Name: .data
+ Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ]
+ Alignment: 4
+ SectionData: ''
+ - Name: .bss
+ Characteristics: [ IMAGE_SCN_CNT_UNINITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ]
+ Alignment: 4
+ SectionData: ''
+ - Name: '.debug$S'
+ Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ]
+ Alignment: 4
+ SectionData: 04000000F100000044656275672073656374696F6E20746F20626520737472697070656400
+ SizeOfRawData: 37
+ - Name: '.gfids$y'
+ Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ]
+ Alignment: 4
+ SectionData: '0E00000010000000'
+ SizeOfRawData: 8
+ - Name: '.giats$y'
+ Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ]
+ Alignment: 4
+ SectionData: 0F00000011000000
+ SizeOfRawData: 8
+ - Name: '.gljmp$y'
+ Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ]
+ Alignment: 4
+ SectionData: 0E0000000F0000001000000011000000
+ SizeOfRawData: 16
+symbols:
+ - Name: .text
+ Value: 0
+ SectionNumber: 1
+ SimpleType: IMAGE_SYM_TYPE_NULL
+ ComplexType: IMAGE_SYM_DTYPE_NULL
+ StorageClass: IMAGE_SYM_CLASS_STATIC
+ SectionDefinition:
+ Length: 28
+ NumberOfRelocations: 4
+ NumberOfLinenumbers: 0
+ CheckSum: 3583480811
+ Number: 1
+ - Name: .data
+ Value: 0
+ SectionNumber: 2
+ SimpleType: IMAGE_SYM_TYPE_NULL
+ ComplexType: IMAGE_SYM_DTYPE_NULL
+ StorageClass: IMAGE_SYM_CLASS_STATIC
+ SectionDefinition:
+ Length: 0
+ NumberOfRelocations: 0
+ NumberOfLinenumbers: 0
+ CheckSum: 0
+ Number: 2
+ - Name: .bss
+ Value: 0
+ SectionNumber: 3
+ SimpleType: IMAGE_SYM_TYPE_NULL
+ ComplexType: IMAGE_SYM_DTYPE_NULL
+ StorageClass: IMAGE_SYM_CLASS_STATIC
+ SectionDefinition:
+ Length: 0
+ NumberOfRelocations: 0
+ NumberOfLinenumbers: 0
+ CheckSum: 0
+ Number: 3
+ - Name: '.debug$S'
+ Value: 0
+ SectionNumber: 4
+ SimpleType: IMAGE_SYM_TYPE_NULL
+ ComplexType: IMAGE_SYM_DTYPE_NULL
+ StorageClass: IMAGE_SYM_CLASS_STATIC
+ SectionDefinition:
+ Length: 37
+ NumberOfRelocations: 0
+ NumberOfLinenumbers: 0
+ CheckSum: 2941632545
+ Number: 4
+ - Name: '.gfids$y'
+ Value: 0
+ SectionNumber: 5
+ SimpleType: IMAGE_SYM_TYPE_NULL
+ ComplexType: IMAGE_SYM_DTYPE_NULL
+ StorageClass: IMAGE_SYM_CLASS_STATIC
+ SectionDefinition:
+ Length: 8
+ NumberOfRelocations: 0
+ NumberOfLinenumbers: 0
+ CheckSum: 1167279533
+ Number: 5
+ - Name: '.giats$y'
+ Value: 0
+ SectionNumber: 6
+ SimpleType: IMAGE_SYM_TYPE_NULL
+ ComplexType: IMAGE_SYM_DTYPE_NULL
+ StorageClass: IMAGE_SYM_CLASS_STATIC
+ SectionDefinition:
+ Length: 8
+ NumberOfRelocations: 0
+ NumberOfLinenumbers: 0
+ CheckSum: 830808662
+ Number: 6
+ - Name: '.gljmp$y'
+ Value: 0
+ SectionNumber: 7
+ SimpleType: IMAGE_SYM_TYPE_NULL
+ ComplexType: IMAGE_SYM_DTYPE_NULL
+ StorageClass: IMAGE_SYM_CLASS_STATIC
+ SectionDefinition:
+ Length: 16
+ NumberOfRelocations: 0
+ NumberOfLinenumbers: 0
+ CheckSum: 3322439691
+ Number: 7
+ - Name: foo
+ Value: 0
+ SectionNumber: 0
+ SimpleType: IMAGE_SYM_TYPE_NULL
+ ComplexType: IMAGE_SYM_DTYPE_NULL
+ StorageClass: IMAGE_SYM_CLASS_EXTERNAL
+ - Name: bar
+ Value: 0
+ SectionNumber: 0
+ SimpleType: IMAGE_SYM_TYPE_NULL
+ ComplexType: IMAGE_SYM_DTYPE_NULL
+ StorageClass: IMAGE_SYM_CLASS_EXTERNAL
+ - Name: baz
+ Value: 0
+ SectionNumber: 0
+ SimpleType: IMAGE_SYM_TYPE_NULL
+ ComplexType: IMAGE_SYM_DTYPE_NULL
+ StorageClass: IMAGE_SYM_CLASS_EXTERNAL
+ - Name: foobar
+ Value: 0
+ SectionNumber: 0
+ SimpleType: IMAGE_SYM_TYPE_NULL
+ ComplexType: IMAGE_SYM_DTYPE_NULL
+ StorageClass: IMAGE_SYM_CLASS_EXTERNAL
+...
|
It is still not a universal solution, as actually |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's been a while since I've looked at the details of the COFF Object class, so this probably won't be all that straightforward, but rather than have lots of finalize* methods that specifically target different features that need updating, I wonder if it would be possible to have a finalize virtual method on the section class? The default would do nothing, but certain Section subclasses could then define a different behaviour, specific to the section kind, which takes care of updating references etc.
llvm/lib/ObjCopy/COFF/COFFWriter.cpp
Outdated
continue; | ||
|
||
auto Sec = find_if(Obj.getMutableSections(), | ||
[&Sym](Section &S) { return S.Name == Sym.Name; }); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It feels somewhat backwards that we iterate over the symbols, in order to later iterate over sections to find the matching section. But I guess you do need both if you are going to update the section CRC at the end. (Does anything actually use the CRC? I don't think I've seen anything actually touching it anywhere.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It was my first version, then I copied the implementation from finalizeSymbolContents
that gets coff_aux_section_definition
. So, this is how I came up with the backward version. What if we build a StringMap
from Symbol name to coff_aux_section_definition
while collecting id mapping (basically three entries at max)? Then we can iterate through sections and we know where Checksum
should be written.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Making a mapping from section to symbol would be good here, yes.
However - do note that an object file can have multiple symbols with the same name. Only extern symbols are unique - non-extern symbols can be have collisions. So looking up a symbol based on a section name doesn't generally work.
As an example:
void func1(void) {}
void func2(void) {}
$ clang -target x86_64-windows-msvc -c test.c -ffunction-sections
$ obj2yaml test.o
- Name: .text
Value: 0
SectionNumber: 4
SimpleType: IMAGE_SYM_TYPE_NULL
ComplexType: IMAGE_SYM_DTYPE_NULL
StorageClass: IMAGE_SYM_CLASS_STATIC
SectionDefinition:
Length: 1
NumberOfRelocations: 0
NumberOfLinenumbers: 0
CheckSum: 40735498
Number: 4
Selection: IMAGE_COMDAT_SELECT_NODUPLICATES
- Name: func1
Value: 0
SectionNumber: 4
SimpleType: IMAGE_SYM_TYPE_NULL
ComplexType: IMAGE_SYM_DTYPE_FUNCTION
StorageClass: IMAGE_SYM_CLASS_EXTERNAL
- Name: .text
Value: 0
SectionNumber: 5
SimpleType: IMAGE_SYM_TYPE_NULL
ComplexType: IMAGE_SYM_DTYPE_NULL
StorageClass: IMAGE_SYM_CLASS_STATIC
SectionDefinition:
Length: 1
NumberOfRelocations: 0
NumberOfLinenumbers: 0
CheckSum: 40735498
Number: 5
Selection: IMAGE_COMDAT_SELECT_NODUPLICATES
- Name: func2
Value: 0
SectionNumber: 5
SimpleType: IMAGE_SYM_TYPE_NULL
ComplexType: IMAGE_SYM_DTYPE_FUNCTION
StorageClass: IMAGE_SYM_CLASS_EXTERNAL
It's unlikely that you'd have multiple duplicate symbols/sections for these CFG sections - but it sets a bad precedent if we have code in objcopy that does incorrect assumptions on how you can look things up.
The only right way to map a section to its section definition is by looking at the SectionNumber
field - looking for a symbol with the right StorageClass and number of aux symbols.
So for that, perhaps it's simplest to either iterate over all symbols, pick up all section definitions (ones with num aux == 1 and IMAGE_SYM_CLASS_STATIC I guess), put that in a map with SectionNumber->Symbol, and look up in that when operating on/iterating over sections?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Implemented as a mapping of symbols with the specified names, section definition and zero offset from section id to its section definition.
llvm/lib/ObjCopy/COFF/COFFWriter.cpp
Outdated
[&Sym](Section &S) { return S.Name == Sym.Name; }); | ||
|
||
if (Sec == Obj.getMutableSections().end() || | ||
Sec->UniqueId != Sym.TargetSectionId) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we bother to look up the section from the symbol, it would feel more logical to identify the section only based on Sym.TargetSectionId
rather than the section name. But I guess that both should match anyway.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Currently we match symbols using Sec.UniqueId
in a map formed from Sym.TargetSectionId
. Should be resolved.
llvm/lib/ObjCopy/COFF/COFFWriter.cpp
Outdated
"symbol '%s' has unexpected section format", | ||
Sym.Name.str().c_str()); | ||
|
||
ArrayRef<uint8_t> RawIds = Sec->getContents(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note that if we resolve the section from the symbol, it's possible that the symbol contains an offset into the section as well.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is interesting, I haven't expected to see a union in Name
. So, in current version, when Symbol
is matched through the Name
, it means that Zeroes
are not zero, and this is not an offset. I think it is OK.
struct StringTableOffset {
support::ulittle32_t Zeroes;
support::ulittle32_t Offset;
};
template <typename SectionNumberType>
struct coff_symbol {
union {
char ShortName[COFF::NameSize];
StringTableOffset Offset;
} Name;
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's not what I meant with offset. The string-or-offset aspect of the name is not an issue here.
What I meant was, we have this:
struct coff_symbol {
union { ... } Name;
support::ulittle32_t Value;
SectionNumberType SectionNumber;
A symbol in a COFF object points at a section number, and Value
is an offset into that section. E.g. when you have one large .text
section and a function is at a specific offset within that section.
For section definition symbols, with coff_aux_section_definition
, it is of course most common that they have a zero offset and point to the start of the section. (I'm not sure if it makes sense to have a section definition symbol pointing at an offset within the section.) But it would be good to verify this, because if the symbol has got a nonzero Value
here, it really means that it points at an offset into the section.
Now if the linker doesn't really use the symbol at all, but just inspects the whole contents of the section, it probably doesn't matter either way. But in the current form, where we primarily iterate over symbols and use that to look up a section, it gives an impression that almost any symbol would be ok.
Anyway, if you add a Value != 0
to the check where you currently check Sym.Sym.NumberOfAuxSymbols != 1 || Sym.Sym.StorageClass != IMAGE_SYM_CLASS_STATIC
, then it's probably fine.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Got the idea, thank you!
llvm/lib/ObjCopy/COFF/COFFWriter.cpp
Outdated
RawIds.size() / 4); | ||
std::vector<uint32_t> NewIds; | ||
for (auto Id : Ids) | ||
NewIds.push_back(SymIdMap[Id]); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here we really do need to check if the potentially totally garbage numbers we read from the section actually map to real symbols. It's also possible that they existed in the original symbol table but those symbols have been removed - we need to account for that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we want to emit an error here or silently leave it as it is? The origin of the the initial problem is that MSVC linker complains on the corrupted object file. So we probably can complain as well.
SymIdMap
is formed after stripping. So we actually can't say whether it is a garbage value or a removed one.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Silently leaving it as is would be bad, as it would essentially be a broken object file - as you say the linker would then complain about it later.
I guess ideally, if these sections simply are listings of symbols, and some of those symbols no longer exist, then we should just remove them from the listings. That would then require shrinking the section contents.
Initially it's probably fine to just error out on this case, but in the theoretical best case I guess we'd drop them from the listing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added a check and a test for it.
I don't think that design would really be helpful here. To do that, we would need to identify the kind of section for each input section, to pick the right subclass, to give it the right behaviour for the finalize virtual method. In this case, the So I don't think that design pattern would be beneficial here at all, it would just add a bunch of extra complexity elsewhere. I think the suggested design is reasonable here, while I have a couple of comments on the implementation details. |
✅ With the latest revision this PR passed the C/C++ code formatter. |
Going through places where we use emitCOFFSymbolIndex() in LLVM, there's also |
This is true. |
llvm/lib/ObjCopy/COFF/COFFObject.cpp
Outdated
@@ -16,8 +16,11 @@ namespace coff { | |||
using namespace object; | |||
|
|||
void Object::addSymbols(ArrayRef<Symbol> NewSymbols) { | |||
size_t RawIndex = 0; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@mstorsjo I've noticed that this approach is error-prone as if addSymbols
invoked twice, symbols will have duplicated OriginalRawIndex
(It doesn't happen now). Looks like we should have this variable as a class member.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't see this as a big concern though - we probably only ever call addSymbols
once. But I don't mind making RawIndex
a class member indeed, if it avoids the risk of an issue.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Created a NextSymbolOriginalIndex
similarly to NextSymbolUniqueId
. Strictly speaking the name would be NextSymbolOriginalRawIndex
. Dropped Raw
. Maybe I should've dropped Original
.
Supported |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, the implementation looks mostly straightforward at this point now. A couple issues/questions remain though, plus @jh7370's comments.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM from me, but leaving it up to @jh7370 to approve once he's ok with the details he has commented on.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One more minor round of comments. Please note that I am not going to be at work much of next week and will likely not have time to review further. I am happy for @mstorsjo to approve in my absence once my comments have been addressed.
llvm/test/tools/llvm-objcopy/COFF/strip-invalid-symidx-section-1.test
Outdated
Show resolved
Hide resolved
llvm/test/tools/llvm-objcopy/COFF/strip-invalid-symidx-section-1.test
Outdated
Show resolved
Hide resolved
llvm/test/tools/llvm-objcopy/COFF/strip-invalid-symidx-section-1.test
Outdated
Show resolved
Hide resolved
llvm/test/tools/llvm-objcopy/COFF/strip-invalid-symidx-section-1.test
Outdated
Show resolved
Hide resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
llvm/test/tools/llvm-objcopy/COFF/strip-invalid-symidx-section.test
Outdated
Show resolved
Hide resolved
I realized that I've lost checks for checksums. Returned them back. |
After deleting debug sections, symbol indices are shifted but WinCFGuard sections encode these indices into section data that is completely ignored. Update symbol indices as well.