[lld-macho] Implement LC_UUID
Apple devtools use this to locate the dSYM files for a given binary. The UUID is computed based on an MD5 hash of the binary's contents. In order to hash the contents, we must first write them, but LC_UUID itself must be part of the written contents in order for all the offsets to be calculated correctly. We resolve this circular paradox by first writing an LC_UUID with an all-zero UUID, then updating the UUID with its real value later. I'm not sure there's a good way to test that the value of the UUID is "as expected", so I've just checked that it's present. Reviewed By: #lld-macho, compnerd, smeenai Differential Revision: https://reviews.llvm.org/D89418
This commit is contained in:
parent
2e8e1bdb89
commit
b86908171e
|
|
@ -24,6 +24,7 @@
|
|||
#include "llvm/BinaryFormat/MachO.h"
|
||||
#include "llvm/Config/llvm-config.h"
|
||||
#include "llvm/Support/LEB128.h"
|
||||
#include "llvm/Support/MD5.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Support/Path.h"
|
||||
|
||||
|
|
@ -35,9 +36,7 @@ using namespace lld;
|
|||
using namespace lld::macho;
|
||||
|
||||
namespace {
|
||||
class LCLinkEdit;
|
||||
class LCDyldInfo;
|
||||
class LCSymtab;
|
||||
class LCUuid;
|
||||
|
||||
class Writer {
|
||||
public:
|
||||
|
|
@ -51,6 +50,7 @@ public:
|
|||
|
||||
void openFile();
|
||||
void writeSections();
|
||||
void writeUuid();
|
||||
|
||||
void run();
|
||||
|
||||
|
|
@ -62,6 +62,7 @@ public:
|
|||
SymtabSection *symtabSection = nullptr;
|
||||
IndirectSymtabSection *indirectSymtabSection = nullptr;
|
||||
UnwindInfoSection *unwindInfoSection = nullptr;
|
||||
LCUuid *uuidCommand = nullptr;
|
||||
};
|
||||
|
||||
// LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
|
||||
|
|
@ -341,6 +342,30 @@ public:
|
|||
const PlatformInfo &platform;
|
||||
};
|
||||
|
||||
// Stores a unique identifier for the output file based on an MD5 hash of its
|
||||
// contents. In order to hash the contents, we must first write them, but
|
||||
// LC_UUID itself must be part of the written contents in order for all the
|
||||
// offsets to be calculated correctly. We resolve this circular paradox by
|
||||
// first writing an LC_UUID with an all-zero UUID, then updating the UUID with
|
||||
// its real value later.
|
||||
class LCUuid : public LoadCommand {
|
||||
public:
|
||||
uint32_t getSize() const override { return sizeof(uuid_command); }
|
||||
|
||||
void writeTo(uint8_t *buf) const override {
|
||||
auto *c = reinterpret_cast<uuid_command *>(buf);
|
||||
c->cmd = LC_UUID;
|
||||
c->cmdsize = getSize();
|
||||
uuidBuf = c->uuid;
|
||||
}
|
||||
|
||||
void writeUuid(const std::array<uint8_t, 16> &uuid) const {
|
||||
memcpy(uuidBuf, uuid.data(), uuid.size());
|
||||
}
|
||||
|
||||
mutable uint8_t *uuidBuf;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
void Writer::scanRelocations() {
|
||||
|
|
@ -391,6 +416,9 @@ void Writer::createLoadCommands() {
|
|||
|
||||
in.header->addLoadCommand(make<LCBuildVersion>(config->platform));
|
||||
|
||||
uuidCommand = make<LCUuid>();
|
||||
in.header->addLoadCommand(uuidCommand);
|
||||
|
||||
uint8_t segIndex = 0;
|
||||
for (OutputSegment *seg : outputSegments) {
|
||||
in.header->addLoadCommand(make<LCSegment>(seg->name, seg));
|
||||
|
|
@ -618,6 +646,21 @@ void Writer::writeSections() {
|
|||
osec->writeTo(buf + osec->fileOff);
|
||||
}
|
||||
|
||||
void Writer::writeUuid() {
|
||||
MD5 hash;
|
||||
const auto *bufStart = reinterpret_cast<char *>(buffer->getBufferStart());
|
||||
const auto *bufEnd = reinterpret_cast<char *>(buffer->getBufferEnd());
|
||||
hash.update(StringRef(bufStart, bufEnd - bufStart));
|
||||
MD5::MD5Result result;
|
||||
hash.final(result);
|
||||
// Conform to UUID version 4 & 5 as specified in RFC 4122:
|
||||
// 1. Set the version field to indicate that this is an MD5-based UUID.
|
||||
result.Bytes[6] = (result.Bytes[6] & 0xf) | 0x30;
|
||||
// 2. Set the two MSBs of uuid_t::clock_seq_hi_and_reserved to zero and one.
|
||||
result.Bytes[8] = (result.Bytes[8] & 0x3f) | 0x80;
|
||||
uuidCommand->writeUuid(result.Bytes);
|
||||
}
|
||||
|
||||
void Writer::run() {
|
||||
// dyld requires __LINKEDIT segment to always exist (even if empty).
|
||||
OutputSegment *linkEditSegment =
|
||||
|
|
@ -668,6 +711,7 @@ void Writer::run() {
|
|||
return;
|
||||
|
||||
writeSections();
|
||||
writeUuid();
|
||||
|
||||
if (auto e = buffer->commit())
|
||||
error("failed to write to the output file: " + toString(std::move(e)));
|
||||
|
|
|
|||
|
|
@ -13,8 +13,8 @@
|
|||
# RUN: %lld -o %t %t.o
|
||||
# RUN: llvm-objdump --macho --all-headers %t | FileCheck %s --check-prefix=PADx
|
||||
#
|
||||
# PADx: magic {{.+}} ncmds sizeofcmds flags
|
||||
# PADx-NEXT: MH_MAGIC_64 {{.+}} 9 [[#%u, CMDSIZE:]] {{.*}}
|
||||
# PADx: magic {{.+}} ncmds sizeofcmds flags
|
||||
# PADx-NEXT: MH_MAGIC_64 {{.+}} [[#]] [[#%u, CMDSIZE:]] {{.*}}
|
||||
# PADx: sectname __text
|
||||
# PADx-NEXT: segname __TEXT
|
||||
# PADx-NEXT: addr
|
||||
|
|
@ -27,8 +27,8 @@
|
|||
# RUN: %lld -o %t %t.o -headerpad 0 -headerpad_max_install_names
|
||||
# RUN: llvm-objdump --macho --all-headers %t | FileCheck %s --check-prefix=PAD0
|
||||
#
|
||||
# PAD0: magic {{.+}} ncmds sizeofcmds flags
|
||||
# PAD0-NEXT: MH_MAGIC_64 {{.+}} 9 [[#%u, CMDSIZE:]] {{.*}}
|
||||
# PAD0: magic {{.+}} ncmds sizeofcmds flags
|
||||
# PAD0-NEXT: MH_MAGIC_64 {{.+}} [[#]] [[#%u, CMDSIZE:]] {{.*}}
|
||||
# PAD0: sectname __text
|
||||
# PAD0-NEXT: segname __TEXT
|
||||
# PAD0-NEXT: addr
|
||||
|
|
@ -43,8 +43,8 @@
|
|||
# RUN: %lld -o %t %t.o -headerpad 0X11 -headerpad_max_install_names
|
||||
# RUN: llvm-objdump --macho --all-headers %t | FileCheck %s --check-prefix=PAD11
|
||||
#
|
||||
# PAD11: magic {{.+}} ncmds sizeofcmds flags
|
||||
# PAD11-NEXT: MH_MAGIC_64 {{.+}} 9 [[#%u, CMDSIZE:]] {{.*}}
|
||||
# PAD11: magic {{.+}} ncmds sizeofcmds flags
|
||||
# PAD11-NEXT: MH_MAGIC_64 {{.+}} [[#]] [[#%u, CMDSIZE:]] {{.*}}
|
||||
# PAD11: sectname __text
|
||||
# PAD11-NEXT: segname __TEXT
|
||||
# PAD11-NEXT: addr
|
||||
|
|
@ -70,7 +70,7 @@
|
|||
# PADMAX-NEXT: segname __TEXT
|
||||
# PADMAX-NEXT: addr
|
||||
# PADMAX-NEXT: size
|
||||
# PADMAX-NEXT: offset [[#%u, CMDSIZE + 0x20 + mul(0x400, N - 6)]]
|
||||
# PADMAX-NEXT: offset [[#%u, CMDSIZE + 0x20 + mul(0x400, N - 7)]]
|
||||
|
||||
################ All 3 kinds of LCDylib swamped by a larger override
|
||||
# RUN: %lld -o %T/libnull.dylib %T/null.o -dylib \
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@
|
|||
# COMMON-DAG: cmd LC_DYLD_INFO_ONLY
|
||||
# COMMON-DAG: cmd LC_SYMTAB
|
||||
# COMMON-DAG: cmd LC_DYSYMTAB
|
||||
# COMMON-DAG: cmd LC_UUID
|
||||
|
||||
## Check for the presence of load commands that are essential for a working
|
||||
## executable. Also check that it has the right filetype.
|
||||
|
|
|
|||
|
|
@ -12,12 +12,12 @@
|
|||
## address offset and the contents at that address very similarly, so am using
|
||||
## --match-full-lines to make sure we match on the right thing.
|
||||
# CHECK: Contents of section __TEXT,__cstring:
|
||||
# CHECK-NEXT: 10000040c {{.*}}
|
||||
# CHECK-NEXT: 100000424 {{.*}}
|
||||
|
||||
## 1st 8 bytes refer to the start of __cstring + 0xe, 2nd 8 bytes refer to the
|
||||
## start of __cstring
|
||||
# CHECK: Contents of section __DATA_CONST,__got:
|
||||
# CHECK-NEXT: [[#%X,ADDR:]] 1a040000 01000000 0c040000 01000000 {{.*}}
|
||||
# CHECK-NEXT: [[#%X,ADDR:]] 32040000 01000000 24040000 01000000 {{.*}}
|
||||
# CHECK-NEXT: [[#ADDR + 16]] 00000000 00000000 {{.*}}
|
||||
|
||||
## Check that the rebase table is empty.
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@
|
|||
|
||||
# RUN: llvm-objdump --section=__const --full-contents %t | FileCheck %s --check-prefix=NONPCREL
|
||||
# NONPCREL: Contents of section __DATA,__const:
|
||||
# NONPCREL-NEXT: 100001000 f0030000 01000000 f0030000 01000000
|
||||
# NONPCREL-NEXT: 100001000 08040000 01000000 08040000 01000000
|
||||
|
||||
.section __TEXT,__text
|
||||
.globl _main, _f
|
||||
|
|
|
|||
Loading…
Reference in New Issue