375 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			375 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C++
		
	
	
	
| //===- MachOReader.cpp ------------------------------------------*- C++ -*-===//
 | |
| //
 | |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | |
| // See https://llvm.org/LICENSE.txt for license information.
 | |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| #include "MachOReader.h"
 | |
| #include "Object.h"
 | |
| #include "llvm/BinaryFormat/MachO.h"
 | |
| #include "llvm/Object/MachO.h"
 | |
| #include "llvm/Support/Errc.h"
 | |
| #include <memory>
 | |
| 
 | |
| using namespace llvm;
 | |
| using namespace llvm::objcopy;
 | |
| using namespace llvm::objcopy::macho;
 | |
| 
 | |
| void MachOReader::readHeader(Object &O) const {
 | |
|   O.Header.Magic = MachOObj.getHeader().magic;
 | |
|   O.Header.CPUType = MachOObj.getHeader().cputype;
 | |
|   O.Header.CPUSubType = MachOObj.getHeader().cpusubtype;
 | |
|   O.Header.FileType = MachOObj.getHeader().filetype;
 | |
|   O.Header.NCmds = MachOObj.getHeader().ncmds;
 | |
|   O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds;
 | |
|   O.Header.Flags = MachOObj.getHeader().flags;
 | |
| }
 | |
| 
 | |
| template <typename SectionType>
 | |
| static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) {
 | |
|   StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname)));
 | |
|   StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname)));
 | |
|   Section S(SegName, SectName);
 | |
|   S.Index = Index;
 | |
|   S.Addr = Sec.addr;
 | |
|   S.Size = Sec.size;
 | |
|   S.OriginalOffset = Sec.offset;
 | |
|   S.Align = Sec.align;
 | |
|   S.RelOff = Sec.reloff;
 | |
|   S.NReloc = Sec.nreloc;
 | |
|   S.Flags = Sec.flags;
 | |
|   S.Reserved1 = Sec.reserved1;
 | |
|   S.Reserved2 = Sec.reserved2;
 | |
|   S.Reserved3 = 0;
 | |
|   return S;
 | |
| }
 | |
| 
 | |
| Section constructSection(const MachO::section &Sec, uint32_t Index) {
 | |
|   return constructSectionCommon(Sec, Index);
 | |
| }
 | |
| 
 | |
| Section constructSection(const MachO::section_64 &Sec, uint32_t Index) {
 | |
|   Section S = constructSectionCommon(Sec, Index);
 | |
|   S.Reserved3 = Sec.reserved3;
 | |
|   return S;
 | |
| }
 | |
| 
 | |
| template <typename SectionType, typename SegmentType>
 | |
| Expected<std::vector<std::unique_ptr<Section>>> static extractSections(
 | |
|     const object::MachOObjectFile::LoadCommandInfo &LoadCmd,
 | |
|     const object::MachOObjectFile &MachOObj, uint32_t &NextSectionIndex) {
 | |
|   std::vector<std::unique_ptr<Section>> Sections;
 | |
|   for (auto Curr = reinterpret_cast<const SectionType *>(LoadCmd.Ptr +
 | |
|                                                          sizeof(SegmentType)),
 | |
|             End = reinterpret_cast<const SectionType *>(LoadCmd.Ptr +
 | |
|                                                         LoadCmd.C.cmdsize);
 | |
|        Curr < End; ++Curr) {
 | |
|     SectionType Sec;
 | |
|     memcpy((void *)&Sec, Curr, sizeof(SectionType));
 | |
| 
 | |
|     if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
 | |
|       MachO::swapStruct(Sec);
 | |
| 
 | |
|     Sections.push_back(
 | |
|         std::make_unique<Section>(constructSection(Sec, NextSectionIndex)));
 | |
| 
 | |
|     Section &S = *Sections.back();
 | |
| 
 | |
|     Expected<object::SectionRef> SecRef =
 | |
|         MachOObj.getSection(NextSectionIndex++);
 | |
|     if (!SecRef)
 | |
|       return SecRef.takeError();
 | |
| 
 | |
|     Expected<ArrayRef<uint8_t>> Data =
 | |
|         MachOObj.getSectionContents(SecRef->getRawDataRefImpl());
 | |
|     if (!Data)
 | |
|       return Data.takeError();
 | |
| 
 | |
|     S.Content =
 | |
|         StringRef(reinterpret_cast<const char *>(Data->data()), Data->size());
 | |
| 
 | |
|     const uint32_t CPUType = MachOObj.getHeader().cputype;
 | |
|     S.Relocations.reserve(S.NReloc);
 | |
|     for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()),
 | |
|               RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl());
 | |
|          RI != RE; ++RI) {
 | |
|       RelocationInfo R;
 | |
|       R.Symbol = nullptr; // We'll fill this field later.
 | |
|       R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl());
 | |
|       R.Scattered = MachOObj.isRelocationScattered(R.Info);
 | |
|       unsigned Type = MachOObj.getAnyRelocationType(R.Info);
 | |
|       // TODO Support CPU_TYPE_ARM.
 | |
|       R.IsAddend = !R.Scattered && (CPUType == MachO::CPU_TYPE_ARM64 &&
 | |
|                                     Type == MachO::ARM64_RELOC_ADDEND);
 | |
|       R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(R.Info);
 | |
|       S.Relocations.push_back(R);
 | |
|     }
 | |
| 
 | |
|     assert(S.NReloc == S.Relocations.size() &&
 | |
|            "Incorrect number of relocations");
 | |
|   }
 | |
|   return std::move(Sections);
 | |
| }
 | |
| 
 | |
| Error MachOReader::readLoadCommands(Object &O) const {
 | |
|   // For MachO sections indices start from 1.
 | |
|   uint32_t NextSectionIndex = 1;
 | |
|   static constexpr char TextSegmentName[] = "__TEXT";
 | |
|   for (auto LoadCmd : MachOObj.load_commands()) {
 | |
|     LoadCommand LC;
 | |
|     switch (LoadCmd.C.cmd) {
 | |
|     case MachO::LC_CODE_SIGNATURE:
 | |
|       O.CodeSignatureCommandIndex = O.LoadCommands.size();
 | |
|       break;
 | |
|     case MachO::LC_SEGMENT:
 | |
|       // LoadCmd.Ptr might not be aligned temporarily as
 | |
|       // MachO::segment_command requires, but the segname char pointer do not
 | |
|       // have alignment restrictions.
 | |
|       if (StringRef(reinterpret_cast<const char *>(
 | |
|               LoadCmd.Ptr + offsetof(MachO::segment_command, segname))) ==
 | |
|           TextSegmentName)
 | |
|         O.TextSegmentCommandIndex = O.LoadCommands.size();
 | |
| 
 | |
|       if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
 | |
|               extractSections<MachO::section, MachO::segment_command>(
 | |
|                   LoadCmd, MachOObj, NextSectionIndex))
 | |
|         LC.Sections = std::move(*Sections);
 | |
|       else
 | |
|         return Sections.takeError();
 | |
|       break;
 | |
|     case MachO::LC_SEGMENT_64:
 | |
|       // LoadCmd.Ptr might not be aligned temporarily as
 | |
|       // MachO::segment_command_64 requires, but the segname char pointer do
 | |
|       // not have alignment restrictions.
 | |
|       if (StringRef(reinterpret_cast<const char *>(
 | |
|               LoadCmd.Ptr + offsetof(MachO::segment_command_64, segname))) ==
 | |
|           TextSegmentName)
 | |
|         O.TextSegmentCommandIndex = O.LoadCommands.size();
 | |
| 
 | |
|       if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
 | |
|               extractSections<MachO::section_64, MachO::segment_command_64>(
 | |
|                   LoadCmd, MachOObj, NextSectionIndex))
 | |
|         LC.Sections = std::move(*Sections);
 | |
|       else
 | |
|         return Sections.takeError();
 | |
|       break;
 | |
|     case MachO::LC_SYMTAB:
 | |
|       O.SymTabCommandIndex = O.LoadCommands.size();
 | |
|       break;
 | |
|     case MachO::LC_DYSYMTAB:
 | |
|       O.DySymTabCommandIndex = O.LoadCommands.size();
 | |
|       break;
 | |
|     case MachO::LC_DYLD_INFO:
 | |
|     case MachO::LC_DYLD_INFO_ONLY:
 | |
|       O.DyLdInfoCommandIndex = O.LoadCommands.size();
 | |
|       break;
 | |
|     case MachO::LC_DATA_IN_CODE:
 | |
|       O.DataInCodeCommandIndex = O.LoadCommands.size();
 | |
|       break;
 | |
|     case MachO::LC_LINKER_OPTIMIZATION_HINT:
 | |
|       O.LinkerOptimizationHintCommandIndex = O.LoadCommands.size();
 | |
|       break;
 | |
|     case MachO::LC_FUNCTION_STARTS:
 | |
|       O.FunctionStartsCommandIndex = O.LoadCommands.size();
 | |
|       break;
 | |
|     case MachO::LC_DYLD_EXPORTS_TRIE:
 | |
|       O.ExportsTrieCommandIndex = O.LoadCommands.size();
 | |
|       break;
 | |
|     case MachO::LC_DYLD_CHAINED_FIXUPS:
 | |
|       O.ChainedFixupsCommandIndex = O.LoadCommands.size();
 | |
|       break;
 | |
|     }
 | |
| #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
 | |
|   case MachO::LCName:                                                          \
 | |
|     memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr,        \
 | |
|            sizeof(MachO::LCStruct));                                           \
 | |
|     if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)                  \
 | |
|       MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data);                  \
 | |
|     if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct))                           \
 | |
|       LC.Payload = ArrayRef<uint8_t>(                                          \
 | |
|           reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +       \
 | |
|               sizeof(MachO::LCStruct),                                         \
 | |
|           LoadCmd.C.cmdsize - sizeof(MachO::LCStruct));                        \
 | |
|     break;
 | |
| 
 | |
|     switch (LoadCmd.C.cmd) {
 | |
|     default:
 | |
|       memcpy((void *)&(LC.MachOLoadCommand.load_command_data), LoadCmd.Ptr,
 | |
|              sizeof(MachO::load_command));
 | |
|       if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
 | |
|         MachO::swapStruct(LC.MachOLoadCommand.load_command_data);
 | |
|       if (LoadCmd.C.cmdsize > sizeof(MachO::load_command))
 | |
|         LC.Payload = ArrayRef<uint8_t>(
 | |
|             reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +
 | |
|                 sizeof(MachO::load_command),
 | |
|             LoadCmd.C.cmdsize - sizeof(MachO::load_command));
 | |
|       break;
 | |
| #include "llvm/BinaryFormat/MachO.def"
 | |
|     }
 | |
|     O.LoadCommands.push_back(std::move(LC));
 | |
|   }
 | |
|   return Error::success();
 | |
| }
 | |
| 
 | |
| template <typename nlist_t>
 | |
| SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) {
 | |
|   assert(nlist.n_strx < StrTable.size() &&
 | |
|          "n_strx exceeds the size of the string table");
 | |
|   SymbolEntry SE;
 | |
|   SE.Name = StringRef(StrTable.data() + nlist.n_strx).str();
 | |
|   SE.n_type = nlist.n_type;
 | |
|   SE.n_sect = nlist.n_sect;
 | |
|   SE.n_desc = nlist.n_desc;
 | |
|   SE.n_value = nlist.n_value;
 | |
|   return SE;
 | |
| }
 | |
| 
 | |
| void MachOReader::readSymbolTable(Object &O) const {
 | |
|   StringRef StrTable = MachOObj.getStringTableData();
 | |
|   for (auto Symbol : MachOObj.symbols()) {
 | |
|     SymbolEntry SE =
 | |
|         (MachOObj.is64Bit()
 | |
|              ? constructSymbolEntry(StrTable, MachOObj.getSymbol64TableEntry(
 | |
|                                                   Symbol.getRawDataRefImpl()))
 | |
|              : constructSymbolEntry(StrTable, MachOObj.getSymbolTableEntry(
 | |
|                                                   Symbol.getRawDataRefImpl())));
 | |
| 
 | |
|     O.SymTable.Symbols.push_back(std::make_unique<SymbolEntry>(SE));
 | |
|   }
 | |
| }
 | |
| 
 | |
| void MachOReader::setSymbolInRelocationInfo(Object &O) const {
 | |
|   std::vector<const Section *> Sections;
 | |
|   for (auto &LC : O.LoadCommands)
 | |
|     for (std::unique_ptr<Section> &Sec : LC.Sections)
 | |
|       Sections.push_back(Sec.get());
 | |
| 
 | |
|   for (LoadCommand &LC : O.LoadCommands)
 | |
|     for (std::unique_ptr<Section> &Sec : LC.Sections)
 | |
|       for (auto &Reloc : Sec->Relocations)
 | |
|         if (!Reloc.Scattered && !Reloc.IsAddend) {
 | |
|           const uint32_t SymbolNum =
 | |
|               Reloc.getPlainRelocationSymbolNum(MachOObj.isLittleEndian());
 | |
|           if (Reloc.Extern) {
 | |
|             Reloc.Symbol = O.SymTable.getSymbolByIndex(SymbolNum);
 | |
|           } else {
 | |
|             // FIXME: Refactor error handling in MachOReader and report an error
 | |
|             // if we encounter an invalid relocation.
 | |
|             assert(SymbolNum >= 1 && SymbolNum <= Sections.size() &&
 | |
|                    "Invalid section index.");
 | |
|             Reloc.Sec = Sections[SymbolNum - 1];
 | |
|           }
 | |
|         }
 | |
| }
 | |
| 
 | |
| void MachOReader::readRebaseInfo(Object &O) const {
 | |
|   O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes();
 | |
| }
 | |
| 
 | |
| void MachOReader::readBindInfo(Object &O) const {
 | |
|   O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes();
 | |
| }
 | |
| 
 | |
| void MachOReader::readWeakBindInfo(Object &O) const {
 | |
|   O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes();
 | |
| }
 | |
| 
 | |
| void MachOReader::readLazyBindInfo(Object &O) const {
 | |
|   O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes();
 | |
| }
 | |
| 
 | |
| void MachOReader::readExportInfo(Object &O) const {
 | |
|   O.Exports.Trie = MachOObj.getDyldInfoExportsTrie();
 | |
| }
 | |
| 
 | |
| void MachOReader::readLinkData(Object &O, Optional<size_t> LCIndex,
 | |
|                                LinkData &LD) const {
 | |
|   if (!LCIndex)
 | |
|     return;
 | |
|   const MachO::linkedit_data_command &LC =
 | |
|       O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data;
 | |
|   LD.Data =
 | |
|       arrayRefFromStringRef(MachOObj.getData().substr(LC.dataoff, LC.datasize));
 | |
| }
 | |
| 
 | |
| void MachOReader::readDataInCodeData(Object &O) const {
 | |
|   return readLinkData(O, O.DataInCodeCommandIndex, O.DataInCode);
 | |
| }
 | |
| 
 | |
| void MachOReader::readLinkerOptimizationHint(Object &O) const {
 | |
|   return readLinkData(O, O.LinkerOptimizationHintCommandIndex,
 | |
|                       O.LinkerOptimizationHint);
 | |
| }
 | |
| 
 | |
| void MachOReader::readFunctionStartsData(Object &O) const {
 | |
|   return readLinkData(O, O.FunctionStartsCommandIndex, O.FunctionStarts);
 | |
| }
 | |
| 
 | |
| void MachOReader::readExportsTrie(Object &O) const {
 | |
|   return readLinkData(O, O.ExportsTrieCommandIndex, O.ExportsTrie);
 | |
| }
 | |
| 
 | |
| void MachOReader::readChainedFixups(Object &O) const {
 | |
|   return readLinkData(O, O.ChainedFixupsCommandIndex, O.ChainedFixups);
 | |
| }
 | |
| 
 | |
| void MachOReader::readIndirectSymbolTable(Object &O) const {
 | |
|   MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand();
 | |
|   constexpr uint32_t AbsOrLocalMask =
 | |
|       MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS;
 | |
|   for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) {
 | |
|     uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DySymTab, i);
 | |
|     if ((Index & AbsOrLocalMask) != 0)
 | |
|       O.IndirectSymTable.Symbols.emplace_back(Index, None);
 | |
|     else
 | |
|       O.IndirectSymTable.Symbols.emplace_back(
 | |
|           Index, O.SymTable.getSymbolByIndex(Index));
 | |
|   }
 | |
| }
 | |
| 
 | |
| void MachOReader::readSwiftVersion(Object &O) const {
 | |
|   struct ObjCImageInfo {
 | |
|     uint32_t Version;
 | |
|     uint32_t Flags;
 | |
|   } ImageInfo;
 | |
| 
 | |
|   for (const LoadCommand &LC : O.LoadCommands)
 | |
|     for (const std::unique_ptr<Section> &Sec : LC.Sections)
 | |
|       if (Sec->Sectname == "__objc_imageinfo" &&
 | |
|           (Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" ||
 | |
|            Sec->Segname == "__DATA_DIRTY") &&
 | |
|           Sec->Content.size() >= sizeof(ObjCImageInfo)) {
 | |
|         memcpy(&ImageInfo, Sec->Content.data(), sizeof(ObjCImageInfo));
 | |
|         if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) {
 | |
|           sys::swapByteOrder(ImageInfo.Version);
 | |
|           sys::swapByteOrder(ImageInfo.Flags);
 | |
|         }
 | |
|         O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff;
 | |
|         return;
 | |
|       }
 | |
| }
 | |
| 
 | |
| Expected<std::unique_ptr<Object>> MachOReader::create() const {
 | |
|   auto Obj = std::make_unique<Object>();
 | |
|   readHeader(*Obj);
 | |
|   if (Error E = readLoadCommands(*Obj))
 | |
|     return std::move(E);
 | |
|   readSymbolTable(*Obj);
 | |
|   setSymbolInRelocationInfo(*Obj);
 | |
|   readRebaseInfo(*Obj);
 | |
|   readBindInfo(*Obj);
 | |
|   readWeakBindInfo(*Obj);
 | |
|   readLazyBindInfo(*Obj);
 | |
|   readExportInfo(*Obj);
 | |
|   readDataInCodeData(*Obj);
 | |
|   readLinkerOptimizationHint(*Obj);
 | |
|   readFunctionStartsData(*Obj);
 | |
|   readExportsTrie(*Obj);
 | |
|   readChainedFixups(*Obj);
 | |
|   readIndirectSymbolTable(*Obj);
 | |
|   readSwiftVersion(*Obj);
 | |
|   return std::move(Obj);
 | |
| }
 |