ELF2: Add basic linker script support.

This linker script parser and evaluator is powerful enough to read
Linux's libc.so, which is (despite its name) a linker script that
contains OUTPUT_FORMAT, GROUP and AS_NEEDED directives.

The parser implemented in this patch is a recursive-descent parser.
It does *not* construct an AST; instead it consumes directives in place
and stores the results in the Symtab object, much as the Driver does.
This should be very fast since fewer objects are allocated, and
it is also more readable.

http://reviews.llvm.org/D13232

llvm-svn: 248918
This commit is contained in:
Rui Ueyama 2015-09-30 17:06:09 +00:00
parent 848c1aa452
commit f5c4aca98f
7 changed files with 193 additions and 44 deletions

View File

@ -25,39 +25,29 @@ using namespace lld::elf2;
namespace lld {
namespace elf2 {
Configuration *Config;
std::vector<std::unique_ptr<MemoryBuffer>> *MemoryBufferPool;
// Entry point of the ELF linker.
//
// Sets up the two process-wide pointers (Config and MemoryBufferPool) so
// that they refer to objects local to this call, then hands the arguments
// to a fresh LinkerDriver. The first element of Args is not forwarded
// (presumably it is the program name -- confirm against the caller).
void link(ArrayRef<const char *> Args) {
  Configuration Conf;
  Config = &Conf;

  // Storage for every memory buffer opened while this link is running.
  std::vector<std::unique_ptr<MemoryBuffer>> Buffers;
  MemoryBufferPool = &Buffers;

  LinkerDriver Driver;
  Driver.link(Args.slice(1));
}
}
}
// Opens a file. Path has to be resolved already.
// Newly created memory buffers are owned by this driver.
MemoryBufferRef LinkerDriver::openFile(StringRef Path) {
MemoryBufferRef openFile(StringRef Path) {
ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getFile(Path);
error(MBOrErr, Twine("cannot open ") + Path);
std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
MemoryBufferRef MBRef = MB->getMemBufferRef();
OwningMBs.push_back(std::move(MB)); // take ownership
MemoryBufferPool->push_back(std::move(MB)); // transfer ownership
return MBRef;
}
static std::unique_ptr<InputFile> createFile(MemoryBufferRef MB) {
using namespace llvm::sys::fs;
file_magic Magic = identify_magic(MB.getBuffer());
if (Magic == file_magic::archive)
return make_unique<ArchiveFile>(MB);
if (Magic == file_magic::elf_shared_object)
return createELFFile<SharedFile>(MB);
return createELFFile<ObjectFile>(MB);
}
}
// Makes a path by concatenating Dir and File.
@ -92,6 +82,12 @@ static std::string searchLibrary(StringRef Path) {
error(Twine("Unable to find library -l") + Path);
}
// Decides whether MB should be handed to the linker script reader.
// Any buffer whose magic is not recognized by identify_magic is
// treated as a linker script.
static bool isLinkerScript(MemoryBufferRef MB) {
  llvm::sys::fs::file_magic Magic =
      llvm::sys::fs::identify_magic(MB.getBuffer());
  return Magic == llvm::sys::fs::file_magic::unknown;
}
void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
// Parse command line options.
opt::InputArgList Args = Parser.parse(ArgsArr);
@ -125,31 +121,25 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
Config->NoInhibitExec = Args.hasArg(OPT_noinhibit_exec);
Config->Shared = Args.hasArg(OPT_shared);
// Create a list of input files.
std::vector<MemoryBufferRef> Inputs;
for (auto *Arg : Args.filtered(OPT_l, OPT_INPUT)) {
StringRef Path = Arg->getValue();
if (Arg->getOption().getID() == OPT_l) {
Inputs.push_back(openFile(searchLibrary(Path)));
continue;
}
Inputs.push_back(openFile(Path));
}
if (Inputs.empty())
error("no input files.");
// Create a symbol table.
SymbolTable Symtab;
// Parse all input files and put all symbols to the symbol table.
// The symbol table will take care of name resolution.
for (MemoryBufferRef MB : Inputs) {
std::unique_ptr<InputFile> File = createFile(MB);
Symtab.addFile(std::move(File));
for (auto *Arg : Args.filtered(OPT_l, OPT_INPUT)) {
std::string Path = Arg->getValue();
if (Arg->getOption().getID() == OPT_l)
Path = searchLibrary(Path);
MemoryBufferRef MB = openFile(Path);
if (isLinkerScript(MB)) {
// readLinkerScript may add files to the symbol table.
readLinkerScript(&Symtab, MB);
continue;
}
Symtab.addFile(createFile(MB));
}
if (Symtab.getObjectFiles().empty())
error("no input files.");
// Write the result.
const ELFFileBase *FirstObj = Symtab.getFirstELF();
switch (FirstObj->getELFKind()) {

View File

@ -11,11 +11,18 @@
#define LLD_ELF_DRIVER_H
#include "lld/Core/LLVM.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Option/ArgList.h"
namespace lld {
namespace elf2 {
class SymbolTable;
// The owner of all opened files.
extern std::vector<std::unique_ptr<MemoryBuffer>> *MemoryBufferPool;
MemoryBufferRef openFile(StringRef Path);
// Entry point of the ELF linker.
void link(ArrayRef<const char *> Args);
@ -34,13 +41,6 @@ public:
private:
ArgParser Parser;
// Opens a file. Path has to be resolved already.
MemoryBufferRef openFile(StringRef Path);
// Driver is the owner of all opened files.
// InputFiles have MemoryBufferRefs to them.
std::vector<std::unique_ptr<MemoryBuffer>> OwningMBs;
};
// Create enum with OPT_xxx values for each option in Options.td
@ -51,6 +51,9 @@ enum {
#undef OPTION
};
// Parses a linker script. Calling this function updates the Symtab and Config.
void readLinkerScript(SymbolTable *Symtab, MemoryBufferRef MB);
} // namespace elf2
} // namespace lld

View File

@ -13,10 +13,14 @@
//
//===----------------------------------------------------------------------===//
#include "Config.h"
#include "Driver.h"
#include "Error.h"
#include "SymbolTable.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/StringSaver.h"
using namespace llvm;
@ -75,3 +79,124 @@ opt::InputArgList ArgParser::parse(ArrayRef<const char *> Argv) {
return Args;
}
// A combined parser and evaluator for linker scripts.
//
// The script text is split into tokens once, in the constructor. run()
// then walks the token list and acts on each directive as soon as it has
// been read; input files named by the script are added to the symbol
// table passed to the constructor.
namespace {
class LinkerScript {
public:
  LinkerScript(SymbolTable *T, StringRef S) : Symtab(T), Tokens(tokenize(S)) {}

  // Processes every directive in the script.
  void run();

private:
  // Lexing helpers.
  static std::vector<StringRef> tokenize(StringRef S);
  static StringRef skipSpace(StringRef S);

  // Token stream access.
  StringRef next();
  bool atEOF() { return Pos == Tokens.size(); }
  void expect(StringRef Expect);

  // One reader per supported directive.
  void readAsNeeded();
  void readGroup();
  void readOutputFormat();

  SymbolTable *Symtab;           // Receives files named by the script.
  std::vector<StringRef> Tokens; // All tokens of the script, in order.
  size_t Pos = 0;                // Index of the next token to hand out.
};
} // anonymous namespace
void LinkerScript::run() {
while (!atEOF()) {
StringRef Tok = next();
if (Tok == "GROUP") {
readGroup();
} else if (Tok == "OUTPUT_FORMAT") {
readOutputFormat();
} else {
error("unknown directive: " + Tok);
}
}
}
// Breaks the script text S into tokens.
//
// A token is either a maximal run of word characters (letters, digits and
// the punctuation listed below) or, when the text starts with a character
// outside that set, that single character on its own. Whitespace and
// /**/-style comments between tokens are dropped by skipSpace().
std::vector<StringRef> LinkerScript::tokenize(StringRef S) {
  const char *WordChars =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
      "0123456789_.$/\\~=+[]*?-:";

  std::vector<StringRef> Result;
  StringRef Rest = skipSpace(S);
  while (!Rest.empty()) {
    // Length of the leading word; zero means the first character is not a
    // word character, so it becomes a one-character token.
    size_t Len = Rest.find_first_not_of(WordChars);
    Len = (Len == 0) ? 1 : Len;
    Result.push_back(Rest.substr(0, Len));
    Rest = skipSpace(Rest.substr(Len));
  }
  return Result;
}
// Returns S with any leading mix of whitespace and /**/-style comments
// removed. A comment that is opened but never closed is reported as an
// error.
StringRef LinkerScript::skipSpace(StringRef S) {
  while (true) {
    if (!S.startswith("/*")) {
      // Not at a comment: strip whitespace. If nothing was stripped we are
      // at the start of a token (or at the end of the text) and are done.
      StringRef Trimmed = S.ltrim();
      if (Trimmed.size() == S.size())
        return Trimmed;
      S = Trimmed;
      continue;
    }
    // Search from offset 2 so that the '*' of the opening "/*" cannot also
    // serve as the '*' of the closing "*/".
    size_t End = S.find("*/", 2);
    if (End == StringRef::npos)
      error("unclosed comment in a linker script");
    S = S.substr(End + 2);
  }
}
// Hands out the next token and advances the read position. Running off
// the end of the token list is reported as an unexpected EOF.
StringRef LinkerScript::next() {
  if (atEOF())
    error("unexpected EOF");
  StringRef Tok = Tokens[Pos];
  ++Pos;
  return Tok;
}
// Consumes one token and reports an error unless it equals Expect.
void LinkerScript::expect(StringRef Expect) {
  StringRef Actual = next();
  if (Actual == Expect)
    return;
  error(Expect + " expected, but got " + Actual);
}
void LinkerScript::readAsNeeded() {
expect("(");
for (;;) {
StringRef Tok = next();
if (Tok == ")")
return;
Symtab->addFile(createFile(openFile(Tok)));
}
}
void LinkerScript::readGroup() {
expect("(");
for (;;) {
StringRef Tok = next();
if (Tok == ")")
return;
if (Tok == "AS_NEEDED") {
readAsNeeded();
continue;
}
Symtab->addFile(createFile(openFile(Tok)));
}
}
// Reads an OUTPUT_FORMAT directive of the form "( <token> )".
// Only the shape is validated for now; the token between the parentheses
// is consumed and discarded.
void LinkerScript::readOutputFormat() {
  expect("(");
  next(); // The format name itself is currently ignored.
  expect(")");
}
// Public entry point: tokenizes the contents of MB as a linker script and
// evaluates it against Symtab.
void lld::elf2::readLinkerScript(SymbolTable *Symtab, MemoryBufferRef MB) {
  LinkerScript Script(Symtab, MB.getBuffer());
  Script.run();
}

View File

@ -16,10 +16,20 @@
using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
using namespace llvm::sys::fs;
using namespace lld;
using namespace lld::elf2;
// Wraps MB in the InputFile subclass that matches its magic: archives
// become ArchiveFile, ELF shared objects become SharedFile, and everything
// else is handed to createELFFile as an ObjectFile.
std::unique_ptr<InputFile> lld::elf2::createFile(MemoryBufferRef MB) {
  switch (identify_magic(MB.getBuffer())) {
  case file_magic::archive:
    return make_unique<ArchiveFile>(MB);
  case file_magic::elf_shared_object:
    return createELFFile<SharedFile>(MB);
  default:
    return createELFFile<ObjectFile>(MB);
  }
}
template <class ELFT> static uint16_t getEMachine(const ELFFileBase &B) {
bool IsShared = isa<SharedFileBase>(B);
if (IsShared)

View File

@ -25,9 +25,12 @@ namespace elf2 {
using llvm::object::Archive;
class InputFile;
class Lazy;
class SymbolBody;
std::unique_ptr<InputFile> createFile(MemoryBufferRef MB);
// The root class of input files.
class InputFile {
public:

View File

@ -177,11 +177,24 @@ _start:
# CHECK-NEXT: }
# CHECK-NEXT: ]
# Test for the response file
# RUN: echo " -o %t2" > %t.responsefile
# RUN: lld -flavor gnu2 %t @%t.responsefile
# RUN: llvm-readobj -file-headers -sections -program-headers -symbols %t2 \
# RUN: | FileCheck %s
# Test for the linker script
# RUN: echo "GROUP(" %t ")" > %t.script
# RUN: lld -flavor gnu2 -o %t2 %t.script
# RUN: llvm-readobj -file-headers -sections -program-headers -symbols %t2 \
# RUN: | FileCheck %s
# RUN: echo "OUTPUT_FORMAT(elf64-x86-64) /*/*/ GROUP(" %t ")" > %t.script
# RUN: lld -flavor gnu2 -o %t2 %t.script
# RUN: llvm-readobj -file-headers -sections -program-headers -symbols %t2 \
# RUN: | FileCheck %s
# RUN: not lld -flavor gnu2 %t.foo -o %t2 2>&1 | \
# RUN: FileCheck --check-prefix=MISSING %s
# MISSING: cannot open {{.*}}.foo: {{[Nn]}}o such file or directory

View File

@ -0,0 +1,5 @@
# RUN: echo "FOO(BAR)" > %t.script
# RUN: not lld -flavor gnu2 -o foo %t.script > %t.log 2>&1
# RUN: FileCheck -check-prefix=ERR1 %s < %t.log
ERR1: unknown directive: FOO