forked from OSchip/llvm-project
1657 lines
55 KiB
C++
1657 lines
55 KiB
C++
//===-- Disassembler.cpp --------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "lldb/Core/Disassembler.h"
|
|
|
|
#include "lldb/Core/AddressRange.h"
|
|
#include "lldb/Core/Debugger.h"
|
|
#include "lldb/Core/EmulateInstruction.h"
|
|
#include "lldb/Core/Mangled.h"
|
|
#include "lldb/Core/Module.h"
|
|
#include "lldb/Core/ModuleList.h"
|
|
#include "lldb/Core/PluginManager.h"
|
|
#include "lldb/Core/SourceManager.h"
|
|
#include "lldb/Host/FileSystem.h"
|
|
#include "lldb/Interpreter/OptionValue.h"
|
|
#include "lldb/Interpreter/OptionValueArray.h"
|
|
#include "lldb/Interpreter/OptionValueDictionary.h"
|
|
#include "lldb/Interpreter/OptionValueRegex.h"
|
|
#include "lldb/Interpreter/OptionValueString.h"
|
|
#include "lldb/Interpreter/OptionValueUInt64.h"
|
|
#include "lldb/Symbol/Function.h"
|
|
#include "lldb/Symbol/Symbol.h"
|
|
#include "lldb/Symbol/SymbolContext.h"
|
|
#include "lldb/Target/ExecutionContext.h"
|
|
#include "lldb/Target/SectionLoadList.h"
|
|
#include "lldb/Target/StackFrame.h"
|
|
#include "lldb/Target/Target.h"
|
|
#include "lldb/Target/Thread.h"
|
|
#include "lldb/Utility/DataBufferHeap.h"
|
|
#include "lldb/Utility/DataExtractor.h"
|
|
#include "lldb/Utility/RegularExpression.h"
|
|
#include "lldb/Utility/Status.h"
|
|
#include "lldb/Utility/Stream.h"
|
|
#include "lldb/Utility/StreamString.h"
|
|
#include "lldb/Utility/Timer.h"
|
|
#include "lldb/lldb-private-enumerations.h"
|
|
#include "lldb/lldb-private-interfaces.h"
|
|
#include "lldb/lldb-private-types.h"
|
|
#include "llvm/ADT/Triple.h"
|
|
#include "llvm/Support/Compiler.h"
|
|
|
|
#include <cstdint>
|
|
#include <cstring>
|
|
#include <utility>
|
|
|
|
#include <cassert>
|
|
|
|
#define DEFAULT_DISASM_BYTE_SIZE 32
|
|
|
|
using namespace lldb;
|
|
using namespace lldb_private;
|
|
|
|
/// Locate a disassembler plugin able to handle \a arch.
///
/// \param[in] arch
///     Architecture handed to each plugin's create callback.
///
/// \param[in] flavor
///     Disassembly flavor forwarded verbatim to the create callback; may be
///     null.
///
/// \param[in] plugin_name
///     When non-null, only the plugin registered under this name is tried.
///     When null, every registered plugin is tried in index order and the
///     first one that yields an instance wins.
///
/// \return
///     The created disassembler, or an empty shared pointer if no plugin
///     produced one.
DisassemblerSP Disassembler::FindPlugin(const ArchSpec &arch,
                                        const char *flavor,
                                        const char *plugin_name) {
  // plugin_name is allowed to be null (see the branch below), but handing a
  // null pointer to a "%s" conversion is undefined behaviour, so substitute a
  // printable placeholder for the timer description.
  LLDB_SCOPED_TIMERF("Disassembler::FindPlugin (arch = %s, plugin_name = %s)",
                     arch.GetArchitectureName(),
                     plugin_name ? plugin_name : "(null)");

  DisassemblerCreateInstance create_callback = nullptr;

  if (plugin_name) {
    // A specific plugin was requested: try that one and nothing else.
    create_callback =
        PluginManager::GetDisassemblerCreateCallbackForPluginName(plugin_name);
    if (create_callback) {
      DisassemblerSP disassembler_sp(create_callback(arch, flavor));

      if (disassembler_sp)
        return disassembler_sp;
    }
  } else {
    // No preference: walk the registered plugins until one accepts the
    // architecture/flavor pair. A null callback marks the end of the list.
    for (uint32_t idx = 0;
         (create_callback = PluginManager::GetDisassemblerCreateCallbackAtIndex(
              idx)) != nullptr;
         ++idx) {
      DisassemblerSP disassembler_sp(create_callback(arch, flavor));

      if (disassembler_sp)
        return disassembler_sp;
    }
  }
  return DisassemblerSP();
}
|
|
|
|
/// Like FindPlugin(), but when the caller supplied no flavor and the
/// architecture is x86 or x86_64, the flavor is taken from \a target's
/// disassembly-flavor setting.
DisassemblerSP Disassembler::FindPluginForTarget(const Target &target,
                                                 const ArchSpec &arch,
                                                 const char *flavor,
                                                 const char *plugin_name) {
  // FIXME - we don't have the mechanism in place to do per-architecture
  // settings. But since we know that for now we only support flavors on x86
  // & x86_64, only consult the target setting for those two.
  if (!flavor) {
    const auto machine = arch.GetTriple().getArch();
    const bool is_x86_family =
        machine == llvm::Triple::x86 || machine == llvm::Triple::x86_64;
    if (is_x86_family)
      flavor = target.GetDisassemblyFlavor();
  }

  return FindPlugin(arch, flavor, plugin_name);
}
|
|
|
|
/// Try to turn a raw (non section-offset) address into a section-offset one.
///
/// Addresses that already are section-offset are returned unchanged. For the
/// rest, the offset is looked up as a file address in the target's images
/// when the section load list is empty, and as a load address otherwise. If
/// the lookup fails, \a addr is returned as-is and treated as a raw address.
static Address ResolveAddress(Target &target, const Address &addr) {
  if (addr.IsSectionOffset())
    return addr;

  Address resolved_addr;
  bool is_resolved = false;
  if (target.GetSectionLoadList().IsEmpty()) {
    is_resolved =
        target.GetImages().ResolveFileAddress(addr.GetOffset(), resolved_addr);
  } else {
    is_resolved = target.GetSectionLoadList().ResolveLoadAddress(
        addr.GetOffset(), resolved_addr);
  }

  if (is_resolved && resolved_addr.IsValid())
    return resolved_addr;

  // We weren't able to resolve the address, just treat it as a raw address.
  return addr;
}
|
|
|
|
/// Disassemble every instruction contained in \a range.
///
/// \return
///     A disassembler holding the parsed instructions, or an empty shared
///     pointer when the range is empty or has an invalid base address, when
///     no plugin could be found, or when no bytes were disassembled.
lldb::DisassemblerSP Disassembler::DisassembleRange(
    const ArchSpec &arch, const char *plugin_name, const char *flavor,
    Target &target, const AddressRange &range, bool force_live_memory) {
  // Nothing to do for an empty range or one that does not start anywhere.
  if (range.GetByteSize() <= 0 || !range.GetBaseAddress().IsValid())
    return {};

  lldb::DisassemblerSP disasm_sp =
      Disassembler::FindPluginForTarget(target, arch, flavor, plugin_name);
  if (!disasm_sp)
    return {};

  // Limit parsing to the byte size of the range; no error stream is given.
  const Limit byte_limit = {Limit::Bytes, range.GetByteSize()};
  const size_t bytes_disassembled = disasm_sp->ParseInstructions(
      target, range.GetBaseAddress(), byte_limit, nullptr, force_live_memory);
  if (bytes_disassembled == 0)
    return {};

  return disasm_sp;
}
|
|
|
|
/// Disassemble instructions from a caller-provided byte buffer.
///
/// The buffer is wrapped in a DataExtractor using \a arch's byte order and
/// address byte size and handed to the plugin's DecodeInstructions(). The
/// decode result is deliberately ignored: a disassembler is returned whenever
/// \a src is non-null and a plugin was found.
///
/// \return
///     The disassembler that decoded the bytes, or an empty shared pointer if
///     \a src is null or no plugin is available.
lldb::DisassemblerSP
Disassembler::DisassembleBytes(const ArchSpec &arch, const char *plugin_name,
                               const char *flavor, const Address &start,
                               const void *src, size_t src_len,
                               uint32_t num_instructions, bool data_from_file) {
  lldb::DisassemblerSP disasm_sp;

  if (src) {
    disasm_sp = Disassembler::FindPlugin(arch, flavor, plugin_name);

    if (disasm_sp) {
      DataExtractor data(src, src_len, arch.GetByteOrder(),
                         arch.GetAddressByteSize());

      (void)disasm_sp->DecodeInstructions(start, data, 0, num_instructions,
                                          false, data_from_file);
    }
  }

  return disasm_sp;
}
|
|
|
|
/// Disassemble starting at \a address, up to \a limit, and print the result
/// to \a strm.
///
/// Parsing is always done with force_live_memory set to true, and \a strm is
/// also passed to ParseInstructions().
///
/// \return
///     true if at least one byte was disassembled and printed; false if the
///     execution context has no target, no plugin was found, or nothing could
///     be disassembled.
bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch,
                               const char *plugin_name, const char *flavor,
                               const ExecutionContext &exe_ctx,
                               const Address &address, Limit limit,
                               bool mixed_source_and_assembly,
                               uint32_t num_mixed_context_lines,
                               uint32_t options, Stream &strm) {
  // Without a target there is nothing to read instructions from.
  if (!exe_ctx.GetTargetPtr())
    return false;

  lldb::DisassemblerSP disasm_sp(Disassembler::FindPluginForTarget(
      exe_ctx.GetTargetRef(), arch, flavor, plugin_name));
  if (!disasm_sp)
    return false;

  const bool force_live_memory = true;
  const size_t bytes_disassembled = disasm_sp->ParseInstructions(
      exe_ctx.GetTargetRef(), address, limit, &strm, force_live_memory);
  if (bytes_disassembled == 0)
    return false;

  disasm_sp->PrintInstructions(debugger, arch, exe_ctx,
                               mixed_source_and_assembly,
                               num_mixed_context_lines, options, strm);
  return true;
}
|
|
|
|
/// Build a SourceLine describing where the function in \a sc is declared.
///
/// \return
///     The declaration file/line (column is always 0) when \a sc has both a
///     function and a valid line entry, and the declaration lives in the same
///     file as that line entry (either its file or its original_file).
///     Otherwise a default-constructed SourceLine.
Disassembler::SourceLine
Disassembler::GetFunctionDeclLineEntry(const SymbolContext &sc) {
  if (!sc.function || !sc.line_entry.IsValid())
    return {};

  FileSpec func_decl_file;
  uint32_t func_decl_line;
  sc.function->GetStartLineSourceInfo(func_decl_file, func_decl_line);

  // Only report the declaration when it is in the file the line entry
  // refers to.
  const LineEntry &prologue_end_line = sc.line_entry;
  const bool decl_in_same_file =
      func_decl_file == prologue_end_line.file ||
      func_decl_file == prologue_end_line.original_file;
  if (!decl_in_same_file)
    return {};

  SourceLine decl_line;
  decl_line.file = func_decl_file;
  decl_line.line = func_decl_line;
  // TODO: Do we care about column on these entries? If so, we need to plumb
  // that through GetStartLineSourceInfo.
  decl_line.column = 0;
  return decl_line;
}
|
|
|
|
/// Record \a line in the per-file set of source lines already seen.
///
/// Invalid lines are ignored. A file that has no entry yet gets one created
/// on first use.
void Disassembler::AddLineToSourceLineTables(
    SourceLine &line,
    std::map<FileSpec, std::set<uint32_t>> &source_lines_seen) {
  if (!line.IsValid())
    return;

  // operator[] creates an empty set for a file we have not met before, so
  // both the "new file" and "known file" cases reduce to a single insert.
  source_lines_seen[line.file].insert(line.line);
}
|
|
|
|
bool Disassembler::ElideMixedSourceAndDisassemblyLine(
|
|
const ExecutionContext &exe_ctx, const SymbolContext &sc,
|
|
SourceLine &line) {
|
|
|
|
// TODO: should we also check target.process.thread.step-avoid-libraries ?
|
|
|
|
const RegularExpression *avoid_regex = nullptr;
|
|
|
|
// Skip any line #0 entries - they are implementation details
|
|
if (line.line == 0)
|
|
return false;
|
|
|
|
ThreadSP thread_sp = exe_ctx.GetThreadSP();
|
|
if (thread_sp) {
|
|
avoid_regex = thread_sp->GetSymbolsToAvoidRegexp();
|
|
} else {
|
|
TargetSP target_sp = exe_ctx.GetTargetSP();
|
|
if (target_sp) {
|
|
Status error;
|
|
OptionValueSP value_sp = target_sp->GetDebugger().GetPropertyValue(
|
|
&exe_ctx, "target.process.thread.step-avoid-regexp", false, error);
|
|
if (value_sp && value_sp->GetType() == OptionValue::eTypeRegex) {
|
|
OptionValueRegex *re = value_sp->GetAsRegex();
|
|
if (re) {
|
|
avoid_regex = re->GetCurrentValue();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (avoid_regex && sc.symbol != nullptr) {
|
|
const char *function_name =
|
|
sc.GetFunctionName(Mangled::ePreferDemangledWithoutArguments)
|
|
.GetCString();
|
|
if (function_name && avoid_regex->Execute(function_name)) {
|
|
// skip this source line
|
|
return true;
|
|
}
|
|
}
|
|
// don't skip this source line
|
|
return false;
|
|
}
|
|
|
|
/// Print the already-parsed instruction list to \a strm, optionally
/// interleaved with source lines.
///
/// The work is done in two passes over the instruction list:
///  - Pass one measures the widest formatted address (so the opcode columns
///    line up) and, in mixed mode, records every source line that has a line
///    table entry of its own in this disassembly.
///  - Pass two prints, for each instruction, any source lines that should
///    precede it followed by the instruction itself.
///
/// \param[in] debugger
///     Supplies the source manager when \a exe_ctx has no target.
///
/// \param[in] arch
///     Not referenced by this function body.
///
/// \param[in] exe_ctx
///     Provides the frame (for the PC marker), the target and the
///     disassembly format.
///
/// \param[in] mixed_source_and_assembly
///     When true, source lines are interleaved with the instructions.
///
/// \param[in] num_mixed_context_lines
///     Number of extra source lines to show before and after each source
///     line in mixed mode.
///
/// \param[in] options
///     Bit mask; eOptionMarkPCSourceLine, eOptionShowBytes and
///     eOptionShowControlFlowKind are consulted here.
///
/// \param[in] strm
///     Destination stream.
void Disassembler::PrintInstructions(Debugger &debugger, const ArchSpec &arch,
                                     const ExecutionContext &exe_ctx,
                                     bool mixed_source_and_assembly,
                                     uint32_t num_mixed_context_lines,
                                     uint32_t options, Stream &strm) {
  // We got some things disassembled...
  size_t num_instructions_found = GetInstructionList().GetSize();

  const uint32_t max_opcode_byte_size =
      GetInstructionList().GetMaxOpcocdeByteSize();
  SymbolContext sc;
  SymbolContext prev_sc;
  AddressRange current_source_line_range;
  const Address *pc_addr_ptr = nullptr;
  StackFrame *frame = exe_ctx.GetFramePtr();

  TargetSP target_sp(exe_ctx.GetTargetSP());
  SourceManager &source_manager =
      target_sp ? target_sp->GetSourceManager() : debugger.GetSourceManager();

  if (frame) {
    pc_addr_ptr = &frame->GetFrameCodeAddress();
  }
  const uint32_t scope =
      eSymbolContextLineEntry | eSymbolContextFunction | eSymbolContextSymbol;
  const bool use_inline_block_range = false;

  // Use the debugger's disassembly format when there is a target; otherwise
  // fall back to a bare "<address>: " prefix.
  const FormatEntity::Entry *disassembly_format = nullptr;
  FormatEntity::Entry format;
  if (exe_ctx.HasTargetScope()) {
    disassembly_format =
        exe_ctx.GetTargetRef().GetDebugger().GetDisassemblyFormat();
  } else {
    FormatEntity::Parse("${addr}: ", format);
    disassembly_format = &format;
  }

  // First pass: step through the list of instructions, find how long the
  // initial addresses strings are, insert padding in the second pass so the
  // opcodes all line up nicely.

  // Also build up the source line mapping if this is mixed source & assembly
  // mode.  Calculate the source line for each assembly instruction (eliding
  // inlined functions which the user wants to skip).

  std::map<FileSpec, std::set<uint32_t>> source_lines_seen;
  Symbol *previous_symbol = nullptr;

  size_t address_text_size = 0;
  for (size_t i = 0; i < num_instructions_found; ++i) {
    Instruction *inst = GetInstructionList().GetInstructionAtIndex(i).get();
    if (inst) {
      const Address &addr = inst->GetAddress();
      ModuleSP module_sp(addr.GetModule());
      if (module_sp) {
        const SymbolContextItem resolve_mask = eSymbolContextFunction |
                                               eSymbolContextSymbol |
                                               eSymbolContextLineEntry;
        uint32_t resolved_mask =
            module_sp->ResolveSymbolContextForAddress(addr, resolve_mask, sc);
        if (resolved_mask) {
          StreamString strmstr;
          Debugger::FormatDisassemblerAddress(disassembly_format, &sc, nullptr,
                                              &exe_ctx, &addr, strmstr);
          size_t cur_line = strmstr.GetSizeOfLastLine();
          if (cur_line > address_text_size)
            address_text_size = cur_line;

          // Add entries to our "source_lines_seen" map+set which list which
          // sources lines occur in this disassembly session.  We will print
          // lines of context around a source line, but we don't want to print
          // a source line that has a line table entry of its own - we'll leave
          // that source line to be printed when it actually occurs in the
          // disassembly.

          if (mixed_source_and_assembly && sc.line_entry.IsValid()) {
            if (sc.symbol != previous_symbol) {
              SourceLine decl_line = GetFunctionDeclLineEntry(sc);
              if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, decl_line))
                AddLineToSourceLineTables(decl_line, source_lines_seen);
            }
            if (sc.line_entry.IsValid()) {
              SourceLine this_line;
              this_line.file = sc.line_entry.file;
              this_line.line = sc.line_entry.line;
              this_line.column = sc.line_entry.column;
              if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, this_line))
                AddLineToSourceLineTables(this_line, source_lines_seen);
            }
          }
        }
        sc.Clear(false);
      }
    }
  }

  // Second pass: print source context (mixed mode only) and instructions.
  previous_symbol = nullptr;
  SourceLine previous_line;
  for (size_t i = 0; i < num_instructions_found; ++i) {
    Instruction *inst = GetInstructionList().GetInstructionAtIndex(i).get();

    if (inst) {
      const Address &addr = inst->GetAddress();
      const bool inst_is_at_pc = pc_addr_ptr && addr == *pc_addr_ptr;
      SourceLinesToDisplay source_lines_to_display;

      prev_sc = sc;

      ModuleSP module_sp(addr.GetModule());
      if (module_sp) {
        uint32_t resolved_mask = module_sp->ResolveSymbolContextForAddress(
            addr, eSymbolContextEverything, sc);
        if (resolved_mask) {
          if (mixed_source_and_assembly) {

            // If we've started a new function (non-inlined), print all of the
            // source lines from the function declaration until the first line
            // table entry - typically the opening curly brace of the function.
            if (previous_symbol != sc.symbol) {
              // The default disassembly format puts an extra blank line
              // between functions - so when we're displaying the source
              // context for a function, we don't want to add a blank line
              // after the source context or we'll end up with two of them.
              if (previous_symbol != nullptr)
                source_lines_to_display.print_source_context_end_eol = false;

              previous_symbol = sc.symbol;
              if (sc.function && sc.line_entry.IsValid()) {
                LineEntry prologue_end_line = sc.line_entry;
                if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc,
                                                        prologue_end_line)) {
                  FileSpec func_decl_file;
                  uint32_t func_decl_line;
                  sc.function->GetStartLineSourceInfo(func_decl_file,
                                                      func_decl_line);
                  if (func_decl_file == prologue_end_line.file ||
                      func_decl_file == prologue_end_line.original_file) {
                    // Add all the lines between the function declaration and
                    // the first non-prologue source line to the list of lines
                    // to print.
                    for (uint32_t lineno = func_decl_line;
                         lineno <= prologue_end_line.line; lineno++) {
                      SourceLine this_line;
                      this_line.file = func_decl_file;
                      this_line.line = lineno;
                      source_lines_to_display.lines.push_back(this_line);
                    }
                    // Mark the last line as the "current" one.  Usually this
                    // is the open curly brace.
                    if (source_lines_to_display.lines.size() > 0)
                      source_lines_to_display.current_source_line =
                          source_lines_to_display.lines.size() - 1;
                  }
                }
              }
              sc.GetAddressRange(scope, 0, use_inline_block_range,
                                 current_source_line_range);
            }

            // If we've left a previous source line's address range, print a
            // new source line
            if (!current_source_line_range.ContainsFileAddress(addr)) {
              sc.GetAddressRange(scope, 0, use_inline_block_range,
                                 current_source_line_range);

              if (sc != prev_sc && sc.comp_unit && sc.line_entry.IsValid()) {
                SourceLine this_line;
                this_line.file = sc.line_entry.file;
                this_line.line = sc.line_entry.line;

                if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc,
                                                        this_line)) {
                  // Only print this source line if it is different from the
                  // last source line we printed.  There may have been inlined
                  // functions between these lines that we elided, resulting in
                  // the same line being printed twice in a row for a
                  // contiguous block of assembly instructions.
                  if (this_line != previous_line) {

                    // Leading context: the num_mixed_context_lines lines
                    // just before this_line. Seeing a line that has its own
                    // line table entry discards what was collected so far,
                    // so only the lines after the last such line survive.
                    //
                    // The "is there room for context" test must be done by
                    // comparing, not by subtracting and checking for > 0:
                    // the operands are unsigned, so the subtraction wraps to
                    // a huge value whenever the line number is smaller than
                    // the context count and bogus line numbers would be
                    // queued for display.
                    std::vector<uint32_t> previous_lines;
                    if (this_line.line > num_mixed_context_lines) {
                      for (uint32_t ctx_idx = 0;
                           ctx_idx < num_mixed_context_lines; ctx_idx++) {
                        uint32_t line =
                            this_line.line - num_mixed_context_lines + ctx_idx;
                        auto pos = source_lines_seen.find(this_line.file);
                        if (pos != source_lines_seen.end()) {
                          if (pos->second.count(line) == 1) {
                            previous_lines.clear();
                          } else {
                            previous_lines.push_back(line);
                          }
                        }
                      }
                    }
                    for (size_t ctx_idx = 0; ctx_idx < previous_lines.size();
                         ctx_idx++) {
                      SourceLine context_line;
                      context_line.file = this_line.file;
                      context_line.line = previous_lines[ctx_idx];
                      // Remember that this line is now being shown so it is
                      // not repeated as context later on.
                      auto pos = source_lines_seen.find(context_line.file);
                      if (pos != source_lines_seen.end()) {
                        pos->second.insert(context_line.line);
                      }
                      source_lines_to_display.lines.push_back(context_line);
                    }

                    source_lines_to_display.lines.push_back(this_line);
                    source_lines_to_display.current_source_line =
                        source_lines_to_display.lines.size() - 1;

                    // Trailing context: stop at the first line that has
                    // already been seen.
                    for (uint32_t ctx_idx = 0;
                         ctx_idx < num_mixed_context_lines; ctx_idx++) {
                      SourceLine next_line;
                      next_line.file = this_line.file;
                      next_line.line = this_line.line + ctx_idx + 1;
                      auto pos = source_lines_seen.find(next_line.file);
                      if (pos != source_lines_seen.end()) {
                        if (pos->second.count(next_line.line) == 1)
                          break;
                        pos->second.insert(next_line.line);
                      }
                      source_lines_to_display.lines.push_back(next_line);
                    }
                  }
                  previous_line = this_line;
                }
              }
            }
          }
        } else {
          sc.Clear(true);
        }
      }

      if (source_lines_to_display.lines.size() > 0) {
        strm.EOL();
        for (size_t idx = 0; idx < source_lines_to_display.lines.size();
             idx++) {
          SourceLine ln = source_lines_to_display.lines[idx];
          // "->" marks source shown for the instruction at the PC (when
          // requested); "**" marks the current source line otherwise.
          const char *line_highlight = "";
          if (inst_is_at_pc && (options & eOptionMarkPCSourceLine)) {
            line_highlight = "->";
          } else if (idx == source_lines_to_display.current_source_line) {
            line_highlight = "**";
          }
          source_manager.DisplaySourceLinesWithLineNumbers(
              ln.file, ln.line, ln.column, 0, 0, line_highlight, &strm);
        }
        if (source_lines_to_display.print_source_context_end_eol)
          strm.EOL();
      }

      const bool show_bytes = (options & eOptionShowBytes) != 0;
      const bool show_control_flow_kind =
          (options & eOptionShowControlFlowKind) != 0;
      inst->Dump(&strm, max_opcode_byte_size, true, show_bytes,
                 show_control_flow_kind, &exe_ctx, &sc, &prev_sc, nullptr,
                 address_text_size);
      strm.EOL();
    } else {
      // A missing instruction ends the listing.
      break;
    }
  }
}
|
|
|
|
/// Disassemble the code that \a frame is executing and print it to \a strm.
///
/// The range is the frame's function when there is one, else the frame's
/// symbol when its value is an address, else it starts at the frame's code
/// address. Whenever the resulting byte size is zero,
/// DEFAULT_DISASM_BYTE_SIZE bytes are disassembled instead.
bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch,
                               StackFrame &frame, Stream &strm) {
  SymbolContext sc(
      frame.GetSymbolContext(eSymbolContextFunction | eSymbolContextSymbol));

  // Pick the most specific range available for this frame.
  AddressRange range;
  if (sc.function) {
    range = sc.function->GetAddressRange();
  } else if (sc.symbol && sc.symbol->ValueIsAddress()) {
    range.GetBaseAddress() = sc.symbol->GetAddressRef();
    range.SetByteSize(sc.symbol->GetByteSize());
  } else {
    range.GetBaseAddress() = frame.GetFrameCodeAddress();
  }

  const bool valid_but_empty =
      range.GetBaseAddress().IsValid() && range.GetByteSize() == 0;
  if (valid_but_empty)
    range.SetByteSize(DEFAULT_DISASM_BYTE_SIZE);

  // The limit gets the same default even when the base address is invalid
  // and the range size was therefore left at zero.
  Disassembler::Limit limit = {Disassembler::Limit::Bytes,
                               range.GetByteSize()};
  if (limit.value == 0)
    limit.value = DEFAULT_DISASM_BYTE_SIZE;

  return Disassemble(debugger, arch, nullptr, nullptr, frame,
                     range.GetBaseAddress(), limit, false, 0, 0, strm);
}
|
|
|
|
/// Create an instruction located at \a address with address class
/// \a addr_class. The opcode starts out value-initialized and the cached
/// mnemonic/operand/comment strings are marked as not yet calculated.
Instruction::Instruction(const Address &address, AddressClass addr_class)
    : m_address(address), m_address_class(addr_class), m_opcode(),
      m_calculated_strings(false) {}

/// Nothing beyond member destruction is required.
Instruction::~Instruction() = default;
|
|
|
|
namespace x86 {
|
|
|
|
/// The three values that decide an instruction's control flow kind.
/// InstructionLengthDecode() fills this struct in and
/// MapOpcodeIntoControlFlowKind() consumes it.
///
/// primary_opcode
///    The opcode byte that identifies the instruction.
///    For a one-byte opcode it is the first byte after any prefixes.
///    For a multi-byte opcode (introduced by the 0x0F escape byte) it is the
///    byte that follows the escape byte(s).
///
/// opcode_len
///    The length of the opcode in bytes. Valid opcode lengths are 1, 2, or 3.
///
/// modrm
///    ModR/M byte of the instruction.
///    Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0]
///    may contain a register or specify an addressing mode, depending on MOD.
struct InstructionOpcodeAndModrm {
  uint8_t primary_opcode;
  uint8_t opcode_len;
  uint8_t modrm;
};
|
|
|
|
/// Determine the InstructionControlFlowKind based on opcode and modrm bytes.
|
|
/// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and
|
|
/// instruction set.
|
|
///
|
|
/// \param[in] opcode_and_modrm
|
|
/// Contains primary_opcode byte, its length, and ModR/M byte.
|
|
/// Refer to the struct InstructionOpcodeAndModrm for details.
|
|
///
|
|
/// \return
|
|
/// The control flow kind of the instruction or
|
|
/// eInstructionControlFlowKindOther if the instruction doesn't affect
|
|
/// the control flow of the program.
|
|
lldb::InstructionControlFlowKind
|
|
MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) {
|
|
uint8_t opcode = opcode_and_modrm.primary_opcode;
|
|
uint8_t opcode_len = opcode_and_modrm.opcode_len;
|
|
uint8_t modrm = opcode_and_modrm.modrm;
|
|
|
|
if (opcode_len > 2)
|
|
return lldb::eInstructionControlFlowKindOther;
|
|
|
|
if (opcode >= 0x70 && opcode <= 0x7F) {
|
|
if (opcode_len == 1)
|
|
return lldb::eInstructionControlFlowKindCondJump;
|
|
else
|
|
return lldb::eInstructionControlFlowKindOther;
|
|
}
|
|
|
|
if (opcode >= 0x80 && opcode <= 0x8F) {
|
|
if (opcode_len == 2)
|
|
return lldb::eInstructionControlFlowKindCondJump;
|
|
else
|
|
return lldb::eInstructionControlFlowKindOther;
|
|
}
|
|
|
|
switch (opcode) {
|
|
case 0x9A:
|
|
if (opcode_len == 1)
|
|
return lldb::eInstructionControlFlowKindFarCall;
|
|
break;
|
|
case 0xFF:
|
|
if (opcode_len == 1) {
|
|
uint8_t modrm_reg = (modrm >> 3) & 7;
|
|
if (modrm_reg == 2)
|
|
return lldb::eInstructionControlFlowKindCall;
|
|
else if (modrm_reg == 3)
|
|
return lldb::eInstructionControlFlowKindFarCall;
|
|
else if (modrm_reg == 4)
|
|
return lldb::eInstructionControlFlowKindJump;
|
|
else if (modrm_reg == 5)
|
|
return lldb::eInstructionControlFlowKindFarJump;
|
|
}
|
|
break;
|
|
case 0xE8:
|
|
if (opcode_len == 1)
|
|
return lldb::eInstructionControlFlowKindCall;
|
|
break;
|
|
case 0xCD:
|
|
case 0xCC:
|
|
case 0xCE:
|
|
case 0xF1:
|
|
if (opcode_len == 1)
|
|
return lldb::eInstructionControlFlowKindFarCall;
|
|
break;
|
|
case 0xCF:
|
|
if (opcode_len == 1)
|
|
return lldb::eInstructionControlFlowKindFarReturn;
|
|
break;
|
|
case 0xE9:
|
|
case 0xEB:
|
|
if (opcode_len == 1)
|
|
return lldb::eInstructionControlFlowKindJump;
|
|
break;
|
|
case 0xEA:
|
|
if (opcode_len == 1)
|
|
return lldb::eInstructionControlFlowKindFarJump;
|
|
break;
|
|
case 0xE3:
|
|
case 0xE0:
|
|
case 0xE1:
|
|
case 0xE2:
|
|
if (opcode_len == 1)
|
|
return lldb::eInstructionControlFlowKindCondJump;
|
|
break;
|
|
case 0xC3:
|
|
case 0xC2:
|
|
if (opcode_len == 1)
|
|
return lldb::eInstructionControlFlowKindReturn;
|
|
break;
|
|
case 0xCB:
|
|
case 0xCA:
|
|
if (opcode_len == 1)
|
|
return lldb::eInstructionControlFlowKindFarReturn;
|
|
break;
|
|
case 0x05:
|
|
case 0x34:
|
|
if (opcode_len == 2)
|
|
return lldb::eInstructionControlFlowKindFarCall;
|
|
break;
|
|
case 0x35:
|
|
case 0x07:
|
|
if (opcode_len == 2)
|
|
return lldb::eInstructionControlFlowKindFarReturn;
|
|
break;
|
|
case 0x01:
|
|
if (opcode_len == 2) {
|
|
switch (modrm) {
|
|
case 0xc1:
|
|
return lldb::eInstructionControlFlowKindFarCall;
|
|
case 0xc2:
|
|
case 0xc3:
|
|
return lldb::eInstructionControlFlowKindFarReturn;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return lldb::eInstructionControlFlowKindOther;
|
|
}
|
|
|
|
/// Decode an instruction into opcode, modrm and opcode_len.
|
|
/// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout.
|
|
/// Opcodes in x86 are generally the first byte of instruction, though two-byte
|
|
/// instructions and prefixes exist. ModR/M is the byte following the opcode
|
|
/// and adds additional information for how the instruction is executed.
|
|
///
|
|
/// \param[in] inst_bytes
|
|
/// Raw bytes of the instruction
|
|
///
|
|
///
|
|
/// \param[in] bytes_len
|
|
/// The length of the inst_bytes array.
|
|
///
|
|
/// \param[in] is_exec_mode_64b
|
|
/// If true, the execution mode is 64 bit.
|
|
///
|
|
/// \return
|
|
/// Returns decoded instruction as struct InstructionOpcodeAndModrm, holding
|
|
/// primary_opcode, opcode_len and modrm byte. Refer to the struct definition
|
|
/// for more details.
|
|
/// Otherwise if the given instruction is invalid, returns None.
|
|
llvm::Optional<InstructionOpcodeAndModrm>
|
|
InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len,
|
|
bool is_exec_mode_64b) {
|
|
int op_idx = 0;
|
|
bool prefix_done = false;
|
|
InstructionOpcodeAndModrm ret = {0, 0, 0};
|
|
|
|
// In most cases, the primary_opcode is the first byte of the instruction
|
|
// but some instructions have a prefix to be skipped for these calculations.
|
|
// The following mapping is inspired from libipt's instruction decoding logic
|
|
// in `src/pt_ild.c`
|
|
while (!prefix_done) {
|
|
if (op_idx >= bytes_len)
|
|
return llvm::None;
|
|
|
|
ret.primary_opcode = inst_bytes[op_idx];
|
|
switch (ret.primary_opcode) {
|
|
// prefix_ignore
|
|
case 0x26:
|
|
case 0x2e:
|
|
case 0x36:
|
|
case 0x3e:
|
|
case 0x64:
|
|
case 0x65:
|
|
// prefix_osz, prefix_asz
|
|
case 0x66:
|
|
case 0x67:
|
|
// prefix_lock, prefix_f2, prefix_f3
|
|
case 0xf0:
|
|
case 0xf2:
|
|
case 0xf3:
|
|
op_idx++;
|
|
break;
|
|
|
|
// prefix_rex
|
|
case 0x40:
|
|
case 0x41:
|
|
case 0x42:
|
|
case 0x43:
|
|
case 0x44:
|
|
case 0x45:
|
|
case 0x46:
|
|
case 0x47:
|
|
case 0x48:
|
|
case 0x49:
|
|
case 0x4a:
|
|
case 0x4b:
|
|
case 0x4c:
|
|
case 0x4d:
|
|
case 0x4e:
|
|
case 0x4f:
|
|
if (is_exec_mode_64b)
|
|
op_idx++;
|
|
else
|
|
prefix_done = true;
|
|
break;
|
|
|
|
// prefix_vex_c4, c5
|
|
case 0xc5:
|
|
if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
|
|
prefix_done = true;
|
|
break;
|
|
}
|
|
|
|
ret.opcode_len = 2;
|
|
ret.primary_opcode = inst_bytes[op_idx + 2];
|
|
ret.modrm = inst_bytes[op_idx + 3];
|
|
return ret;
|
|
|
|
case 0xc4:
|
|
if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
|
|
prefix_done = true;
|
|
break;
|
|
}
|
|
ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f;
|
|
ret.primary_opcode = inst_bytes[op_idx + 3];
|
|
ret.modrm = inst_bytes[op_idx + 4];
|
|
return ret;
|
|
|
|
// prefix_evex
|
|
case 0x62:
|
|
if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
|
|
prefix_done = true;
|
|
break;
|
|
}
|
|
ret.opcode_len = inst_bytes[op_idx + 1] & 0x03;
|
|
ret.primary_opcode = inst_bytes[op_idx + 4];
|
|
ret.modrm = inst_bytes[op_idx + 5];
|
|
return ret;
|
|
|
|
default:
|
|
prefix_done = true;
|
|
break;
|
|
}
|
|
} // prefix done
|
|
|
|
ret.primary_opcode = inst_bytes[op_idx];
|
|
ret.modrm = inst_bytes[op_idx + 1];
|
|
ret.opcode_len = 1;
|
|
|
|
// If the first opcode is 0F, it's two- or three- byte opcodes.
|
|
if (ret.primary_opcode == 0x0F) {
|
|
ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
|
|
|
|
if (ret.primary_opcode == 0x38) {
|
|
ret.opcode_len = 3;
|
|
ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
|
|
ret.modrm = inst_bytes[op_idx + 1];
|
|
} else if (ret.primary_opcode == 0x3A) {
|
|
ret.opcode_len = 3;
|
|
ret.primary_opcode = inst_bytes[++op_idx];
|
|
ret.modrm = inst_bytes[op_idx + 1];
|
|
} else if ((ret.primary_opcode & 0xf8) == 0x38) {
|
|
ret.opcode_len = 0;
|
|
ret.primary_opcode = inst_bytes[++op_idx];
|
|
ret.modrm = inst_bytes[op_idx + 1];
|
|
} else if (ret.primary_opcode == 0x0F) {
|
|
ret.opcode_len = 3;
|
|
// opcode is 0x0F, no needs to update
|
|
ret.modrm = inst_bytes[op_idx + 1];
|
|
} else {
|
|
ret.opcode_len = 2;
|
|
ret.modrm = inst_bytes[op_idx + 1];
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/// Determine the control flow kind of an x86 instruction from its raw bytes.
///
/// \param[in] is_exec_mode_64b
///    If true, the bytes are decoded as 64-bit mode code.
///
/// \param[in] m_opcode
///    The opcode whose bytes are decoded.
///
/// \return
///    eInstructionControlFlowKindUnknown when the opcode has no bytes or they
///    cannot be decoded; otherwise the kind computed by
///    MapOpcodeIntoControlFlowKind().
lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b,
                                                    Opcode m_opcode) {
  // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes
  if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0)
    return lldb::eInstructionControlFlowKindUnknown;

  // Opcode bytes will be decoded into primary_opcode, modrm and opcode length.
  // These are the three values deciding instruction control flow kind.
  llvm::Optional<InstructionOpcodeAndModrm> decoded =
      InstructionLengthDecode((const uint8_t *)m_opcode.GetOpcodeBytes(),
                              m_opcode.GetByteSize(), is_exec_mode_64b);
  if (decoded)
    return MapOpcodeIntoControlFlowKind(decoded.value());

  return lldb::eInstructionControlFlowKindUnknown;
}
|
|
|
|
} // namespace x86
|
|
|
|
/// Report how this instruction affects control flow. Only x86 and x86_64
/// are implemented; every other architecture yields
/// eInstructionControlFlowKindUnknown.
lldb::InstructionControlFlowKind
Instruction::GetControlFlowKind(const ArchSpec &arch) {
  switch (arch.GetTriple().getArch()) {
  case llvm::Triple::x86:
    return x86::GetControlFlowKind(/*is_exec_mode_64b=*/false, m_opcode);
  case llvm::Triple::x86_64:
    return x86::GetControlFlowKind(/*is_exec_mode_64b=*/true, m_opcode);
  default:
    return eInstructionControlFlowKindUnknown; // not implemented
  }
}
|
|
|
|
/// Return the address class of this instruction. While the stored class is
/// AddressClass::eInvalid it is (re)computed from the instruction's address
/// and stored.
AddressClass Instruction::GetAddressClass() {
  if (m_address_class != AddressClass::eInvalid)
    return m_address_class;

  m_address_class = m_address.GetAddressClass();
  return m_address_class;
}
|
|
|
|
/// Write a one-line textual form of this instruction to \a s.
///
/// The line is assembled from, in order: the formatted address (optional),
/// the raw opcode bytes (optional), the control flow kind (optional), the
/// opcode name, the operands and - when there is one - a "; " comment.
///
/// \param[in] s
///     Destination stream.
///
/// \param[in] max_opcode_byte_size
///     Largest opcode size in the listing; when non-zero it determines the
///     width of the byte dump column.
///
/// \param[in] show_address
///     Emit the formatted address, padded to \a max_address_text_size.
///
/// \param[in] show_bytes
///     Emit the raw opcode bytes.
///
/// \param[in] show_control_flow_kind
///     Emit the control flow kind, left-aligned in a 12 character column.
///     "unknown" is printed when \a exe_ctx is null or has no target, since
///     the architecture needed for the classification is taken from the
///     target.
///
/// \param[in] exe_ctx
///     Execution context; may be null.
///
/// \param[in] sym_ctx
/// \param[in] prev_sym_ctx
/// \param[in] disassembly_addr_format
///     Forwarded to Debugger::FormatDisassemblerAddress().
///
/// \param[in] max_address_text_size
///     Column to which the address text is padded.
void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size,
                       bool show_address, bool show_bytes,
                       bool show_control_flow_kind,
                       const ExecutionContext *exe_ctx,
                       const SymbolContext *sym_ctx,
                       const SymbolContext *prev_sym_ctx,
                       const FormatEntity::Entry *disassembly_addr_format,
                       size_t max_address_text_size) {
  size_t opcode_column_width = 7;
  const size_t operand_column_width = 25;

  CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);

  // Build the whole line locally so column positions can be measured.
  StreamString ss;

  if (show_address) {
    Debugger::FormatDisassemblerAddress(disassembly_addr_format, sym_ctx,
                                        prev_sym_ctx, exe_ctx, &m_address, ss);
    ss.FillLastLineToColumn(max_address_text_size, ' ');
  }

  if (show_bytes) {
    if (m_opcode.GetType() == Opcode::eTypeBytes) {
      // x86_64 and i386 are the only ones that use bytes right now so pad out
      // the byte dump to be able to always show 15 bytes (3 chars each) plus a
      // space
      if (max_opcode_byte_size > 0)
        m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1);
      else
        m_opcode.Dump(&ss, 15 * 3 + 1);
    } else {
      // Else, we have ARM or MIPS which can show up to a uint32_t 0x00000000
      // (10 spaces) plus two for padding...
      if (max_opcode_byte_size > 0)
        m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1);
      else
        m_opcode.Dump(&ss, 12);
    }
  }

  if (show_control_flow_kind) {
    // exe_ctx is a pointer and may be null, and even a non-null context may
    // carry no target. Only ask the target for its architecture when it is
    // safe to do so; otherwise the kind stays "unknown".
    lldb::InstructionControlFlowKind control_flow_kind =
        eInstructionControlFlowKindUnknown;
    if (exe_ctx && exe_ctx->HasTargetScope())
      control_flow_kind =
          GetControlFlowKind(exe_ctx->GetTargetRef().GetArchitecture());

    switch (control_flow_kind) {
    case eInstructionControlFlowKindUnknown:
      ss.Printf("%-12s", "unknown");
      break;
    case eInstructionControlFlowKindOther:
      ss.Printf("%-12s", "other");
      break;
    case eInstructionControlFlowKindCall:
      ss.Printf("%-12s", "call");
      break;
    case eInstructionControlFlowKindReturn:
      ss.Printf("%-12s", "return");
      break;
    case eInstructionControlFlowKindJump:
      ss.Printf("%-12s", "jump");
      break;
    case eInstructionControlFlowKindCondJump:
      ss.Printf("%-12s", "cond jump");
      break;
    case eInstructionControlFlowKindFarCall:
      ss.Printf("%-12s", "far call");
      break;
    case eInstructionControlFlowKindFarReturn:
      ss.Printf("%-12s", "far return");
      break;
    case eInstructionControlFlowKindFarJump:
      ss.Printf("%-12s", "far jump");
      break;
    }
  }

  // Everything after this point is positioned relative to where the opcode
  // name starts.
  const size_t opcode_pos = ss.GetSizeOfLastLine();

  // The default opcode size of 7 characters is plenty for most architectures
  // but some like arm can pull out the occasional vqrshrun.s16.  We won't get
  // consistent column spacing in these cases, unfortunately.
  if (m_opcode_name.length() >= opcode_column_width) {
    opcode_column_width = m_opcode_name.length() + 1;
  }

  ss.PutCString(m_opcode_name);
  ss.FillLastLineToColumn(opcode_pos + opcode_column_width, ' ');
  ss.PutCString(m_mnemonics);

  if (!m_comment.empty()) {
    ss.FillLastLineToColumn(
        opcode_pos + opcode_column_width + operand_column_width, ' ');
    ss.PutCString(" ; ");
    ss.PutCString(m_comment);
  }
  s->PutCString(ss.GetString());
}
|
|
|
|
bool Instruction::DumpEmulation(const ArchSpec &arch) {
|
|
std::unique_ptr<EmulateInstruction> insn_emulator_up(
|
|
EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr));
|
|
if (insn_emulator_up) {
|
|
insn_emulator_up->SetInstruction(GetOpcode(), GetAddress(), nullptr);
|
|
return insn_emulator_up->EvaluateInstruction(0);
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool Instruction::CanSetBreakpoint () {
|
|
return !HasDelaySlot();
|
|
}
|
|
|
|
/// Default answer for Instruction: no delay slot.
bool Instruction::HasDelaySlot() {
  return false;
}
|
|
|
|
/// Read array elements from \a in_file, one per line, until a line that is
/// exactly "]" is seen.
///
/// Each non-empty line is stripped of surrounding spaces/tabs when it holds a
/// single token; otherwise the whole line is used as the value. Elements are
/// stored as OptionValueUInt64 when \a data_type is eTypeUInt64 and as
/// OptionValueString for every other type.
///
/// \param[in] in_file     Open file positioned just after the "[" line.
/// \param[in] out_stream  Receives an error message if reading fails.
/// \param[in] data_type   Element type used for the array's type mask and to
///                        pick the element representation.
///
/// \return The populated array, or an empty shared pointer when fgets fails
///         before the closing "]" is found.
OptionValueSP Instruction::ReadArray(FILE *in_file, Stream *out_stream,
                                     OptionValue::Type data_type) {
  bool done = false;
  char buffer[1024];

  auto option_value_sp = std::make_shared<OptionValueArray>(1u << data_type);

  int idx = 0;
  while (!done) {
    if (!fgets(buffer, 1023, in_file)) {
      out_stream->Printf(
          "Instruction::ReadArray: Error reading file (fgets).\n");
      option_value_sp.reset();
      return option_value_sp;
    }

    std::string line(buffer);

    // Drop the trailing newline. The emptiness check matters: fgets can
    // succeed yet produce an empty C string (a line starting with a NUL
    // byte), and indexing line[size() - 1] on an empty string is out of
    // bounds.
    if (!line.empty() && line.back() == '\n')
      line.pop_back();

    if ((line.size() == 1) && line[0] == ']') {
      done = true;
      line.clear();
    }

    if (!line.empty()) {
      std::string value;
      static RegularExpression g_reg_exp(
          llvm::StringRef("^[ \t]*([^ \t]+)[ \t]*$"));
      llvm::SmallVector<llvm::StringRef, 2> matches;
      if (g_reg_exp.Execute(line, &matches))
        value = matches[1].str();
      else
        value = line;

      OptionValueSP data_value_sp;
      switch (data_type) {
      case OptionValue::eTypeUInt64:
        data_value_sp = std::make_shared<OptionValueUInt64>(0, 0);
        data_value_sp->SetValueFromString(value);
        break;
      // Other types can be added later as needed.
      default:
        data_value_sp = std::make_shared<OptionValueString>(value.c_str(), "");
        break;
      }

      option_value_sp->GetAsArray()->InsertValue(idx, data_value_sp);
      ++idx;
    }
  }

  return option_value_sp;
}
|
|
|
|
/// Read "key = value" entries from \a in_file, one per line, until a line
/// that is exactly "}" is seen.
///
/// Values are interpreted by their first characters:
///   - "{"   : a nested dictionary, read recursively;
///   - "["   : an array, read with ReadArray using the pending data type;
///   - "0x"  : stored as an OptionValueUInt64;
///   - other : stored as a string, with one pair of surrounding double quotes
///             removed when present.
///
/// The key "data_encoding" is not stored in the dictionary. It only sets the
/// element type for the next array: "uint32_t" selects eTypeUInt64, and the
/// type is reset to eTypeInvalid once an array has been read.
///
/// \param[in] in_file     Open file positioned just after the "{" line.
/// \param[in] out_stream  Receives error messages.
///
/// \return The populated dictionary, or an empty shared pointer on a read
///         error, on a line that is not a key/value pair, or when a nested
///         dictionary or array fails to parse.
OptionValueSP Instruction::ReadDictionary(FILE *in_file, Stream *out_stream) {
  bool done = false;
  char buffer[1024];

  auto option_value_sp = std::make_shared<OptionValueDictionary>();
  static ConstString encoding_key("data_encoding");
  OptionValue::Type data_type = OptionValue::eTypeInvalid;

  while (!done) {
    // Read the next line in the file
    if (!fgets(buffer, 1023, in_file)) {
      out_stream->Printf(
          "Instruction::ReadDictionary: Error reading file (fgets).\n");
      option_value_sp.reset();
      return option_value_sp;
    }

    std::string line(buffer);

    // Drop the trailing newline. The emptiness check matters: fgets can
    // succeed yet produce an empty C string (a line starting with a NUL
    // byte), and indexing line[size() - 1] on an empty string is out of
    // bounds.
    if (!line.empty() && line.back() == '\n')
      line.pop_back();

    // Check to see if the line contains the end-of-dictionary marker ("}")
    if ((line.size() == 1) && (line[0] == '}')) {
      done = true;
      line.clear();
    }

    // Try to find a key-value pair in the current line and add it to the
    // dictionary.
    if (!line.empty()) {
      static RegularExpression g_reg_exp(llvm::StringRef(
          "^[ \t]*([a-zA-Z_][a-zA-Z0-9_]*)[ \t]*=[ \t]*(.*)[ \t]*$"));

      llvm::SmallVector<llvm::StringRef, 3> matches;

      bool reg_exp_success = g_reg_exp.Execute(line, &matches);
      std::string key;
      std::string value;
      if (reg_exp_success) {
        key = matches[1].str();
        value = matches[2].str();
      } else {
        out_stream->Printf("Instruction::ReadDictionary: Failure executing "
                           "regular expression.\n");
        option_value_sp.reset();
        return option_value_sp;
      }

      ConstString const_key(key.c_str());
      // Check value to see if it's the start of an array or dictionary.

      lldb::OptionValueSP value_sp;
      assert(value.empty() == false);
      assert(key.empty() == false);

      if (value[0] == '{') {
        assert(value.size() == 1);
        // value is a dictionary
        value_sp = ReadDictionary(in_file, out_stream);
        if (!value_sp) {
          option_value_sp.reset();
          return option_value_sp;
        }
      } else if (value[0] == '[') {
        assert(value.size() == 1);
        // value is an array
        value_sp = ReadArray(in_file, out_stream, data_type);
        if (!value_sp) {
          option_value_sp.reset();
          return option_value_sp;
        }
        // We've used the data_type to read an array; re-set the type to
        // Invalid
        data_type = OptionValue::eTypeInvalid;
      } else if ((value[0] == '0') && (value[1] == 'x')) {
        value_sp = std::make_shared<OptionValueUInt64>(0, 0);
        value_sp->SetValueFromString(value);
      } else {
        size_t len = value.size();
        if ((value[0] == '"') && (value[len - 1] == '"'))
          value = value.substr(1, len - 2);
        value_sp = std::make_shared<OptionValueString>(value.c_str(), "");
      }

      if (const_key == encoding_key) {
        // A 'data_encoding=..." is NOT a normal key-value pair; it is
        // meta-data indicating the data type of an upcoming array (usually
        // the next bit of data to be read in).
        if (value == "uint32_t")
          data_type = OptionValue::eTypeUInt64;
      } else
        option_value_sp->GetAsDictionary()->SetValueForKey(const_key, value_sp,
                                                           false);
    }
  }

  return option_value_sp;
}
|
|
|
|
/// Load an emulation test description from \a file_name and run it through
/// the EmulateInstruction plugin selected by the file's "triple" entry.
///
/// The file must begin with "InstructionEmulationState={" followed by a
/// dictionary in the format ReadDictionary accepts. The dictionary must
/// provide "assembly_string" (used as this instruction's description) and
/// "triple".
///
/// \param[in] out_stream  Receives progress and error messages; when null the
///                        function returns false immediately.
/// \param[in] file_name   Path of the test file.
///
/// \return True only when a plugin was found and its TestEmulation succeeded.
bool Instruction::TestEmulation(Stream *out_stream, const char *file_name) {
  if (!out_stream)
    return false;

  if (!file_name) {
    out_stream->Printf("Instruction::TestEmulation: Missing file_name.");
    return false;
  }
  FILE *test_file = FileSystem::Instance().Fopen(file_name, "r");
  if (!test_file) {
    out_stream->Printf(
        "Instruction::TestEmulation: Attempt to open test file failed.");
    return false;
  }

  char buffer[256];
  if (!fgets(buffer, 255, test_file)) {
    out_stream->Printf(
        "Instruction::TestEmulation: Error reading first line of test file.\n");
    fclose(test_file);
    return false;
  }

  // 27 is the length of the expected header literal.
  if (strncmp(buffer, "InstructionEmulationState={", 27) != 0) {
    out_stream->Printf("Instruction::TestEmulation: Test file does not contain "
                       "emulation state dictionary\n");
    fclose(test_file);
    return false;
  }

  // Read all the test information from the test file into an
  // OptionValueDictionary.

  OptionValueSP data_dictionary_sp(ReadDictionary(test_file, out_stream));
  if (!data_dictionary_sp) {
    out_stream->Printf(
        "Instruction::TestEmulation: Error reading Dictionary Object.\n");
    fclose(test_file);
    return false;
  }

  // Everything needed is now in memory; the file is not used past this point.
  fclose(test_file);

  OptionValueDictionary *data_dictionary =
      data_dictionary_sp->GetAsDictionary();
  static ConstString description_key("assembly_string");
  static ConstString triple_key("triple");

  OptionValueSP value_sp = data_dictionary->GetValueForKey(description_key);

  if (!value_sp) {
    out_stream->Printf("Instruction::TestEmulation: Test file does not "
                       "contain description string.\n");
    return false;
  }

  SetDescription(value_sp->GetStringValue());

  value_sp = data_dictionary->GetValueForKey(triple_key);
  if (!value_sp) {
    out_stream->Printf(
        "Instruction::TestEmulation: Test file does not contain triple.\n");
    return false;
  }

  ArchSpec arch;
  arch.SetTriple(llvm::Triple(value_sp->GetStringValue()));

  bool success = false;
  std::unique_ptr<EmulateInstruction> insn_emulator_up(
      EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr));
  if (insn_emulator_up)
    success =
        insn_emulator_up->TestEmulation(out_stream, arch, data_dictionary);

  if (success)
    out_stream->Printf("Emulation test succeeded.");
  else
    out_stream->Printf("Emulation test failed.");

  return success;
}
|
|
|
|
bool Instruction::Emulate(
|
|
const ArchSpec &arch, uint32_t evaluate_options, void *baton,
|
|
EmulateInstruction::ReadMemoryCallback read_mem_callback,
|
|
EmulateInstruction::WriteMemoryCallback write_mem_callback,
|
|
EmulateInstruction::ReadRegisterCallback read_reg_callback,
|
|
EmulateInstruction::WriteRegisterCallback write_reg_callback) {
|
|
std::unique_ptr<EmulateInstruction> insn_emulator_up(
|
|
EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr));
|
|
if (insn_emulator_up) {
|
|
insn_emulator_up->SetBaton(baton);
|
|
insn_emulator_up->SetCallbacks(read_mem_callback, write_mem_callback,
|
|
read_reg_callback, write_reg_callback);
|
|
insn_emulator_up->SetInstruction(GetOpcode(), GetAddress(), nullptr);
|
|
return insn_emulator_up->EvaluateInstruction(evaluate_options);
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/// Forward to the opcode's GetData, which fills \a data, and return its
/// result.
uint32_t Instruction::GetData(DataExtractor &data) {
  const uint32_t result = m_opcode.GetData(data);
  return result;
}
|
|
|
|
// Construct a list with an empty instruction collection.
InstructionList::InstructionList() : m_instructions() {}
|
|
|
|
// Compiler-generated destructor.
InstructionList::~InstructionList() = default;
|
|
|
|
// Number of instructions currently held in the list.
size_t InstructionList::GetSize() const { return m_instructions.size(); }
|
|
|
|
uint32_t InstructionList::GetMaxOpcocdeByteSize() const {
|
|
uint32_t max_inst_size = 0;
|
|
collection::const_iterator pos, end;
|
|
for (pos = m_instructions.begin(), end = m_instructions.end(); pos != end;
|
|
++pos) {
|
|
uint32_t inst_size = (*pos)->GetOpcode().GetByteSize();
|
|
if (max_inst_size < inst_size)
|
|
max_inst_size = inst_size;
|
|
}
|
|
return max_inst_size;
|
|
}
|
|
|
|
/// Return the instruction stored at \a idx, or an empty shared pointer when
/// \a idx is past the end of the list.
InstructionSP InstructionList::GetInstructionAtIndex(size_t idx) const {
  if (idx >= m_instructions.size())
    return InstructionSP();
  return m_instructions[idx];
}
|
|
|
|
/// Return the instruction whose address equals \a address, or nullptr when
/// the list holds no such instruction.
InstructionSP InstructionList::GetInstructionAtAddress(const Address &address) {
  const uint32_t found = GetIndexOfInstructionAtAddress(address);
  if (found == UINT32_MAX)
    return nullptr;
  return GetInstructionAtIndex(found);
}
|
|
|
|
/// Dump every instruction in the list to \a s, separating consecutive
/// instructions with an end-of-line.
///
/// The address format comes from the debugger's disassembly format when
/// \a exe_ctx has target scope; otherwise a locally parsed "${addr}: " format
/// is used. The byte-dump column is sized from the widest opcode in the list.
void InstructionList::Dump(Stream *s, bool show_address, bool show_bytes,
                           bool show_control_flow_kind,
                           const ExecutionContext *exe_ctx) {
  const uint32_t max_opcode_byte_size = GetMaxOpcocdeByteSize();

  // Backing storage for the fallback format; must outlive the loop below.
  FormatEntity::Entry fallback_format;
  const FormatEntity::Entry *disassembly_format = nullptr;
  if (exe_ctx && exe_ctx->HasTargetScope()) {
    disassembly_format =
        exe_ctx->GetTargetRef().GetDebugger().GetDisassemblyFormat();
  } else {
    FormatEntity::Parse("${addr}: ", fallback_format);
    disassembly_format = &fallback_format;
  }

  bool is_first = true;
  for (const auto &inst_sp : m_instructions) {
    if (!is_first)
      s->EOL();
    is_first = false;
    inst_sp->Dump(s, max_opcode_byte_size, show_address, show_bytes,
                  show_control_flow_kind, exe_ctx, nullptr, nullptr,
                  disassembly_format, 0);
  }
}
|
|
|
|
// Remove every instruction from the list.
void InstructionList::Clear() { m_instructions.clear(); }
|
|
|
|
/// Append \a inst_sp to the end of the list; empty shared pointers are
/// ignored.
void InstructionList::Append(lldb::InstructionSP &inst_sp) {
  if (!inst_sp)
    return;
  m_instructions.push_back(inst_sp);
}
|
|
|
|
uint32_t
|
|
InstructionList::GetIndexOfNextBranchInstruction(uint32_t start,
|
|
bool ignore_calls,
|
|
bool *found_calls) const {
|
|
size_t num_instructions = m_instructions.size();
|
|
|
|
uint32_t next_branch = UINT32_MAX;
|
|
|
|
if (found_calls)
|
|
*found_calls = false;
|
|
for (size_t i = start; i < num_instructions; i++) {
|
|
if (m_instructions[i]->DoesBranch()) {
|
|
if (ignore_calls && m_instructions[i]->IsCall()) {
|
|
if (found_calls)
|
|
*found_calls = true;
|
|
continue;
|
|
}
|
|
next_branch = i;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return next_branch;
|
|
}
|
|
|
|
uint32_t
|
|
InstructionList::GetIndexOfInstructionAtAddress(const Address &address) {
|
|
size_t num_instructions = m_instructions.size();
|
|
uint32_t index = UINT32_MAX;
|
|
for (size_t i = 0; i < num_instructions; i++) {
|
|
if (m_instructions[i]->GetAddress() == address) {
|
|
index = i;
|
|
break;
|
|
}
|
|
}
|
|
return index;
|
|
}
|
|
|
|
uint32_t
|
|
InstructionList::GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr,
|
|
Target &target) {
|
|
Address address;
|
|
address.SetLoadAddress(load_addr, &target);
|
|
return GetIndexOfInstructionAtAddress(address);
|
|
}
|
|
|
|
/// Clear the instruction list, read memory starting at \a start from
/// \a target, and decode it.
///
/// \param[in] target             Target whose memory is read.
/// \param[in] start              First address to disassemble; an invalid
///                               address yields 0.
/// \param[in] limit              Either a byte count or an instruction count.
///                               For an instruction count, the read buffer is
///                               sized as count times the architecture's
///                               maximum opcode byte size.
/// \param[in] error_strm_ptr     Optional stream that receives the read error
///                               text when nothing could be read.
/// \param[in] force_live_memory  Passed through to Target::ReadMemory.
///
/// \return The value returned by DecodeInstructions, or 0 when \a start is
///         invalid or no bytes were read.
size_t Disassembler::ParseInstructions(Target &target, Address start,
                                       Limit limit, Stream *error_strm_ptr,
                                       bool force_live_memory) {
  m_instruction_list.Clear();

  if (!start.IsValid())
    return 0;

  start = ResolveAddress(target, start);

  const bool counting_instructions = limit.kind == Limit::Instructions;

  addr_t buffer_size = limit.value;
  if (counting_instructions)
    buffer_size *= m_arch.GetMaximumOpcodeByteSize();
  auto data_sp = std::make_shared<DataBufferHeap>(buffer_size, '\0');

  Status error;
  lldb::addr_t load_addr = LLDB_INVALID_ADDRESS;
  const size_t bytes_read =
      target.ReadMemory(start, data_sp->GetBytes(), data_sp->GetByteSize(),
                        error, force_live_memory, &load_addr);

  if (bytes_read == 0) {
    // Only ask for the error text when there is somewhere to print it.
    const char *error_cstr = error_strm_ptr ? error.AsCString() : nullptr;
    if (error_cstr)
      error_strm_ptr->Printf("error: %s\n", error_cstr);
    return 0;
  }

  // ReadMemory left load_addr untouched, so the flag handed to the decoder
  // says the data came from a file.
  const bool data_from_file = load_addr == LLDB_INVALID_ADDRESS;

  // Shrink the buffer when fewer bytes than requested were read.
  if (bytes_read != data_sp->GetByteSize())
    data_sp->SetByteSize(bytes_read);

  DataExtractor data(data_sp, m_arch.GetByteOrder(),
                     m_arch.GetAddressByteSize());
  return DecodeInstructions(start, data, 0,
                            counting_instructions ? limit.value : UINT32_MAX,
                            false, data_from_file);
}
|
|
|
|
// Disassembler copy constructor
|
|
Disassembler::Disassembler(const ArchSpec &arch, const char *flavor)
|
|
: m_arch(arch), m_instruction_list(), m_base_addr(LLDB_INVALID_ADDRESS),
|
|
m_flavor() {
|
|
if (flavor == nullptr)
|
|
m_flavor.assign("default");
|
|
else
|
|
m_flavor.assign(flavor);
|
|
|
|
// If this is an arm variant that can only include thumb (T16, T32)
|
|
// instructions, force the arch triple to be "thumbv.." instead of "armv..."
|
|
if (arch.IsAlwaysThumbInstructions()) {
|
|
std::string thumb_arch_name(arch.GetTriple().getArchName().str());
|
|
// Replace "arm" with "thumb" so we get all thumb variants correct
|
|
if (thumb_arch_name.size() > 3) {
|
|
thumb_arch_name.erase(0, 3);
|
|
thumb_arch_name.insert(0, "thumb");
|
|
}
|
|
m_arch.SetTriple(thumb_arch_name.c_str());
|
|
}
|
|
}
|
|
|
|
// Compiler-generated destructor.
Disassembler::~Disassembler() = default;
|
|
|
|
/// Mutable access to the list of decoded instructions owned by this
/// disassembler.
InstructionList &Disassembler::GetInstructionList() {
  InstructionList &list = m_instruction_list;
  return list;
}
|
|
|
|
/// Read-only access to the list of decoded instructions owned by this
/// disassembler.
const InstructionList &Disassembler::GetInstructionList() const {
  const InstructionList &list = m_instruction_list;
  return list;
}
|
|
|
|
// Class PseudoInstruction
|
|
|
|
/// Construct a pseudo instruction with a default Address, an unknown address
/// class, and an empty description.
PseudoInstruction::PseudoInstruction()
    : Instruction(Address(), AddressClass::eUnknown), m_description() {
}
|
|
|
|
// Compiler-generated destructor.
PseudoInstruction::~PseudoInstruction() = default;
|
|
|
|
/// Branching is not a meaningful question for a pseudo instruction, so the
/// answer is always false.
bool PseudoInstruction::DoesBranch() {
  return false;
}
|
|
|
|
/// Delay slots are not a meaningful question for a pseudo instruction, so the
/// answer is always false.
bool PseudoInstruction::HasDelaySlot() {
  return false;
}
|
|
|
|
// A pseudo instruction never reports itself as a load.
bool PseudoInstruction::IsLoad() { return false; }
|
|
|
|
// A pseudo instruction never reports itself as authenticated.
bool PseudoInstruction::IsAuthenticated() { return false; }
|
|
|
|
/// Nothing is decoded for a pseudo instruction: all three arguments are
/// ignored and the byte size of the opcode already stored is returned.
size_t PseudoInstruction::Decode(const lldb_private::Disassembler &disassembler,
                                 const lldb_private::DataExtractor &data,
                                 lldb::offset_t data_offset) {
  const size_t opcode_byte_size = m_opcode.GetByteSize();
  return opcode_byte_size;
}
|
|
|
|
void PseudoInstruction::SetOpcode(size_t opcode_size, void *opcode_data) {
|
|
if (!opcode_data)
|
|
return;
|
|
|
|
switch (opcode_size) {
|
|
case 8: {
|
|
uint8_t value8 = *((uint8_t *)opcode_data);
|
|
m_opcode.SetOpcode8(value8, eByteOrderInvalid);
|
|
break;
|
|
}
|
|
case 16: {
|
|
uint16_t value16 = *((uint16_t *)opcode_data);
|
|
m_opcode.SetOpcode16(value16, eByteOrderInvalid);
|
|
break;
|
|
}
|
|
case 32: {
|
|
uint32_t value32 = *((uint32_t *)opcode_data);
|
|
m_opcode.SetOpcode32(value32, eByteOrderInvalid);
|
|
break;
|
|
}
|
|
case 64: {
|
|
uint64_t value64 = *((uint64_t *)opcode_data);
|
|
m_opcode.SetOpcode64(value64, eByteOrderInvalid);
|
|
break;
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
/// Replace the stored description with a copy of \a description.
void PseudoInstruction::SetDescription(llvm::StringRef description) {
  m_description = description.str();
}
|
|
|
|
/// Build a Register operand naming register \a r.
Instruction::Operand Instruction::Operand::BuildRegister(ConstString &r) {
  Operand reg_operand;
  reg_operand.m_register = r;
  reg_operand.m_type = Type::Register;
  return reg_operand;
}
|
|
|
|
/// Build an Immediate operand from a magnitude \a imm and a sign flag \a neg.
Instruction::Operand Instruction::Operand::BuildImmediate(lldb::addr_t imm,
                                                          bool neg) {
  Operand imm_operand;
  imm_operand.m_negative = neg;
  imm_operand.m_immediate = imm;
  imm_operand.m_type = Type::Immediate;
  return imm_operand;
}
|
|
|
|
/// Build an Immediate operand from a signed value, stored as a magnitude in
/// m_immediate plus a sign flag in m_negative.
///
/// The magnitude is computed in unsigned arithmetic. Negating \a imm as a
/// signed value would overflow (undefined behavior) for INT64_MIN; the
/// unsigned form yields 2^63 for that input and the same result as before for
/// every other value.
Instruction::Operand Instruction::Operand::BuildImmediate(int64_t imm) {
  Operand ret;
  ret.m_type = Type::Immediate;

  const bool is_negative = imm < 0;
  const uint64_t bits = static_cast<uint64_t>(imm);
  ret.m_immediate = is_negative ? (0 - bits) : bits;
  ret.m_negative = is_negative;
  return ret;
}
|
|
|
|
/// Build a Dereference operand whose single child is a copy of \a ref.
Instruction::Operand
Instruction::Operand::BuildDereference(const Operand &ref) {
  Operand deref_operand;
  deref_operand.m_children = {ref};
  deref_operand.m_type = Type::Dereference;
  return deref_operand;
}
|
|
|
|
/// Build a Sum operand with children \a lhs and \a rhs, in that order.
Instruction::Operand Instruction::Operand::BuildSum(const Operand &lhs,
                                                    const Operand &rhs) {
  Operand sum_operand;
  sum_operand.m_children = {lhs, rhs};
  sum_operand.m_type = Type::Sum;
  return sum_operand;
}
|
|
|
|
/// Build a Product operand with children \a lhs and \a rhs, in that order.
Instruction::Operand Instruction::Operand::BuildProduct(const Operand &lhs,
                                                        const Operand &rhs) {
  Operand product_operand;
  product_operand.m_children = {lhs, rhs};
  product_operand.m_type = Type::Product;
  return product_operand;
}
|
|
|
|
/// Return a matcher that accepts an operand when \a base accepts it, it has
/// exactly two children, and \a left / \a right accept those children in
/// either order. The (first, second) pairing is tried before the swapped one.
std::function<bool(const Instruction::Operand &)>
lldb_private::OperandMatchers::MatchBinaryOp(
    std::function<bool(const Instruction::Operand &)> base,
    std::function<bool(const Instruction::Operand &)> left,
    std::function<bool(const Instruction::Operand &)> right) {
  return [base, left, right](const Instruction::Operand &op) -> bool {
    if (!base(op) || op.m_children.size() != 2)
      return false;

    const Instruction::Operand &first = op.m_children[0];
    const Instruction::Operand &second = op.m_children[1];
    if (left(first) && right(second))
      return true;
    return left(second) && right(first);
  };
}
|
|
|
|
/// Return a matcher that accepts an operand when \a base accepts it, it has
/// exactly one child, and \a child accepts that child.
std::function<bool(const Instruction::Operand &)>
lldb_private::OperandMatchers::MatchUnaryOp(
    std::function<bool(const Instruction::Operand &)> base,
    std::function<bool(const Instruction::Operand &)> child) {
  return [base, child](const Instruction::Operand &op) -> bool {
    if (!base(op))
      return false;
    if (op.m_children.size() != 1)
      return false;
    return child(op.m_children[0]);
  };
}
|
|
|
|
/// Return a matcher that accepts a Register operand whose register name
/// equals either \a info.name or \a info.alt_name.
///
/// \a info is captured by reference, so it must outlive the returned matcher.
std::function<bool(const Instruction::Operand &)>
lldb_private::OperandMatchers::MatchRegOp(const RegisterInfo &info) {
  return [&info](const Instruction::Operand &op) {
    if (op.m_type != Instruction::Operand::Type::Register)
      return false;
    if (op.m_register == ConstString(info.name))
      return true;
    return op.m_register == ConstString(info.alt_name);
  };
}
|
|
|
|
/// Return a matcher that accepts any Register operand and, on a match,
/// stores the operand's register name into \a reg.
///
/// \a reg is captured by reference, so it must outlive the returned matcher.
/// It is left untouched when the operand is not a register.
std::function<bool(const Instruction::Operand &)>
lldb_private::OperandMatchers::FetchRegOp(ConstString &reg) {
  return [&reg](const Instruction::Operand &op) {
    if (op.m_type != Instruction::Operand::Type::Register) {
      return false;
    }
    reg = op.m_register;
    return true;
  };
}
|
|
|
|
/// Return a matcher that accepts an Immediate operand whose sign flag and
/// magnitude encode \a imm.
///
/// A negative operand matches when its magnitude equals the two's-complement
/// negation of \a imm; a non-negative operand matches when its magnitude
/// equals \a imm. The negation is done in unsigned arithmetic so that
/// \a imm == INT64_MIN does not overflow a signed negation; results for all
/// other values are unchanged.
std::function<bool(const Instruction::Operand &)>
lldb_private::OperandMatchers::MatchImmOp(int64_t imm) {
  return [imm](const Instruction::Operand &op) {
    if (op.m_type != Instruction::Operand::Type::Immediate)
      return false;

    const uint64_t bits = static_cast<uint64_t>(imm);
    const uint64_t expected = op.m_negative ? (0 - bits) : bits;
    return op.m_immediate == expected;
  };
}
|
|
|
|
/// Return a matcher that accepts any Immediate operand and, on a match,
/// stores its signed value into \a imm.
///
/// \a imm is captured by reference, so it must outlive the returned matcher.
/// The sign is applied in unsigned arithmetic before converting to int64_t:
/// negating after the conversion would overflow when the magnitude is 2^63.
/// Results for all other magnitudes are unchanged.
std::function<bool(const Instruction::Operand &)>
lldb_private::OperandMatchers::FetchImmOp(int64_t &imm) {
  return [&imm](const Instruction::Operand &op) {
    if (op.m_type != Instruction::Operand::Type::Immediate) {
      return false;
    }
    const uint64_t magnitude = op.m_immediate;
    imm = static_cast<int64_t>(op.m_negative ? (0 - magnitude) : magnitude);
    return true;
  };
}
|
|
|
|
/// Return a matcher that accepts any operand whose type equals \a type.
std::function<bool(const Instruction::Operand &)>
lldb_private::OperandMatchers::MatchOpType(Instruction::Operand::Type type) {
  auto matcher = [type](const Instruction::Operand &op) {
    return type == op.m_type;
  };
  return matcher;
}
|