Files
clang-p2996/lldb/source/Plugins/Disassembler/llvm/DisassemblerLLVM.cpp
Greg Clayton 0ae962735f Made the lldb_private::Opcode struct into a real boy... I mean class.
Modified the Disassembler::Instruction base class to contain an Opcode 
instance so that we can know the bytes for an instruction without needing
to keep the data around.

Modified the DisassemblerLLVM's instruction class to correctly extract the
opcode bytes if all goes well.

llvm-svn: 128248
2011-03-24 23:53:38 +00:00

530 lines
16 KiB
C++

//===-- DisassemblerLLVM.cpp ------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "DisassemblerLLVM.h"
#include "llvm-c/EnhancedDisassembly.h"
#include "lldb/Core/Address.h"
#include "lldb/Core/DataExtractor.h"
#include "lldb/Core/Disassembler.h"
#include "lldb/Core/Module.h"
#include "lldb/Core/PluginManager.h"
#include "lldb/Core/Stream.h"
#include "lldb/Core/StreamString.h"
#include "lldb/Symbol/SymbolContext.h"
#include "lldb/Target/ExecutionContext.h"
#include "lldb/Target/Process.h"
#include "lldb/Target/RegisterContext.h"
#include "lldb/Target/Target.h"
#include <assert.h>
using namespace lldb;
using namespace lldb_private;
static int
DataExtractorByteReader (uint8_t *byte, uint64_t address, void *arg)
{
DataExtractor &extractor = *((DataExtractor *)arg);
if (extractor.ValidOffset(address))
{
*byte = *(extractor.GetDataStart() + address);
return 0;
}
else
{
return -1;
}
}
namespace {
struct RegisterReaderArg {
const lldb::addr_t instructionPointer;
const EDDisassemblerRef disassembler;
RegisterReaderArg(lldb::addr_t ip,
EDDisassemblerRef dis) :
instructionPointer(ip),
disassembler(dis)
{
}
};
}
static int IPRegisterReader(uint64_t *value, unsigned regID, void* arg)
{
uint64_t instructionPointer = ((RegisterReaderArg*)arg)->instructionPointer;
EDDisassemblerRef disassembler = ((RegisterReaderArg*)arg)->disassembler;
if (EDRegisterIsProgramCounter(disassembler, regID)) {
*value = instructionPointer;
return 0;
}
return -1;
}
DisassemblerLLVM::InstructionLLVM::InstructionLLVM (const Address &addr, EDDisassemblerRef disassembler) :
Instruction (addr),
m_disassembler (disassembler)
{
}
DisassemblerLLVM::InstructionLLVM::~InstructionLLVM()
{
}
static void
PadString(Stream *s, const std::string &str, size_t width)
{
int diff = width - str.length();
if (diff > 0)
s->Printf("%s%*.*s", str.c_str(), diff, diff, "");
else
s->Printf("%s ", str.c_str());
}
void
DisassemblerLLVM::InstructionLLVM::Dump
(
Stream *s,
bool show_address,
const DataExtractor *bytes,
uint32_t bytes_offset,
const lldb_private::ExecutionContext* exe_ctx,
bool raw
)
{
const size_t opcodeColumnWidth = 7;
const size_t operandColumnWidth = 25;
ExecutionContextScope *exe_scope = NULL;
if (exe_ctx)
exe_scope = exe_ctx->GetBestExecutionContextScope();
// If we have an address, print it out
if (GetAddress().IsValid() && show_address)
{
if (GetAddress().Dump (s,
exe_scope,
Address::DumpStyleLoadAddress,
Address::DumpStyleModuleWithFileAddress,
0))
s->PutCString(": ");
}
// If we are supposed to show bytes, "bytes" will be non-NULL.
if (bytes)
{
uint32_t bytes_dumped = bytes->Dump(s, bytes_offset, eFormatBytes, 1, EDInstByteSize(m_inst), UINT32_MAX, LLDB_INVALID_ADDRESS, 0, 0) - bytes_offset;
// Allow for 15 bytes of opcodes since this is the max for x86_64.
// TOOD: We need to taylor this better for different architectures. For
// ARM we would want to show 16 bit opcodes for Thumb as properly byte
// swapped uint16_t values, or 32 bit values swapped values for ARM.
const uint32_t default_num_opcode_bytes = 15;
if (bytes_dumped * 3 < (default_num_opcode_bytes*3))
{
uint32_t indent_level = (default_num_opcode_bytes*3) - (bytes_dumped * 3) + 1;
s->Printf("%*.*s", indent_level, indent_level, "");
}
}
int numTokens = EDNumTokens(m_inst);
int currentOpIndex = -1;
std::auto_ptr<RegisterReaderArg> rra;
if (!raw)
{
addr_t base_addr = LLDB_INVALID_ADDRESS;
if (exe_ctx && exe_ctx->target && !exe_ctx->target->GetSectionLoadList().IsEmpty())
base_addr = GetAddress().GetLoadAddress (exe_ctx->target);
if (base_addr == LLDB_INVALID_ADDRESS)
base_addr = GetAddress().GetFileAddress ();
rra.reset(new RegisterReaderArg(base_addr + EDInstByteSize(m_inst), m_disassembler));
}
bool printTokenized = false;
if (numTokens != -1)
{
printTokenized = true;
// Handle the opcode column.
StreamString opcode;
int tokenIndex = 0;
EDTokenRef token;
const char *tokenStr;
if (EDGetToken(&token, m_inst, tokenIndex))
printTokenized = false;
if (!printTokenized || !EDTokenIsOpcode(token))
printTokenized = false;
if (!printTokenized || EDGetTokenString(&tokenStr, token))
printTokenized = false;
// Put the token string into our opcode string
opcode.PutCString(tokenStr);
// If anything follows, it probably starts with some whitespace. Skip it.
tokenIndex++;
if (printTokenized && tokenIndex < numTokens)
{
if(!printTokenized || EDGetToken(&token, m_inst, tokenIndex))
printTokenized = false;
if(!printTokenized || !EDTokenIsWhitespace(token))
printTokenized = false;
}
tokenIndex++;
// Handle the operands and the comment.
StreamString operands;
StreamString comment;
if (printTokenized)
{
bool show_token;
for (; tokenIndex < numTokens; ++tokenIndex)
{
if (EDGetToken(&token, m_inst, tokenIndex))
return;
if (raw)
{
show_token = true;
}
else
{
int operandIndex = EDOperandIndexForToken(token);
if (operandIndex >= 0)
{
if (operandIndex != currentOpIndex)
{
show_token = true;
currentOpIndex = operandIndex;
EDOperandRef operand;
if (!EDGetOperand(&operand, m_inst, currentOpIndex))
{
if (EDOperandIsMemory(operand))
{
uint64_t operand_value;
if (!EDEvaluateOperand(&operand_value, operand, IPRegisterReader, rra.get()))
{
if (EDInstIsBranch(m_inst))
{
operands.Printf("0x%llx ", operand_value);
show_token = false;
}
else
{
// Put the address value into the comment
comment.Printf("0x%llx ", operand_value);
}
lldb_private::Address so_addr;
if (exe_ctx && exe_ctx->target && !exe_ctx->target->GetSectionLoadList().IsEmpty())
{
if (exe_ctx->target->GetSectionLoadList().ResolveLoadAddress (operand_value, so_addr))
so_addr.Dump(&comment, exe_scope, Address::DumpStyleResolvedDescriptionNoModule, Address::DumpStyleSectionNameOffset);
}
else
{
Module *module = GetAddress().GetModule();
if (module)
{
if (module->ResolveFileAddress (operand_value, so_addr))
so_addr.Dump(&comment, exe_scope, Address::DumpStyleResolvedDescriptionNoModule, Address::DumpStyleSectionNameOffset);
}
}
} // EDEvaluateOperand
} // EDOperandIsMemory
} // EDGetOperand
} // operandIndex != currentOpIndex
} // operandIndex >= 0
} // else(raw)
if (show_token)
{
if(EDGetTokenString(&tokenStr, token))
{
printTokenized = false;
break;
}
operands.PutCString(tokenStr);
}
} // for (tokenIndex)
if (printTokenized)
{
if (operands.GetString().empty())
{
s->PutCString(opcode.GetString().c_str());
}
else
{
PadString(s, opcode.GetString(), opcodeColumnWidth);
if (comment.GetString().empty())
{
s->PutCString(operands.GetString().c_str());
}
else
{
PadString(s, operands.GetString(), operandColumnWidth);
s->PutCString("; ");
s->PutCString(comment.GetString().c_str());
} // else (comment.GetString().empty())
} // else (operands.GetString().empty())
} // printTokenized
} // for (tokenIndex)
} // numTokens != -1
if (!printTokenized)
{
const char *str;
if (EDGetInstString(&str, m_inst))
return;
else
s->PutCString(str);
}
}
bool
DisassemblerLLVM::InstructionLLVM::DoesBranch() const
{
return EDInstIsBranch(m_inst);
}
size_t
DisassemblerLLVM::InstructionLLVM::GetByteSize() const
{
return EDInstByteSize(m_inst);
}
size_t
DisassemblerLLVM::InstructionLLVM::Extract (const Disassembler &disassembler,
const lldb_private::DataExtractor &data,
uint32_t data_offset)
{
if (EDCreateInsts(&m_inst, 1, m_disassembler, DataExtractorByteReader, data_offset, (void*)(&data)))
{
const int byte_size = EDInstByteSize(m_inst);
uint32_t offset = data_offset;
// Make a copy of the opcode in m_opcode
switch (disassembler.GetArchitecture().GetMachine())
{
case llvm::Triple::x86:
case llvm::Triple::x86_64:
m_opcode.SetOpcodeBytes (data.PeekData (data_offset, byte_size), byte_size);
break;
case llvm::Triple::arm:
assert (byte_size == 4);
m_opcode.SetOpcode32 (data.GetU32 (&offset));
break;
case llvm::Triple::thumb:
assert ((byte_size == 2) || (byte_size == 4));
if (byte_size == 2)
m_opcode.SetOpcode16 (data.GetU16 (&offset));
else
m_opcode.SetOpcode32 (data.GetU32 (&offset));
break;
default:
assert (!"This shouldn't happen since we control the architecture we allow DisassemblerLLVM to be created for");
break;
}
return byte_size;
}
else
return 0;
}
static inline EDAssemblySyntax_t
SyntaxForArchSpec (const ArchSpec &arch)
{
switch (arch.GetMachine ())
{
case llvm::Triple::x86:
case llvm::Triple::x86_64:
return kEDAssemblySyntaxX86ATT;
case llvm::Triple::arm:
return kEDAssemblySyntaxARMUAL;
default:
break;
}
return (EDAssemblySyntax_t)0; // default
}
Disassembler *
DisassemblerLLVM::CreateInstance(const ArchSpec &arch)
{
std::auto_ptr<DisassemblerLLVM> disasm_ap (new DisassemblerLLVM(arch));
if (disasm_ap->IsValid())
return disasm_ap.release();
return NULL;
}
DisassemblerLLVM::DisassemblerLLVM(const ArchSpec &arch) :
Disassembler (arch),
m_disassembler (NULL),
m_disassembler_thumb (NULL) // For ARM only
{
const std::string &arch_triple = arch.GetTriple().str();
if (!arch_triple.empty())
{
if (EDGetDisassembler(&m_disassembler, arch_triple.c_str(), SyntaxForArchSpec (arch)))
m_disassembler = NULL;
llvm::Triple::ArchType llvm_arch = arch.GetTriple().getArch();
if (llvm_arch == llvm::Triple::arm)
{
if (EDGetDisassembler(&m_disassembler_thumb, "thumb-apple-darwin", kEDAssemblySyntaxARMUAL))
m_disassembler_thumb = NULL;
}
else if (llvm_arch == llvm::Triple::thumb)
{
m_disassembler_thumb = m_disassembler;
if (EDGetDisassembler(&m_disassembler, "arm-apple-darwin-unknown", kEDAssemblySyntaxARMUAL))
m_disassembler = NULL;
}
}
}
DisassemblerLLVM::~DisassemblerLLVM()
{
}
size_t
DisassemblerLLVM::DecodeInstructions
(
const Address &base_addr,
const DataExtractor& data,
uint32_t data_offset,
uint32_t num_instructions,
bool append
)
{
if (m_disassembler == NULL)
return 0;
size_t total_inst_byte_size = 0;
if (!append)
m_instruction_list.Clear();
while (data.ValidOffset(data_offset) && num_instructions)
{
Address inst_addr (base_addr);
inst_addr.Slide(data_offset);
bool use_thumb = false;
// If we have a thumb disassembler, then we have an ARM architecture
// so we need to check what the instruction address class is to make
// sure we shouldn't be disassembling as thumb...
if (m_disassembler_thumb)
{
if (inst_addr.GetAddressClass () == eAddressClassCodeAlternateISA)
use_thumb = true;
}
InstructionSP inst_sp (new InstructionLLVM (inst_addr,
use_thumb ? m_disassembler_thumb : m_disassembler));
size_t inst_byte_size = inst_sp->Extract (*this, data, data_offset);
if (inst_byte_size == 0)
break;
m_instruction_list.Append (inst_sp);
total_inst_byte_size += inst_byte_size;
data_offset += inst_byte_size;
num_instructions--;
}
return total_inst_byte_size;
}
void
DisassemblerLLVM::Initialize()
{
PluginManager::RegisterPlugin (GetPluginNameStatic(),
GetPluginDescriptionStatic(),
CreateInstance);
}
void
DisassemblerLLVM::Terminate()
{
PluginManager::UnregisterPlugin (CreateInstance);
}
const char *
DisassemblerLLVM::GetPluginNameStatic()
{
return "disassembler.llvm";
}
const char *
DisassemblerLLVM::GetPluginDescriptionStatic()
{
return "Disassembler that uses LLVM opcode tables to disassemble i386 and x86_64.";
}
//------------------------------------------------------------------
// PluginInterface protocol
//------------------------------------------------------------------
const char *
DisassemblerLLVM::GetPluginName()
{
return "DisassemblerLLVM";
}
const char *
DisassemblerLLVM::GetShortPluginName()
{
return GetPluginNameStatic();
}
uint32_t
DisassemblerLLVM::GetPluginVersion()
{
return 1;
}