retdec 项目源代码阅读记录(部分)
本文只涉及 x86 架构的部分;ARM、MIPS、PowerPC 等架构我目前用不上,因此没有记录。
/**
 * Return the LLVM integer type that is @p sz bytes wide.
 *
 * @param module Module that supplies the LLVMContext and the data layout.
 * @param sz     Width in bytes. Zero selects the pointer width (in bits)
 *               reported by the module's data layout.
 * @return Integer type with the resulting bit width.
 */
llvm::IntegerType* getIntegerTypeFromByteSize(llvm::Module* module, unsigned sz)
{
    unsigned bitWidth = 0;
    if (sz != 0)
    {
        // Bytes -> bits.
        bitWidth = 8 * sz;
    }
    else
    {
        bitWidth = module->getDataLayout().getPointerSizeInBits();
    }
    return llvm::Type::getIntNTy(module->getContext(), bitWidth);
}
/**
 * Build the bitwise complement of @p val by XOR-ing it with a -1 constant
 * of val's own type.
 *
 * Author's note: the x86 NOT instruction is modelled with this explicit
 * "xor -1" rather than with IRBuilder::CreateNot.
 * NOTE(review): CreateNot is documented as producing an xor with all-ones
 * as well -- confirm whether the distinction is intentional.
 *
 * @param irb Builder used to emit the instruction.
 * @param val Value to complement.
 * @return Result of the XOR.
 */
llvm::Value* generateValueNegate(llvm::IRBuilder<>& irb, llvm::Value* val)
{
    auto* minusOne = llvm::ConstantInt::getSigned(val->getType(), -1);
    return irb.CreateXor(val, minusOne);
}
// Load an x86 operand as an LLVM value.
// NOTE: abridged excerpt -- the parameter list is elided as `...` and the
// enclosing switch is not shown; only the register and immediate cases are kept.
llvm::Value* Capstone2LlvmIrTranslatorX86_impl::loadOp(...){
case X86_OP_REG:
{
// Fetch the LLVM IR value of register op.reg (author's examples: @eax, @ebx).
auto* r = loadRegister(op.reg, irb);
// A null result falls back to an undef value of type `ty`, or of the
// default type when `ty` is null as well.
return r ? r : llvm::UndefValue::get(ty ? ty : getDefaultType());
}
case X86_OP_IMM:
{
// Integers come in several widths (i1, i8, i16, i32); the constant's type
// is chosen from the operand size so that it matches the register width.
auto* t = getIntegerTypeFromByteSize(_module, op.size);
// The trailing `false` is the isSigned argument of ConstantInt::get.
return llvm::ConstantInt::get(t, op.imm, false);
}
}
// Store the value `val` into the operand `op`.
// Author's example: for `mov ebx, 1`, op is ebx and val is 1.
llvm::Instruction* Capstone2LlvmIrTranslatorX86_impl::storeOp(
cs_x86_op& op,
llvm::Value* val,
llvm::IRBuilder<>& irb,
eOpConv ct){
// The body is empty in this excerpt (presumably elided from the notes).
}
/**
 * Translate a binary x86 add into LLVM IR.
 *
 * Loads both operands, emits the addition and stores the sum into the first
 * operand. For X86_INS_XADD the original first-operand value (op0) is also
 * stored into the second operand.
 *
 * @param i   Capstone instruction.
 * @param xi  x86-specific instruction details (operands).
 * @param irb Builder used to emit the IR.
 */
void Capstone2LlvmIrTranslatorX86_impl::translateAdd(cs_insn* i, cs_x86* xi, llvm::IRBuilder<>& irb)
{
    EXPECT_IS_BINARY(i, xi, irb);

    std::tie(op0, op1) = loadOpBinary(xi, irb, eOpConv::SEXT_TRUNC_OR_BITCAST);
    auto* sum = irb.CreateAdd(op0, op1);

    // Flag updates are disabled for now: with `mov eax,1; add eax,1` the
    // eflags handling that followed threw an exception (author's note).
    // storeRegistersPlusSflags(irb, sum, {
    //     {X86_REG_AF, generateCarryAddInt4(op0, op1, irb)},
    //     {X86_REG_CF, generateCarryAdd(sum, op0, irb)},
    //     {X86_REG_OF, generateOverflowAdd(sum, op0, op1, irb)}});

    storeOp(xi->operands[0], sum, irb);

    const bool isXadd = (i->id == X86_INS_XADD);
    if (!isXadd)
    {
        return;
    }

    // XADD: the second operand receives the old value of the first one.
    storeOp(xi->operands[1], op0, irb);
}
/**
 * Return the value used as the current program counter for instruction @p i.
 * This simply forwards to getNextInsnAddress(), i.e. the address of the
 * instruction that follows @p i (per the author's note).
 */
llvm::Value* Capstone2LlvmIrTranslatorX86_impl::getCurrentPc(cs_insn* i)
{
    llvm::Value* nextInsnAddr = getNextInsnAddress(i);
    return nextInsnAddr;
}
// Register -> parent-register table. Per the author's note in
// initializeRegistersParentMap16(), this vector is used like a map indexed by
// register id (e.g. the entries for AH and AL both hold AX).
std::vector<uint32_t> _reg2parentMap;
void Capstone2LlvmIrTranslatorX86_impl::initializeRegistersParentMap16()
{
// Last element in vector is its own parent.
std::vector<std::vector<x86_reg>> rss =
{
{X86_REG_AH, X86_REG_AL, X86_REG_AX},
{X86_REG_CH, X86_REG_CL, X86_REG_CX},
{X86_REG_DH, X86_REG_DL, X86_REG_DX},
{X86_REG_BH, X86_REG_BL, X86_REG_BX},
{X86_REG_SPL, X86_REG_SP},
{X86_REG_BPL, X86_REG_BP},
{X86_REG_SIL, X86_REG_SI},
{X86_REG_DIL, X86_REG_DI},
{X86_REG_IP},
};
for (std::vector<x86_reg>& rs : rss)
{
// 把vecotr当map用了
// 相当于_reg2parentMap[AH] = AX _reg2parentMap[AL]=AX
initializeRegistersParentMapToOther(rs, rs.back());
}
}
// Full mapping: the register families extended with their 32-bit and 64-bit
// members, plus EIZ/RIZ and R8-R15.
// NOTE(review): excerpt only -- the enclosing initializer is not shown here.
// Presumably the last element of each row is the parent, as in
// initializeRegistersParentMap16(); confirm against the full source.
{X86_REG_AH, X86_REG_AL, X86_REG_AX, X86_REG_EAX, X86_REG_RAX},
{X86_REG_CH, X86_REG_CL, X86_REG_CX, X86_REG_ECX, X86_REG_RCX},
{X86_REG_DH, X86_REG_DL, X86_REG_DX, X86_REG_EDX, X86_REG_RDX},
{X86_REG_BH, X86_REG_BL, X86_REG_BX, X86_REG_EBX, X86_REG_RBX},
{X86_REG_SPL, X86_REG_SP, X86_REG_ESP, X86_REG_RSP},
{X86_REG_BPL, X86_REG_BP, X86_REG_EBP, X86_REG_RBP},
{X86_REG_SIL, X86_REG_SI, X86_REG_ESI, X86_REG_RSI},
{X86_REG_DIL, X86_REG_DI, X86_REG_EDI, X86_REG_RDI},
{X86_REG_IP, X86_REG_EIP, X86_REG_RIP},
{X86_REG_EIZ, X86_REG_RIZ},
{X86_REG_R8B, X86_REG_R8W, X86_REG_R8D, X86_REG_R8},
{X86_REG_R9B, X86_REG_R9W, X86_REG_R9D, X86_REG_R9},
{X86_REG_R10B, X86_REG_R10W, X86_REG_R10D, X86_REG_R10},
{X86_REG_R11B, X86_REG_R11W, X86_REG_R11D, X86_REG_R11},
{X86_REG_R12B, X86_REG_R12W, X86_REG_R12D, X86_REG_R12},
{X86_REG_R13B, X86_REG_R13W, X86_REG_R13D, X86_REG_R13},
{X86_REG_R14B, X86_REG_R14W, X86_REG_R14D, X86_REG_R14},
{X86_REG_R15B, X86_REG_R15W, X86_REG_R15D, X86_REG_R15}
/**
 * Set the module's data layout string according to _origBasicMode.
 *
 * 16-bit and 32-bit modes share one layout string; 64-bit mode has its own.
 * Author's note: in practice only the x86 (32-bit) and x64 layouts are needed.
 *
 * @throws GenericError if _origBasicMode is not CS_MODE_16, CS_MODE_32 or
 *         CS_MODE_64.
 */
void Capstone2LlvmIrTranslatorX86_impl::generateDataLayout()
{
    switch (_origBasicMode)
    {
        case CS_MODE_16: // clang -m16
        case CS_MODE_32: // clang -m32
        {
            // Both modes use the identical layout, so they share one branch.
            _module->setDataLayout("e-p:32:32-f64:32:64-f80:32-n8:16:32-S128");
            break;
        }
        case CS_MODE_64: // clang
        {
            _module->setDataLayout("e-m:e-p:64:64-i64:64-f80:128-n8:16:32:64-S128");
            break;
        }
        default:
        {
            // The message previously named getStackPointerRegister(), which
            // pointed anyone debugging the exception at the wrong function.
            throw GenericError("Unhandled mode in generateDataLayout().");
        }
    }
}
/**
 * Emit the special store that records the address of instruction @p i in the
 * asm-to-LLVM mapping global variable.
 *
 * @param irb Builder used to emit the store.
 * @param i   Capstone instruction whose address is recorded.
 * @return The created store instruction.
 */
template <typename CInsn, typename CInsnOp>
llvm::StoreInst* Capstone2LlvmIrTranslator_impl<CInsn, CInsnOp>::generateSpecialAsm2LlvmInstr(
        llvm::IRBuilder<>& irb,
        cs_insn* i)
{
    // Global that the address is written to (_asm2llvmGv per the author's note).
    auto* pcGlobal = getAsm2LlvmMapGlobalVariable();

    // Address of the instruction currently being translated.
    retdec::common::Address insnAddr = i->address;
    auto* addrConst = llvm::ConstantInt::get(pcGlobal->getValueType(), insnAddr, false);

    // The store is created with the volatile flag set (third argument).
    // Author's tip: while learning, comment this store out so the program
    // counter updates do not get in the way of reading the optimized IR.
    return irb.CreateStore(addrConst, pcGlobal, true);
}
// Fixed names and name prefixes. Each identifier states the role of its
// string; the literal is the exact text.
const std::string entryPointName{"entry_point"};

// Prefixes for generated names.
const std::string generatedImportPrefix{"imported_function_ord_"};
const std::string generatedFunctionPrefix{"function_"};
const std::string generatedFunctionPrefixIDA{"ida_"};
const std::string generatedFunctionPrefixUnk{"unknown_"};
const std::string generatedGlobalVarPrefix{"global_var_"};
const std::string generatedStackVarPrefix{"stack_var_"};
const std::string generatedTempVarPrefix{"v"};
const std::string generatedBasicBlockPrefix{"dec_label_pc_"};
const std::string generatedUndefFunctionPrefix{"__decompiler_undefined_function_"};
const std::string generatedVtablePrefix{"vtable_"};

// Name of the asm-to-LLVM mapping global (a program-counter variable, going by its text).
const std::string asm2llvmGv{"_asm_program_counter"};

// Names of the pseudo functions.
const std::string pseudoCallFunction{"__pseudo_call"};
const std::string pseudoReturnFunction{"__pseudo_return"};
const std::string pseudoBranchFunction{"__pseudo_branch"};
const std::string pseudoCondBranchFunction{"__pseudo_cond_branch"};
const std::string pseudoX87dataLoadFunction{"__frontend_reg_load.fpr"};
const std::string pseudoX87dataStoreFunction{"__frontend_reg_store.fpr"};
/// Pairs a module with one global variable.
using ModuleGlobalPair = std::pair<const llvm::Module*, llvm::GlobalVariable*>;

/// Pairs a module with a map from store instructions to Capstone instructions.
using ModuleInstructionMap = std::pair<const llvm::Module*, std::map<llvm::StoreInst*, cs_insn*>>;
// Author's guess (unverified): holds, for each module, the global variable
// that stands for the program counter.
std::vector<AsmInstruction::ModuleGlobalPair> AsmInstruction::_module2global;
// One ModuleInstructionMap entry per module, i.e. a module paired with its
// StoreInst -> cs_insn map.
static std::vector<ModuleInstructionMap> _module2instMap;
// Store instruction pointer, null by default.
// NOTE(review): its role is not shown in this excerpt.
llvm::StoreInst* _llvmToAsmInstr = nullptr;