retdec项目源代码阅读部分记录

只涉及 x86 架构的部分;ARM、MIPS、PowerPC 等架构我目前用不上。

/**
 * Returns the LLVM integer type that is `sz` bytes wide.
 *
 * @param module Module supplying the LLVM context and the data layout.
 * @param sz     Width in bytes; 0 selects the pointer width reported by the
 *               module's data layout.
 * @return Integer type of the selected bit width.
 */
llvm::IntegerType* getIntegerTypeFromByteSize(llvm::Module* module, unsigned sz)
{
	unsigned bitWidth;
	if (sz != 0)
	{
		bitWidth = 8 * sz;
	}
	else
	{
		// No explicit size: fall back to the pointer size of the module.
		bitWidth = module->getDataLayout().getPointerSizeInBits();
	}

	auto& context = module->getContext();
	return llvm::Type::getIntNTy(context, bitWidth);
}
/**
 * Emits the bitwise complement of `val` (the effect of the x86 NOT
 * instruction). LLVM IR has no dedicated "not" instruction, so the complement
 * is expressed as `xor val, -1`.
 *
 * @param irb Builder used to emit the xor.
 * @param val Value to complement; the -1 constant is created with val's type.
 * @return The value produced by the xor.
 */
llvm::Value* generateValueNegate(llvm::IRBuilder<>& irb, llvm::Value* val)
{
	auto* allOnes = llvm::ConstantInt::getSigned(val->getType(), -1);
	return irb.CreateXor(val, allOnes);
}
// Excerpt of loadOp(): only the register and immediate operand cases are
// reproduced here; the parameter list and the enclosing switch are elided in
// these notes.
llvm::Value* Capstone2LlvmIrTranslatorX86_impl::loadOp(...){

case X86_OP_REG:
		{   
		    // Read the register operand through loadRegister(). Per the original
		    // note, this fetches the LLVM IR value for @op.reg (e.g. @eax / @ebx).
		    // If loadRegister() yields null, an undef value is returned instead,
		    // typed as `ty` when given and as getDefaultType() otherwise.
			auto* r = loadRegister(op.reg, irb);
			return r ? r : llvm::UndefValue::get(ty ? ty : getDefaultType());
		}

case X86_OP_IMM:
		{  
		    // Integer types come in several widths (i1, i8, i16, i32, ...), so the
		    // immediate is built with a type that is op.size bytes wide to match
		    // the operand. The trailing `false` is ConstantInt::get's isSigned flag.
			auto* t = getIntegerTypeFromByteSize(_module, op.size);
			return llvm::ConstantInt::get(t, op.imm, false);
		}
}
// Stores the value `val` into the operand `op`.
// Example: for `mov ebx, 1`, op is ebx and val is 1.
// NOTE: the function body is elided in these notes; only the signature is
// recorded here.
llvm::Instruction* Capstone2LlvmIrTranslatorX86_impl::storeOp(
		cs_x86_op& op,
		llvm::Value* val,
		llvm::IRBuilder<>& irb,
		eOpConv ct){
		
		
		}
/**
 * Translates ADD / XADD: loads both operands, emits their sum and stores it
 * to the first operand. For XADD the original value of the first operand is
 * additionally stored to the second operand.
 *
 * @param i   Capstone instruction being translated.
 * @param xi  x86-specific instruction details (operands).
 * @param irb Builder used to emit the LLVM IR.
 */
void Capstone2LlvmIrTranslatorX86_impl::translateAdd(cs_insn* i, cs_x86* xi, llvm::IRBuilder<>& irb)
{
	EXPECT_IS_BINARY(i, xi, irb);

	std::tie(op0, op1) = loadOpBinary(xi, irb, eOpConv::SEXT_TRUNC_OR_BITCAST);
	auto* sum = irb.CreateAdd(op0, op1);

	// Note from the original author: for `mov eax,1; add eax,1` the eflags
	// handling below threw an exception, so it is disabled for now and only
	// the plain operand store is kept.
	//storeRegistersPlusSflags(irb, sum, {
	//		{X86_REG_AF, generateCarryAddInt4(op0, op1, irb)},
	//		{X86_REG_CF, generateCarryAdd(sum, op0, irb)},
	//		{X86_REG_OF, generateOverflowAdd(sum, op0, op1, irb)}});
	storeOp(xi->operands[0], sum, irb);

	// The destination store above must stay ahead of this one; do not reorder.
	const bool isExchangeAdd = (i->id == X86_INS_XADD);
	if (isExchangeAdd)
	{
		storeOp(xi->operands[1], op0, irb);
	}
}
/**
 * Returns the program-counter value for instruction `i`, which is the address
 * of the instruction that follows it (as given by getNextInsnAddress()).
 */
llvm::Value* Capstone2LlvmIrTranslatorX86_impl::getCurrentPc(cs_insn* i)
{
	auto* nextInsnAddr = getNextInsnAddress(i);
	return nextInsnAddr;
}
// Register -> parent-register table; a vector is used in place of a map
// (e.g. the entry for AH and the entry for AL both hold AX).
// NOTE(review): presumably indexed by the x86_reg id — confirm.
std::vector<uint32_t> _reg2parentMap;
void Capstone2LlvmIrTranslatorX86_impl::initializeRegistersParentMap16()
{
	// Last element in vector is its own parent.
	std::vector<std::vector<x86_reg>> rss =
	{
			{X86_REG_AH, X86_REG_AL, X86_REG_AX},
			{X86_REG_CH, X86_REG_CL, X86_REG_CX},
			{X86_REG_DH, X86_REG_DL, X86_REG_DX},
			{X86_REG_BH, X86_REG_BL, X86_REG_BX},
			{X86_REG_SPL, X86_REG_SP},
			{X86_REG_BPL, X86_REG_BP},
			{X86_REG_SIL, X86_REG_SI},
			{X86_REG_DIL, X86_REG_DI},
			{X86_REG_IP},
	};

	for (std::vector<x86_reg>& rs : rss)
	{
		// 把vecotr当map用了
		// 相当于_reg2parentMap[AH] = AX _reg2parentMap[AL]=AX
		initializeRegistersParentMapToOther(rs, rs.back());
	}
}

			// Full mapping: the complete list of register groups, including the
			// 32-bit and 64-bit registers and R8-R15.
			// NOTE(review): excerpted without its enclosing function; presumably
			// each row ends with the parent register, as in the 16-bit list — confirm.
			{X86_REG_AH, X86_REG_AL, X86_REG_AX, X86_REG_EAX, X86_REG_RAX},
			{X86_REG_CH, X86_REG_CL, X86_REG_CX, X86_REG_ECX, X86_REG_RCX},
			{X86_REG_DH, X86_REG_DL, X86_REG_DX, X86_REG_EDX, X86_REG_RDX},
			{X86_REG_BH, X86_REG_BL, X86_REG_BX, X86_REG_EBX, X86_REG_RBX},
			{X86_REG_SPL, X86_REG_SP, X86_REG_ESP, X86_REG_RSP},
			{X86_REG_BPL, X86_REG_BP, X86_REG_EBP, X86_REG_RBP},
			{X86_REG_SIL, X86_REG_SI, X86_REG_ESI, X86_REG_RSI},
			{X86_REG_DIL, X86_REG_DI, X86_REG_EDI, X86_REG_RDI},
			{X86_REG_IP, X86_REG_EIP, X86_REG_RIP},
			{X86_REG_EIZ, X86_REG_RIZ},
			{X86_REG_R8B, X86_REG_R8W, X86_REG_R8D, X86_REG_R8},
			{X86_REG_R9B, X86_REG_R9W, X86_REG_R9D, X86_REG_R9},
			{X86_REG_R10B, X86_REG_R10W, X86_REG_R10D, X86_REG_R10},
			{X86_REG_R11B, X86_REG_R11W, X86_REG_R11D, X86_REG_R11},
			{X86_REG_R12B, X86_REG_R12W, X86_REG_R12D, X86_REG_R12},
			{X86_REG_R13B, X86_REG_R13W, X86_REG_R13D, X86_REG_R13},
			{X86_REG_R14B, X86_REG_R14W, X86_REG_R14D, X86_REG_R14},
			{X86_REG_R15B, X86_REG_R15W, X86_REG_R15D, X86_REG_R15}
// Reader's note: in practice only the x86 (32-bit) and x64 layouts matter.
/**
 * Sets the data layout string of _module according to _origBasicMode.
 *
 * 16-bit and 32-bit modes use the same layout string; 64-bit mode uses its
 * own.
 *
 * @throws GenericError when _origBasicMode is none of CS_MODE_16,
 *         CS_MODE_32 or CS_MODE_64.
 */
void Capstone2LlvmIrTranslatorX86_impl::generateDataLayout()
{
	switch (_origBasicMode)
	{
		case CS_MODE_16: // clang -m16
		case CS_MODE_32: // clang -m32
		{
			_module->setDataLayout("e-p:32:32-f64:32:64-f80:32-n8:16:32-S128");
			break;
		}
		case CS_MODE_64:
		{
			_module->setDataLayout("e-m:e-p:64:64-i64:64-f80:128-n8:16:32:64-S128"); // clang
			break;
		}
		default:
		{
			// The message previously named getStackPointerRegister(), a
			// copy-paste slip that pointed readers at the wrong function.
			throw GenericError("Unhandled mode in generateDataLayout().");
		}
	}
}
/**
 * Emits the special store that marks where the translation of assembly
 * instruction `i` begins: the instruction's address is stored (as a volatile
 * store) into the global returned by getAsm2LlvmMapGlobalVariable().
 *
 * @param irb Builder used to emit the store.
 * @param i   Capstone instruction whose address is recorded.
 * @return The created store instruction.
 */
template <typename CInsn, typename CInsnOp>
llvm::StoreInst* Capstone2LlvmIrTranslator_impl<CInsn, CInsnOp>::generateSpecialAsm2LlvmInstr(
		llvm::IRBuilder<>& irb,
		cs_insn* i)
{
	// Address of the instruction currently being translated.
	retdec::common::Address insnAddr = i->address;

	// Global that receives the address (_asm2llvmGv).
	auto* pcGlobal = getAsm2LlvmMapGlobalVariable();
	auto* addrConst = llvm::ConstantInt::get(pcGlobal->getValueType(), insnAddr, false);

	// Third argument `true` makes the store volatile.
	// Reader's tip: while learning, this store can be commented out to keep
	// the program-counter bookkeeping from cluttering the optimized output.
	return irb.CreateStore(addrConst, pcGlobal, true);
}
// Fixed names and name prefixes defined as string constants.
// NOTE(review): judging by the identifiers, these name generated entities
// (functions, variables, basic blocks, pseudo call/return/branch functions,
// x87 load/store helpers) — confirm against the code that uses them.
const std::string entryPointName               = "entry_point";
const std::string generatedImportPrefix        = "imported_function_ord_";
const std::string generatedFunctionPrefix      = "function_";
const std::string generatedFunctionPrefixIDA   = "ida_";
const std::string generatedFunctionPrefixUnk   = "unknown_";
const std::string generatedGlobalVarPrefix     = "global_var_";
const std::string generatedStackVarPrefix      = "stack_var_";
const std::string generatedTempVarPrefix       = "v";
const std::string generatedBasicBlockPrefix    = "dec_label_pc_";
const std::string generatedUndefFunctionPrefix = "__decompiler_undefined_function_";
const std::string generatedVtablePrefix        = "vtable_";
const std::string asm2llvmGv                   = "_asm_program_counter";
const std::string pseudoCallFunction           = "__pseudo_call";
const std::string pseudoReturnFunction         = "__pseudo_return";
const std::string pseudoBranchFunction         = "__pseudo_branch";
const std::string pseudoCondBranchFunction     = "__pseudo_cond_branch";
const std::string pseudoX87dataLoadFunction    = "__frontend_reg_load.fpr";
const std::string pseudoX87dataStoreFunction   = "__frontend_reg_store.fpr";
		// Pairs a module with a global variable.
		using ModuleGlobalPair = std::pair<
				const llvm::Module*,
				llvm::GlobalVariable*>;
		// Pairs a module with a map from LLVM store instructions to Capstone
		// instructions.
		using ModuleInstructionMap = std::pair<
				const llvm::Module*,
				std::map<llvm::StoreInst*, cs_insn*>>;
// Reader's note: this appears to hold, for each module, the global variable
// that models the program-counter register.
std::vector<AsmInstruction::ModuleGlobalPair> AsmInstruction::_module2global;
// One entry per module: the module together with its map from LLVM store
// instructions to Capstone instructions.
static std::vector<ModuleInstructionMap> _module2instMap;

// Store instruction pointer, null by default.
// NOTE(review): presumably the special asm-to-LLVM marker store that an
// AsmInstruction wraps — confirm.
llvm::StoreInst* _llvmToAsmInstr = nullptr;