/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "slicer/common.h"
#include "slicer/code_ir.h"
#include "slicer/dex_bytecode.h"
#include "slicer/dex_format.h"
#include "slicer/dex_ir.h"
#include "slicer/dex_leb128.h"
#include "slicer/bytecode_encoder.h"
#include "slicer/debuginfo_encoder.h"
#include "slicer/tryblocks_encoder.h"

#include <assert.h>
#include <string.h>
#include <algorithm>
#include <cstdlib>
#include <type_traits>
#include <vector>

namespace lir {

// Encodes the current instruction list back into the method's ir::Code:
// bytecode, then debug information (only if the method has any), then
// try/catch blocks.
void CodeIr::Assemble() {
  auto ir_code = ir_method->code;
  SLICER_CHECK(ir_code != nullptr);

  // new .dex bytecode
  //
  // NOTE: this must be done before the debug information and
  //  try/catch blocks since here is where we update the final offsets
  //
  BytecodeEncoder bytecode_encoder(instructions);
  bytecode_encoder.Encode(ir_code, dex_ir);

  // debug information
  if (ir_code->debug_info != nullptr) {
    DebugInfoEncoder dbginfo_encoder(instructions);
    dbginfo_encoder.Encode(ir_method, dex_ir);
  }

  // try/catch blocks
  TryBlocksEncoder try_blocks_encoder(instructions);
  try_blocks_encoder.Encode(ir_code, dex_ir);
}

// Creates a TryBlockBegin / TryBlockEnd node pair for every try block in
// ir_code and parses the block's catch handlers into the TryBlockEnd node.
// The nodes are collected in try_begins_ / try_ends_ (they are merged into
// the instruction list later, by Dissasemble()).
void CodeIr::DissasembleTryBlocks(const ir::Code* ir_code) {
  int nextTryBlockId = 1;
  for (const auto& tryBlock : ir_code->try_blocks) {
    auto try_block_begin = Alloc<TryBlockBegin>();
    try_block_begin->id = nextTryBlockId++;
    try_block_begin->offset = tryBlock.start_addr;

    auto try_block_end = Alloc<TryBlockEnd>();
    try_block_end->try_begin = try_block_begin;
    try_block_end->offset = tryBlock.start_addr + tryBlock.insn_count;

    // parse the catch handlers
    //
    // NOTE: the handler count is signed: its absolute value is the number
    //  of typed handlers, and a value < 1 means that a catch_all address
    //  follows the typed handlers
    //
    const dex::u1* ptr =
        ir_code->catch_handlers.ptr<dex::u1>() + tryBlock.handler_off;
    int catchCount = dex::ReadSLeb128(&ptr);

    for (int catchIndex = 0; catchIndex < std::abs(catchCount); ++catchIndex) {
      CatchHandler handler = {};

      // type
      dex::u4 type_index = dex::ReadULeb128(&ptr);
      handler.ir_type = dex_ir->types_map[type_index];
      SLICER_CHECK(handler.ir_type != nullptr);

      // address
      dex::u4 address = dex::ReadULeb128(&ptr);
      handler.label = GetLabel(address);

      try_block_end->handlers.push_back(handler);
    }

    // catch_all handler?
    //
    // NOTE: this is used to generate code for the "finally" blocks
    //  (see Java Virtual Machine Specification - 3.13 "Compiling finally")
    //
    if (catchCount < 1) {
      dex::u4 address = dex::ReadULeb128(&ptr);
      try_block_end->catch_all = GetLabel(address);
    }

    // we should have at least one handler
    SLICER_CHECK(!try_block_end->handlers.empty() ||
                 try_block_end->catch_all != nullptr);

    try_begins_.push_back(try_block_begin);
    try_ends_.push_back(try_block_end);
  }
}

// Runs the debug info state machine over ir_debug_info (no-op if null) and
// records the resulting annotations, each tagged with the code address that
// was current when it was decoded, in dbg_annotations_.
void CodeIr::DissasembleDebugInfo(const ir::DebugInfo* ir_debug_info) {
  if (ir_debug_info == nullptr) {
    return;
  }

  // debug info state machine registers
  dex::u4 address = 0;
  int line = ir_debug_info->line_start;
  ir::String* source_file = ir_method->decl->parent->class_def->source_file;

  // header
  if (!ir_debug_info->param_names.empty()) {
    auto dbg_header = Alloc<DbgInfoHeader>();
    dbg_header->param_names = ir_debug_info->param_names;
    dbg_header->offset = 0;
    dbg_annotations_.push_back(dbg_header);
  }

  // initial source file
  {
    auto annotation = Alloc<DbgInfoAnnotation>(dex::DBG_SET_FILE);
    annotation->offset = 0;
    annotation->operands.push_back(Alloc<String>(
        source_file, source_file ? source_file->orig_index : dex::kNoIndex));
    dbg_annotations_.push_back(annotation);
  }

  // initial line number - redundant?
  {
    auto annotation = Alloc<DbgInfoAnnotation>(dex::DBG_ADVANCE_LINE);
    annotation->offset = 0;
    annotation->operands.push_back(Alloc<LineNumber>(line));
    dbg_annotations_.push_back(annotation);
  }

  // debug info annotations
  //
  // NOTE: the string/type indexes in the stream are stored as (index + 1),
  //  so subtracting 1 from an encoded 0 yields dex::kNoIndex
  //  (GetString() / GetType() accept kNoIndex)
  //
  const dex::u1* ptr = ir_debug_info->data.ptr<dex::u1>();
  dex::u1 opcode = 0;
  while ((opcode = *ptr++) != dex::DBG_END_SEQUENCE) {
    DbgInfoAnnotation* annotation = nullptr;

    switch (opcode) {
      case dex::DBG_ADVANCE_PC:
        // addr_diff (only updates the state machine, no annotation)
        address += dex::ReadULeb128(&ptr);
        break;

      case dex::DBG_ADVANCE_LINE:
        // line_diff (only updates the state machine, no annotation)
        line += dex::ReadSLeb128(&ptr);
        SLICER_WEAK_CHECK(line > 0);
        break;

      case dex::DBG_START_LOCAL: {
        annotation = Alloc<DbgInfoAnnotation>(opcode);

        // register_num
        annotation->operands.push_back(Alloc<VReg>(dex::ReadULeb128(&ptr)));

        // name
        dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
        annotation->operands.push_back(GetString(name_index));

        // type
        dex::u4 type_index = dex::ReadULeb128(&ptr) - 1;
        annotation->operands.push_back(GetType(type_index));
      } break;

      case dex::DBG_START_LOCAL_EXTENDED: {
        annotation = Alloc<DbgInfoAnnotation>(opcode);

        // register_num
        annotation->operands.push_back(Alloc<VReg>(dex::ReadULeb128(&ptr)));

        // name
        dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
        annotation->operands.push_back(GetString(name_index));

        // type
        dex::u4 type_index = dex::ReadULeb128(&ptr) - 1;
        annotation->operands.push_back(GetType(type_index));

        // signature
        dex::u4 sig_index = dex::ReadULeb128(&ptr) - 1;
        annotation->operands.push_back(GetString(sig_index));
      } break;

      case dex::DBG_END_LOCAL:
      case dex::DBG_RESTART_LOCAL:
        annotation = Alloc<DbgInfoAnnotation>(opcode);
        // register_num
        annotation->operands.push_back(Alloc<VReg>(dex::ReadULeb128(&ptr)));
        break;

      case dex::DBG_SET_PROLOGUE_END:
        annotation = Alloc<DbgInfoAnnotation>(opcode);
        break;

      case dex::DBG_SET_EPILOGUE_BEGIN:
        annotation = Alloc<DbgInfoAnnotation>(opcode);
        break;

      case dex::DBG_SET_FILE: {
        annotation = Alloc<DbgInfoAnnotation>(opcode);

        // source file name
        dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
        source_file = (name_index == dex::kNoIndex)
                          ? nullptr
                          : dex_ir->strings_map[name_index];
        annotation->operands.push_back(Alloc<String>(source_file, name_index));
      } break;

      default: {
        // special opcode: advances both the line and the address registers,
        // and is represented as an explicit DBG_ADVANCE_LINE annotation
        // carrying the resulting line number
        int adjusted_opcode = opcode - dex::DBG_FIRST_SPECIAL;
        line += dex::DBG_LINE_BASE + (adjusted_opcode % dex::DBG_LINE_RANGE);
        address += (adjusted_opcode / dex::DBG_LINE_RANGE);
        SLICER_WEAK_CHECK(line > 0);
        annotation = Alloc<DbgInfoAnnotation>(dex::DBG_ADVANCE_LINE);
        annotation->operands.push_back(Alloc<LineNumber>(line));
      } break;
    }

    if (annotation != nullptr) {
      annotation->offset = address;
      dbg_annotations_.push_back(annotation);
    }
  }
}

// Decodes every instruction (and switch / array-data payload) in ir_code
// and appends the resulting nodes, tagged with their offset (in u2 code
// units from the start of the code), to the instruction list.
void CodeIr::DissasembleBytecode(const ir::Code* ir_code) {
  const dex::u2* begin = ir_code->instructions.begin();
  const dex::u2* end = ir_code->instructions.end();
  const dex::u2* ptr = begin;

  while (ptr < end) {
    auto isize = dex::GetWidthFromBytecode(ptr);
    SLICER_CHECK(isize > 0);

    // the whole instruction must fit in the remaining code
    //
    // NOTE: an instruction running past the end would fail the final
    //  (ptr == end) check anyway; checking here rejects it before the
    //  decoder gets to read past the end of the code
    //
    SLICER_CHECK(isize <= static_cast<decltype(isize)>(end - ptr));

    dex::u4 offset = ptr - begin;

    Instruction* instr = nullptr;
    switch (*ptr) {
      case dex::kPackedSwitchSignature:
        instr = DecodePackedSwitch(ptr, offset);
        break;

      case dex::kSparseSwitchSignature:
        instr = DecodeSparseSwitch(ptr, offset);
        break;

      case dex::kArrayDataSignature:
        instr = DecodeArrayData(ptr, offset);
        break;

      default:
        instr = DecodeBytecode(ptr, offset);
        break;
    }

    instr->offset = offset;
    instructions.push_back(instr);
    ptr += isize;
  }
  SLICER_CHECK(ptr == end);
}

// Completes the decoding of all the switch payloads recorded while decoding
// the bytecode (fixup.first is the payload offset from the start of the
// method's code).
void CodeIr::FixupSwitches() {
  const dex::u2* begin = ir_method->code->instructions.begin();

  // packed switches
  for (auto& fixup : packed_switches_) {
    FixupPackedSwitch(fixup.second.instr, fixup.second.base_offset,
                      begin + fixup.first);
  }

  // sparse switches
  for (auto& fixup : sparse_switches_) {
    FixupSparseSwitch(fixup.second.instr, fixup.second.base_offset,
                      begin + fixup.first);
  }
}

// merge a set of extra instructions into the instruction list
//
// Each extra node is inserted right before the first node (scanning forward
// only) whose offset is equal to the extra node's offset; once the end of
// the instruction list is reached, the remaining extra nodes are appended.
//
// NOTE: as a consequence, an extra node whose offset doesn't match any node
//  in the list ends up at the end of the list, and so do all the extra
//  nodes following it.
//
template <class I_LIST, class E_LIST>
static void MergeInstructions(I_LIST& instructions, const E_LIST& extra) {
  // the extra instructions must be sorted by offset
  SLICER_CHECK(std::is_sorted(extra.begin(), extra.end(),
                              [](const Instruction* a, const Instruction* b) {
                                return a->offset < b->offset;
                              }));

  auto instrIt = instructions.begin();
  auto extraIt = extra.begin();

  while (extraIt != extra.end()) {
    if (instrIt == instructions.end() ||
        (*extraIt)->offset == (*instrIt)->offset) {
      instructions.insert(instrIt, *extraIt);
      ++extraIt;
    } else {
      ++instrIt;
    }
  }
}

// Rebuilds the code IR from the method's ir::Code: resets the decoding
// state, decodes bytecode, try blocks and debug info, resolves the switch
// payloads, assigns label ids and finally merges annotations, try block
// markers and labels into the instruction list. Does nothing more than the
// reset if the method has no code.
void CodeIr::Dissasemble() {
  nodes_.clear();
  labels_.clear();

  try_begins_.clear();
  try_ends_.clear();
  dbg_annotations_.clear();
  packed_switches_.clear();
  sparse_switches_.clear();

  auto ir_code = ir_method->code;
  if (ir_code == nullptr) {
    return;
  }

  // decode the .dex bytecodes
  DissasembleBytecode(ir_code);

  // try/catch blocks
  DissasembleTryBlocks(ir_code);

  // debug information
  DissasembleDebugInfo(ir_code->debug_info);

  // fixup switches
  FixupSwitches();

  // assign label ids
  std::vector<Label*> tmp_labels;
  int nextLabelId = 1;
  for (auto& label : labels_) {
    label.second->id = nextLabelId++;
    tmp_labels.push_back(label.second);
  }

  // merge the labels into the instructions stream
  //
  // NOTE: each merge inserts before the first node with a matching offset,
  //  so, for a given offset, the nodes merged later end up in front of the
  //  ones merged earlier
  //
  MergeInstructions(instructions, dbg_annotations_);
  MergeInstructions(instructions, try_begins_);
  MergeInstructions(instructions, tmp_labels);
  MergeInstructions(instructions, try_ends_);
}

// Allocates the (still empty) node for the packed switch payload located at
// offset and registers it in packed_switches_.
PackedSwitchPayload* CodeIr::DecodePackedSwitch(const dex::u2* /*ptr*/,
                                                dex::u4 offset) {
  // actual decoding is delayed to FixupPackedSwitch()
  // (since the label offsets are relative to the referring
  //  instruction, not the switch data)
  SLICER_CHECK(offset % 2 == 0);
  auto& instr = packed_switches_[offset].instr;
  SLICER_CHECK(instr == nullptr);
  instr = Alloc<PackedSwitchPayload>();
  return instr;
}

// Fills in a packed switch payload node from the raw payload at ptr, turning
// each target (relative to base_offset, the offset of the referring
// instruction) into a label.
void CodeIr::FixupPackedSwitch(PackedSwitchPayload* instr, dex::u4 base_offset,
                               const dex::u2* ptr) {
  // a fixup entry without a payload node means that a packed-switch
  // instruction referenced an offset where no packed switch payload
  // was decoded
  SLICER_CHECK(instr != nullptr);
  SLICER_CHECK(instr->targets.empty());

  auto dex_packed_switch = reinterpret_cast<const dex::PackedSwitchPayload*>(ptr);
  SLICER_CHECK(dex_packed_switch->ident == dex::kPackedSwitchSignature);

  instr->first_key = dex_packed_switch->first_key;
  for (dex::u2 i = 0; i < dex_packed_switch->size; ++i) {
    instr->targets.push_back(
        GetLabel(base_offset + dex_packed_switch->targets[i]));
  }
}

// Allocates the (still empty) node for the sparse switch payload located at
// offset and registers it in sparse_switches_.
SparseSwitchPayload* CodeIr::DecodeSparseSwitch(const dex::u2* /*ptr*/,
                                                dex::u4 offset) {
  // actual decoding is delayed to FixupSparseSwitch()
  // (since the label offsets are relative to the referring
  //  instruction, not the switch data)
  SLICER_CHECK(offset % 2 == 0);
  auto& instr = sparse_switches_[offset].instr;
  SLICER_CHECK(instr == nullptr);
  instr = Alloc<SparseSwitchPayload>();
  return instr;
}

// Fills in a sparse switch payload node from the raw payload at ptr, turning
// each target (relative to base_offset, the offset of the referring
// instruction) into a label.
void CodeIr::FixupSparseSwitch(SparseSwitchPayload* instr, dex::u4 base_offset,
                               const dex::u2* ptr) {
  // a fixup entry without a payload node means that a sparse-switch
  // instruction referenced an offset where no sparse switch payload
  // was decoded
  SLICER_CHECK(instr != nullptr);
  SLICER_CHECK(instr->switch_cases.empty());

  auto dex_sparse_switch = reinterpret_cast<const dex::SparseSwitchPayload*>(ptr);
  SLICER_CHECK(dex_sparse_switch->ident == dex::kSparseSwitchSignature);

  // the payload data is made of "size" keys, followed by "size" targets
  auto& data = dex_sparse_switch->data;
  auto& size = dex_sparse_switch->size;

  for (dex::u2 i = 0; i < size; ++i) {
    SparseSwitchPayload::SwitchCase switch_case = {};
    switch_case.key = data[i];
    switch_case.target = GetLabel(base_offset + data[i + size]);
    instr->switch_cases.push_back(switch_case);
  }
}

// Creates the node for the array data payload at ptr. The node keeps a view
// over the whole raw payload (its width in u2 code units, times 2).
ArrayData* CodeIr::DecodeArrayData(const dex::u2* ptr, dex::u4 offset) {
  auto dex_array_data = reinterpret_cast<const dex::ArrayData*>(ptr);
  SLICER_CHECK(dex_array_data->ident == dex::kArrayDataSignature);
  SLICER_CHECK(offset % 2 == 0);

  auto instr = Alloc<ArrayData>();
  instr->data = slicer::MemView(ptr, dex::GetWidthFromBytecode(ptr) * 2);
  return instr;
}

// Returns the operand for register A: a VRegPair if the opcode's verify
// flags mark register A as wide, a VReg otherwise.
Operand* CodeIr::GetRegA(const dex::Instruction& dex_instr) {
  auto verify_flags = dex::GetVerifyFlagsFromOpcode(dex_instr.opcode);
  if ((verify_flags & dex::kVerifyRegAWide) != 0) {
    return Alloc<VRegPair>(dex_instr.vA);
  } else {
    return Alloc<VReg>(dex_instr.vA);
  }
}

// Returns the operand for register B: a VRegPair if the opcode's verify
// flags mark register B as wide, a VReg otherwise.
Operand* CodeIr::GetRegB(const dex::Instruction& dex_instr) {
  auto verify_flags = dex::GetVerifyFlagsFromOpcode(dex_instr.opcode);
  if ((verify_flags & dex::kVerifyRegBWide) != 0) {
    return Alloc<VRegPair>(dex_instr.vB);
  } else {
    return Alloc<VReg>(dex_instr.vB);
  }
}

// Returns the operand for register C: a VRegPair if the opcode's verify
// flags mark register C as wide, a VReg otherwise.
Operand* CodeIr::GetRegC(const dex::Instruction& dex_instr) {
  auto verify_flags = dex::GetVerifyFlagsFromOpcode(dex_instr.opcode);
  if ((verify_flags & dex::kVerifyRegCWide) != 0) {
    return Alloc<VRegPair>(dex_instr.vC);
  } else {
    return Alloc<VReg>(dex_instr.vC);
  }
}

// Decodes the regular (non-payload) instruction at ptr into a Bytecode node,
// building the operand list that corresponds to the instruction format.
// Branch targets are relative to offset (the offset of the instruction
// itself) and are converted into labels.
Bytecode* CodeIr::DecodeBytecode(const dex::u2* ptr, dex::u4 offset) {
  auto dex_instr = dex::DecodeInstruction(ptr);

  auto instr = Alloc<Bytecode>();
  instr->opcode = dex_instr.opcode;

  auto index_type = dex::GetIndexTypeFromOpcode(dex_instr.opcode);

  switch (dex::GetFormatFromOpcode(dex_instr.opcode)) {
    case dex::k10x:  // op
      break;

    case dex::k12x:  // op vA, vB
    case dex::k22x:  // op vAA, vBBBB
    case dex::k32x:  // op vAAAA, vBBBB
      instr->operands.push_back(GetRegA(dex_instr));
      instr->operands.push_back(GetRegB(dex_instr));
      break;

    case dex::k11n:  // op vA, #+B
    case dex::k21s:  // op vAA, #+BBBB
    case dex::k31i:  // op vAA, #+BBBBBBBB
      instr->operands.push_back(GetRegA(dex_instr));
      instr->operands.push_back(Alloc<Const32>(dex_instr.vB));
      break;

    case dex::k11x:  // op vAA
      instr->operands.push_back(GetRegA(dex_instr));
      break;

    case dex::k10t:  // op +AA
    case dex::k20t:  // op +AAAA
    case dex::k30t:  // op +AAAAAAAA
    {
      auto label = GetLabel(offset + dex::s4(dex_instr.vA));
      instr->operands.push_back(Alloc<CodeLocation>(label));
    } break;

    case dex::k21t:  // op vAA, +BBBB
    case dex::k31t:  // op vAA, +BBBBBBBB
    {
      dex::u4 targetOffset = offset + dex::s4(dex_instr.vB);
      instr->operands.push_back(GetRegA(dex_instr));
      auto label = GetLabel(targetOffset);
      instr->operands.push_back(Alloc<CodeLocation>(label));

      // the labels referring to payloads are marked as aligned, and for
      // switches we also record the offset of the referring instruction
      // (needed later on, to resolve the payload's relative targets)
      if (dex_instr.opcode == dex::OP_PACKED_SWITCH) {
        label->aligned = true;
        dex::u4& base_offset = packed_switches_[targetOffset].base_offset;
        SLICER_CHECK(base_offset == kInvalidOffset);
        base_offset = offset;
      } else if (dex_instr.opcode == dex::OP_SPARSE_SWITCH) {
        label->aligned = true;
        dex::u4& base_offset = sparse_switches_[targetOffset].base_offset;
        SLICER_CHECK(base_offset == kInvalidOffset);
        base_offset = offset;
      } else if (dex_instr.opcode == dex::OP_FILL_ARRAY_DATA) {
        label->aligned = true;
      }
    } break;

    case dex::k23x:  // op vAA, vBB, vCC
      instr->operands.push_back(GetRegA(dex_instr));
      instr->operands.push_back(GetRegB(dex_instr));
      instr->operands.push_back(GetRegC(dex_instr));
      break;

    case dex::k22t:  // op vA, vB, +CCCC
    {
      instr->operands.push_back(GetRegA(dex_instr));
      instr->operands.push_back(GetRegB(dex_instr));
      auto label = GetLabel(offset + dex::s4(dex_instr.vC));
      instr->operands.push_back(Alloc<CodeLocation>(label));
    } break;

    case dex::k22b:  // op vAA, vBB, #+CC
    case dex::k22s:  // op vA, vB, #+CCCC
      instr->operands.push_back(GetRegA(dex_instr));
      instr->operands.push_back(GetRegB(dex_instr));
      instr->operands.push_back(Alloc<Const32>(dex_instr.vC));
      break;

    case dex::k22c:  // op vA, vB, thing@CCCC
      instr->operands.push_back(GetRegA(dex_instr));
      instr->operands.push_back(GetRegB(dex_instr));
      instr->operands.push_back(GetIndexedOperand(index_type, dex_instr.vC));
      break;

    case dex::k21c:  // op vAA, thing@BBBB
    case dex::k31c:  // op vAA, string@BBBBBBBB
      instr->operands.push_back(GetRegA(dex_instr));
      instr->operands.push_back(GetIndexedOperand(index_type, dex_instr.vB));
      break;

    case dex::k35c:  // op {vC,vD,vE,vF,vG}, thing@BBBB
    {
      // vA is the number of registers in the list
      SLICER_CHECK(dex_instr.vA <= 5);
      auto vreg_list = Alloc<VRegList>();
      for (dex::u4 i = 0; i < dex_instr.vA; ++i) {
        vreg_list->registers.push_back(dex_instr.arg[i]);
      }
      instr->operands.push_back(vreg_list);
      instr->operands.push_back(GetIndexedOperand(index_type, dex_instr.vB));
    } break;

    case dex::k3rc:  // op {vCCCC .. v(CCCC+AA-1)}, thing@BBBB
    {
      auto vreg_range = Alloc<VRegRange>(dex_instr.vC, dex_instr.vA);
      instr->operands.push_back(vreg_range);
      instr->operands.push_back(GetIndexedOperand(index_type, dex_instr.vB));
    } break;

    case dex::k21h:  // op vAA, #+BBBB0000[00000000]
      switch (dex_instr.opcode) {
        case dex::OP_CONST_HIGH16:
          instr->operands.push_back(GetRegA(dex_instr));
          instr->operands.push_back(Alloc<Const32>(dex_instr.vB << 16));
          break;

        case dex::OP_CONST_WIDE_HIGH16:
          instr->operands.push_back(GetRegA(dex_instr));
          instr->operands.push_back(Alloc<Const64>(dex::u8(dex_instr.vB) << 48));
          break;

        default:
          SLICER_FATAL("Unexpected opcode 0x%02x", dex_instr.opcode);
      }
      break;

    case dex::k51l:  // op vAA, #+BBBBBBBBBBBBBBBB
      instr->operands.push_back(GetRegA(dex_instr));
      instr->operands.push_back(Alloc<Const64>(dex_instr.vB_wide));
      break;

    default:
      SLICER_FATAL("Unexpected bytecode format (opcode 0x%02x)",
                   dex_instr.opcode);
  }

  return instr;
}

// Get an indexed object (string, field, ...)
// (index must be valid != kNoIndex)
IndexedOperand* CodeIr::GetIndexedOperand(dex::InstructionIndexType index_type,
                                          dex::u4 index) {
  SLICER_CHECK(index != dex::kNoIndex);
  switch (index_type) {
    case dex::kIndexStringRef:
      return Alloc<String>(dex_ir->strings_map[index], index);

    case dex::kIndexTypeRef:
      return Alloc<Type>(dex_ir->types_map[index], index);

    case dex::kIndexFieldRef:
      return Alloc<Field>(dex_ir->fields_map[index], index);

    case dex::kIndexMethodRef:
      return Alloc<Method>(dex_ir->methods_map[index], index);

    default:
      SLICER_FATAL("Unexpected index type 0x%02x", index_type);
  }
}

// Get a type based on its index (potentially kNoIndex)
// (for kNoIndex, the operand wraps a null ir type)
Type* CodeIr::GetType(dex::u4 index) {
  auto ir_type = (index == dex::kNoIndex) ? nullptr : dex_ir->types_map[index];
  return Alloc<Type>(ir_type, index);
}

// Get a string based on its index (potentially kNoIndex)
// (for kNoIndex, the operand wraps a null ir string)
String* CodeIr::GetString(dex::u4 index) {
  auto ir_string = (index == dex::kNoIndex) ? nullptr : dex_ir->strings_map[index];
  return Alloc<String>(ir_string, index);
}

// Get an existing, or new label for a particular offset
// (every call counts as one more reference to the label)
Label* CodeIr::GetLabel(dex::u4 offset) {
  auto& p = labels_[offset];
  if (p == nullptr) {
    p = Alloc<Label>(offset);
  }
  ++p->refCount;
  return p;
}

}  // namespace lir