/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "slicer/reader.h"

#include "slicer/dex_bytecode.h"
#include "slicer/chronometer.h"
#include "slicer/dex_leb128.h"

#include <assert.h>
#include <string.h>
#include <type_traits>
#include <cstdlib>

namespace dex {

// Wraps an in-memory .dex image: records the image pointer and size,
// validates the header and starts with an "empty" .dex IR whose only
// populated member is the magic view.
Reader::Reader(const dex::u1* image, size_t size) : image_(image), size_(size) {
  // init the header reference
  header_ = ptr<dex::Header>(0);
  ValidateHeader();

  // start with an "empty" .dex IR
  dex_ir_ = std::make_shared<ir::DexFile>();
  dex_ir_->magic = slicer::MemView(header_, sizeof(dex::Header::magic));
}

// Returns a view over the class_defs section described by the header
slicer::ArrayView<const dex::ClassDef> Reader::ClassDefs() const {
  return section<dex::ClassDef>(header_->class_defs_off,
                                header_->class_defs_size);
}

// Returns a view over the string_ids section described by the header
slicer::ArrayView<const dex::StringId> Reader::StringIds() const {
  return section<dex::StringId>(header_->string_ids_off,
                                header_->string_ids_size);
}

// Returns a view over the type_ids section described by the header
slicer::ArrayView<const dex::TypeId> Reader::TypeIds() const {
  return section<dex::TypeId>(header_->type_ids_off,
                              header_->type_ids_size);
}

// Returns a view over the field_ids section described by the header
slicer::ArrayView<const dex::FieldId> Reader::FieldIds() const {
  return section<dex::FieldId>(header_->field_ids_off,
                               header_->field_ids_size);
}

// Returns a view over the method_ids section described by the header
slicer::ArrayView<const dex::MethodId> Reader::MethodIds() const {
  return section<dex::MethodId>(header_->method_ids_off,
                                header_->method_ids_size);
}

// Returns a view over the proto_ids section described by the header
slicer::ArrayView<const dex::ProtoId> Reader::ProtoIds() const {
  return section<dex::ProtoId>(header_->proto_ids_off,
                               header_->proto_ids_size);
}

// Returns a pointer to the map list located at header.map_off
const dex::MapList* Reader::DexMapList() const {
  return dataPtr<dex::MapList>(header_->map_off);
}

// Returns the characters of the string with the specified index
// (the leading ULEB128 value of the string data is skipped), or the
// "<no_string>" placeholder if the index is kNoIndex
const char* Reader::GetStringMUTF8(dex::u4 index) const {
  if (index == dex::kNoIndex) {
    return "<no_string>";
  }
  const dex::u1* strData = GetStringData(index);
  dex::ReadULeb128(&strData);
  return reinterpret_cast<const char*>(strData);
}

// Creates the IR for every class definition in the image
void Reader::CreateFullIr() {
  size_t classCount = ClassDefs().size();
  for (size_t i = 0; i < classCount; ++i) {
    CreateClassIr(i);
  }
}

// Creates (or looks up) the IR for the class definition with the
// specified index and checks that a node was produced
void Reader::CreateClassIr(dex::u4 index) {
  auto ir_class = GetClass(index);
  SLICER_CHECK(ir_class != nullptr);
}

// Returns the index of the class with the specified
// descriptor, or kNoIndex if not found
dex::u4 Reader::FindClassIndex(const char* class_descriptor) const {
  auto classes = ClassDefs();
  auto types = TypeIds();
  for (dex::u4 i = 0; i < classes.size(); ++i) {
    auto typeId = types[classes[i].class_idx];
    const char* descriptor = GetStringMUTF8(typeId.descriptor_idx);
    if (strcmp(class_descriptor, descriptor) == 0) {
      return i;
    }
  }
  return dex::kNoIndex;
}

// map a .dex index to corresponding .dex IR node
//
// NOTES:
//  1. the mapping between an index and the indexed
//     .dex IR nodes is 1:1
//  2. we do a single index lookup for both existing
//     nodes as well as new nodes
//  3. dummy is an invalid, but non-null pointer value
//     used to check that the mapping lookup/update is atomic
//  4. there should be no recursion with the same index
//     (we use the dummy value to guard against this too)
//
ir::Class* Reader::GetClass(dex::u4 index) {
  SLICER_CHECK(index != dex::kNoIndex);
  auto& p = dex_ir_->classes_map[index];
  auto dummy = reinterpret_cast<ir::Class*>(1);
  if (p == nullptr) {
    p = dummy;
    auto newClass = ParseClass(index);
    SLICER_CHECK(p == dummy);
    p = newClass;
    dex_ir_->classes_indexes.MarkUsedIndex(index);
  }
  SLICER_CHECK(p != dummy);
  return p;
}

// map a .dex index to corresponding .dex IR node
// (see the Reader::GetClass() comments)
ir::Type* Reader::GetType(dex::u4 index) {
  SLICER_CHECK(index != dex::kNoIndex);
  auto& p = dex_ir_->types_map[index];
  auto dummy = reinterpret_cast<ir::Type*>(1);
  if (p == nullptr) {
    p = dummy;
    auto newType = ParseType(index);
    SLICER_CHECK(p == dummy);
    p = newType;
    dex_ir_->types_indexes.MarkUsedIndex(index);
  }
  SLICER_CHECK(p != dummy);
  return p;
}

// map a .dex index to corresponding .dex IR node
// (see the Reader::GetClass() comments)
ir::FieldDecl* Reader::GetFieldDecl(dex::u4 index) {
  SLICER_CHECK(index != dex::kNoIndex);
  auto& p = dex_ir_->fields_map[index];
  auto dummy = reinterpret_cast<ir::FieldDecl*>(1);
  if (p == nullptr) {
    p = dummy;
    auto newField = ParseFieldDecl(index);
    SLICER_CHECK(p == dummy);
    p = newField;
    dex_ir_->fields_indexes.MarkUsedIndex(index);
  }
  SLICER_CHECK(p != dummy);
  return p;
}

// map a .dex index to corresponding .dex IR node
// (see the Reader::GetClass() comments)
ir::MethodDecl* Reader::GetMethodDecl(dex::u4 index) {
  SLICER_CHECK(index != dex::kNoIndex);
  auto& p = dex_ir_->methods_map[index];
  auto dummy = reinterpret_cast<ir::MethodDecl*>(1);
  if (p == nullptr) {
    p = dummy;
    auto newMethod = ParseMethodDecl(index);
    SLICER_CHECK(p == dummy);
    p = newMethod;
    dex_ir_->methods_indexes.MarkUsedIndex(index);
  }
  SLICER_CHECK(p != dummy);
  return p;
}

// map a .dex index to corresponding .dex IR node
// (see the Reader::GetClass() comments)
ir::Proto* Reader::GetProto(dex::u4 index) {
  SLICER_CHECK(index != dex::kNoIndex);
  auto& p = dex_ir_->protos_map[index];
  auto dummy = reinterpret_cast<ir::Proto*>(1);
  if (p == nullptr) {
    p = dummy;
    auto newProto = ParseProto(index);
    SLICER_CHECK(p == dummy);
    p = newProto;
    dex_ir_->protos_indexes.MarkUsedIndex(index);
  }
  SLICER_CHECK(p != dummy);
  return p;
}

// map a .dex index to corresponding .dex IR node
// (see the Reader::GetClass() comments)
ir::String* Reader::GetString(dex::u4 index) {
  SLICER_CHECK(index != dex::kNoIndex);
  auto& p = dex_ir_->strings_map[index];
  auto dummy = reinterpret_cast<ir::String*>(1);
  if (p == nullptr) {
    p = dummy;
    auto newString = ParseString(index);
    SLICER_CHECK(p == dummy);
    p = newString;
    dex_ir_->strings_indexes.MarkUsedIndex(index);
  }
  SLICER_CHECK(p != dummy);
  return p;
}

// Builds the IR node for the class definition with the specified index:
// type, flags, interfaces, super class, source file, the four member lists
// from the class data (if present), static initializers and annotations.
ir::Class* Reader::ParseClass(dex::u4 index) {
  auto& dex_class_def = ClassDefs()[index];
  auto ir_class = dex_ir_->Alloc<ir::Class>();

  ir_class->type = GetType(dex_class_def.class_idx);
  assert(ir_class->type->class_def == nullptr);
  ir_class->type->class_def = ir_class;

  ir_class->access_flags = dex_class_def.access_flags;
  ir_class->interfaces = ExtractTypeList(dex_class_def.interfaces_off);

  if (dex_class_def.superclass_idx != dex::kNoIndex) {
    ir_class->super_class = GetType(dex_class_def.superclass_idx);
  }

  if (dex_class_def.source_file_idx != dex::kNoIndex) {
    ir_class->source_file = GetString(dex_class_def.source_file_idx);
  }

  if (dex_class_def.class_data_off != 0) {
    const dex::u1* class_data = dataPtr<dex::u1>(dex_class_def.class_data_off);

    // the class data starts with the four member counts ...
    dex::u4 static_fields_count = dex::ReadULeb128(&class_data);
    dex::u4 instance_fields_count = dex::ReadULeb128(&class_data);
    dex::u4 direct_methods_count = dex::ReadULeb128(&class_data);
    dex::u4 virtual_methods_count = dex::ReadULeb128(&class_data);

    // ... followed by the member lists (the member indexes are delta
    // encoded, so base_index is reset at the start of each list)
    dex::u4 base_index = dex::kNoIndex;
    for (dex::u4 i = 0; i < static_fields_count; ++i) {
      auto field = ParseEncodedField(&class_data, &base_index);
      ir_class->static_fields.push_back(field);
    }

    base_index = dex::kNoIndex;
    for (dex::u4 i = 0; i < instance_fields_count; ++i) {
      auto field = ParseEncodedField(&class_data, &base_index);
      ir_class->instance_fields.push_back(field);
    }

    base_index = dex::kNoIndex;
    for (dex::u4 i = 0; i < direct_methods_count; ++i) {
      auto method = ParseEncodedMethod(&class_data, &base_index);
      ir_class->direct_methods.push_back(method);
    }

    base_index = dex::kNoIndex;
    for (dex::u4 i = 0; i < virtual_methods_count; ++i) {
      auto method = ParseEncodedMethod(&class_data, &base_index);
      ir_class->virtual_methods.push_back(method);
    }
  }

  ir_class->static_init = ExtractEncodedArray(dex_class_def.static_values_off);
  ir_class->annotations = ExtractAnnotations(dex_class_def.annotations_off);
  ir_class->orig_index = index;

  return ir_class;
}

// Returns the IR for the "annotations_directory_item" at the specified
// offset (nullptr for offset 0). Results are cached per offset.
ir::AnnotationsDirectory* Reader::ExtractAnnotations(dex::u4 offset) {
  if (offset == 0) {
    return nullptr;
  }

  SLICER_CHECK(offset % 4 == 0);

  // first check if we already extracted the same "annotations_directory_item"
  auto& ir_annotations = annotations_directories_[offset];
  if (ir_annotations == nullptr) {
    ir_annotations = dex_ir_->Alloc<ir::AnnotationsDirectory>();

    auto dex_annotations = dataPtr<dex::AnnotationsDirectoryItem>(offset);

    ir_annotations->class_annotation =
        ExtractAnnotationSet(dex_annotations->class_annotations_off);

    // the field / method / parameter entries follow the fixed-size item
    const dex::u1* ptr = reinterpret_cast<const dex::u1*>(dex_annotations + 1);

    for (dex::u4 i = 0; i < dex_annotations->fields_size; ++i) {
      ir_annotations->field_annotations.push_back(ParseFieldAnnotation(&ptr));
    }

    for (dex::u4 i = 0; i < dex_annotations->methods_size; ++i) {
      ir_annotations->method_annotations.push_back(ParseMethodAnnotation(&ptr));
    }

    for (dex::u4 i = 0; i < dex_annotations->parameters_size; ++i) {
      ir_annotations->param_annotations.push_back(ParseParamAnnotation(&ptr));
    }
  }
  return ir_annotations;
}

// Returns the IR for the "annotation_item" at the specified (non-zero)
// offset. Results are cached per offset.
ir::Annotation* Reader::ExtractAnnotationItem(dex::u4 offset) {
  SLICER_CHECK(offset != 0);

  // first check if we already extracted the same "annotation_item"
  auto& ir_annotation = annotations_[offset];
  if (ir_annotation == nullptr) {
    auto dexAnnotationItem = dataPtr<dex::AnnotationItem>(offset);
    const dex::u1* ptr = dexAnnotationItem->annotation;
    ir_annotation = ParseAnnotation(&ptr);
    // ParseAnnotation() defaults to kVisibilityEncoded, use the item's value
    ir_annotation->visibility = dexAnnotationItem->visibility;
  }
  return ir_annotation;
}

// Returns the IR for the "annotation_set_item" at the specified offset
// (nullptr for offset 0). Results are cached per offset.
ir::AnnotationSet* Reader::ExtractAnnotationSet(dex::u4 offset) {
  if (offset == 0) {
    return nullptr;
  }

  SLICER_CHECK(offset % 4 == 0);

  // first check if we already extracted the same "annotation_set_item"
  auto& ir_annotation_set = annotation_sets_[offset];
  if (ir_annotation_set == nullptr) {
    ir_annotation_set = dex_ir_->Alloc<ir::AnnotationSet>();

    auto dex_annotation_set = dataPtr<dex::AnnotationSetItem>(offset);
    for (dex::u4 i = 0; i < dex_annotation_set->size; ++i) {
      auto ir_annotation = ExtractAnnotationItem(dex_annotation_set->entries[i]);
      assert(ir_annotation != nullptr);
      ir_annotation_set->annotations.push_back(ir_annotation);
    }
  }
  return ir_annotation_set;
}

// Builds the IR for the "annotation_set_ref_list" at the specified offset.
// Entries with a zero annotations offset are skipped. Unlike the other
// Extract*() helpers, a new node is allocated on every call.
ir::AnnotationSetRefList* Reader::ExtractAnnotationSetRefList(dex::u4 offset) {
  SLICER_CHECK(offset % 4 == 0);

  auto dex_annotation_set_ref_list = dataPtr<dex::AnnotationSetRefList>(offset);
  auto ir_annotation_set_ref_list = dex_ir_->Alloc<ir::AnnotationSetRefList>();

  for (dex::u4 i = 0; i < dex_annotation_set_ref_list->size; ++i) {
    dex::u4 entry_offset = dex_annotation_set_ref_list->list[i].annotations_off;
    if (entry_offset != 0) {
      auto ir_annotation_set = ExtractAnnotationSet(entry_offset);
      SLICER_CHECK(ir_annotation_set != nullptr);
      ir_annotation_set_ref_list->annotations.push_back(ir_annotation_set);
    }
  }

  return ir_annotation_set_ref_list;
}

// Parses one field annotations entry at *pptr and advances *pptr past it
ir::FieldAnnotation* Reader::ParseFieldAnnotation(const dex::u1** pptr) {
  auto dex_field_annotation =
      reinterpret_cast<const dex::FieldAnnotationsItem*>(*pptr);
  auto ir_field_annotation = dex_ir_->Alloc<ir::FieldAnnotation>();

  ir_field_annotation->field_decl = GetFieldDecl(dex_field_annotation->field_idx);

  ir_field_annotation->annotations =
      ExtractAnnotationSet(dex_field_annotation->annotations_off);
  SLICER_CHECK(ir_field_annotation->annotations != nullptr);

  *pptr += sizeof(dex::FieldAnnotationsItem);
  return ir_field_annotation;
}

// Parses one method annotations entry at *pptr and advances *pptr past it
ir::MethodAnnotation* Reader::ParseMethodAnnotation(const dex::u1** pptr) {
  auto dex_method_annotation =
      reinterpret_cast<const dex::MethodAnnotationsItem*>(*pptr);
  auto ir_method_annotation = dex_ir_->Alloc<ir::MethodAnnotation>();

  ir_method_annotation->method_decl =
      GetMethodDecl(dex_method_annotation->method_idx);

  ir_method_annotation->annotations =
      ExtractAnnotationSet(dex_method_annotation->annotations_off);
  SLICER_CHECK(ir_method_annotation->annotations != nullptr);

  *pptr += sizeof(dex::MethodAnnotationsItem);
  return ir_method_annotation;
}

// Parses one parameter annotations entry at *pptr and advances *pptr past it
ir::ParamAnnotation* Reader::ParseParamAnnotation(const dex::u1** pptr) {
  auto dex_param_annotation =
      reinterpret_cast<const dex::ParameterAnnotationsItem*>(*pptr);
  auto ir_param_annotation = dex_ir_->Alloc<ir::ParamAnnotation>();

  ir_param_annotation->method_decl =
      GetMethodDecl(dex_param_annotation->method_idx);

  ir_param_annotation->annotations =
      ExtractAnnotationSetRefList(dex_param_annotation->annotations_off);
  SLICER_CHECK(ir_param_annotation->annotations != nullptr);

  *pptr += sizeof(dex::ParameterAnnotationsItem);
  return ir_param_annotation;
}

// Parses one encoded field at *pptr (advancing *pptr).
//
// *base_index is the index of the previous field in the same list, or
// kNoIndex for the first entry: the first entry stores an absolute index,
// the following ones a non-zero delta. *base_index is updated to the
// decoded index.
ir::EncodedField* Reader::ParseEncodedField(const dex::u1** pptr,
                                            dex::u4* base_index) {
  auto ir_encoded_field = dex_ir_->Alloc<ir::EncodedField>();

  auto field_index = dex::ReadULeb128(pptr);
  SLICER_CHECK(field_index != dex::kNoIndex);
  if (*base_index != dex::kNoIndex) {
    SLICER_CHECK(field_index != 0);
    field_index += *base_index;
  }
  *base_index = field_index;

  ir_encoded_field->decl = GetFieldDecl(field_index);
  ir_encoded_field->access_flags = dex::ReadULeb128(pptr);

  return ir_encoded_field;
}

// Parse an encoded variable-length integer value
// (sign-extend signed types, zero-extend unsigned types)
//
// Reads `size` bytes (1..sizeof(T)) from *pptr, least significant byte
// first, and advances *pptr past them.
//
// The value is assembled in an unsigned accumulator so that neither the
// byte merging nor the sign extension ever left-shifts a negative value
// (which is undefined behavior before C++20).
template <class T>
static T ParseIntValue(const dex::u1** pptr, size_t size) {
  static_assert(std::is_integral<T>::value, "must be an integral type");
  static_assert(sizeof(T) <= sizeof(unsigned long long),
                "the accumulator must be able to hold any T value");

  SLICER_CHECK(size > 0);
  SLICER_CHECK(size <= sizeof(T));

  unsigned long long bits = 0;
  for (size_t i = 0; i < size; ++i) {
    bits |= static_cast<unsigned long long>(*(*pptr)++) << (i * 8);
  }

  // sign-extend?
  // (nothing to do if the encoded value already fills the accumulator,
  // this also avoids a shift by the full accumulator width)
  if (std::is_signed<T>::value && size < sizeof(bits)) {
    const unsigned long long sign_bit = 1ULL << (size * 8 - 1);
    if ((bits & sign_bit) != 0) {
      bits |= ~0ULL << (size * 8);
    }
  }

  // the conversion keeps the low sizeof(T) bytes of the accumulator
  return static_cast<T>(bits);
}

// Parse an encoded variable-length floating point value
// (zero-extend to the right)
//
// Reads `size` bytes (1..sizeof(T)) from *pptr into the highest-addressed
// bytes of the result, leaving the remaining bytes zero, and advances *pptr.
// NOTE(review): this byte placement presumably assumes a little-endian
// host - confirm.
template <class T>
static T ParseFloatValue(const dex::u1** pptr, size_t size) {
  SLICER_CHECK(size > 0);
  SLICER_CHECK(size <= sizeof(T));

  T value = 0;
  size_t start_byte = sizeof(T) - size;
  for (dex::u1* p = reinterpret_cast<dex::u1*>(&value) + start_byte; size > 0;
       --size) {
    *p++ = *(*pptr)++;
  }
  return value;
}

// Parses one encoded value at *pptr (advancing *pptr): a header byte
// holding the value type and an argument, followed by a type specific
// payload. Referenced strings / types / fields / methods are mapped to
// the corresponding IR nodes.
ir::EncodedValue* Reader::ParseEncodedValue(const dex::u1** pptr) {
  auto ir_encoded_value = dex_ir_->Alloc<ir::EncodedValue>();

  SLICER_EXTRA(auto base_ptr = *pptr);

  dex::u1 header = *(*pptr)++;
  dex::u1 type = header & dex::kEncodedValueTypeMask;
  dex::u1 arg = header >> dex::kEncodedValueArgShift;

  ir_encoded_value->type = type;

  // for the variable-length payloads, arg is the payload size minus one
  switch (type) {
    case dex::kEncodedByte:
      ir_encoded_value->u.byte_value = ParseIntValue<int8_t>(pptr, arg + 1);
      break;

    case dex::kEncodedShort:
      ir_encoded_value->u.short_value = ParseIntValue<int16_t>(pptr, arg + 1);
      break;

    case dex::kEncodedChar:
      ir_encoded_value->u.char_value = ParseIntValue<uint16_t>(pptr, arg + 1);
      break;

    case dex::kEncodedInt:
      ir_encoded_value->u.int_value = ParseIntValue<int32_t>(pptr, arg + 1);
      break;

    case dex::kEncodedLong:
      ir_encoded_value->u.long_value = ParseIntValue<int64_t>(pptr, arg + 1);
      break;

    case dex::kEncodedFloat:
      ir_encoded_value->u.float_value = ParseFloatValue<float>(pptr, arg + 1);
      break;

    case dex::kEncodedDouble:
      ir_encoded_value->u.double_value = ParseFloatValue<double>(pptr, arg + 1);
      break;

    case dex::kEncodedString: {
      dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
      ir_encoded_value->u.string_value = GetString(index);
    } break;

    case dex::kEncodedType: {
      dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
      ir_encoded_value->u.type_value = GetType(index);
    } break;

    case dex::kEncodedField: {
      dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
      ir_encoded_value->u.field_value = GetFieldDecl(index);
    } break;

    case dex::kEncodedMethod: {
      dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
      ir_encoded_value->u.method_value = GetMethodDecl(index);
    } break;

    case dex::kEncodedEnum: {
      dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
      ir_encoded_value->u.enum_value = GetFieldDecl(index);
    } break;

    case dex::kEncodedArray:
      SLICER_CHECK(arg == 0);
      ir_encoded_value->u.array_value = ParseEncodedArray(pptr);
      break;

    case dex::kEncodedAnnotation:
      SLICER_CHECK(arg == 0);
      ir_encoded_value->u.annotation_value = ParseAnnotation(pptr);
      break;

    case dex::kEncodedNull:
      SLICER_CHECK(arg == 0);
      break;

    case dex::kEncodedBoolean:
      // the boolean value is stored directly in the header argument
      SLICER_CHECK(arg < 2);
      ir_encoded_value->u.bool_value = (arg == 1);
      break;

    default:
      SLICER_CHECK(!"unexpected value type");
  }

  SLICER_EXTRA(ir_encoded_value->original =
                   slicer::MemView(base_ptr, *pptr - base_ptr));

  return ir_encoded_value;
}

// Parses an encoded annotation at *pptr (advancing *pptr): the type index,
// the elements count and then the (name, value) elements. The visibility
// is set to kVisibilityEncoded.
ir::Annotation* Reader::ParseAnnotation(const dex::u1** pptr) {
  auto ir_annotation = dex_ir_->Alloc<ir::Annotation>();

  dex::u4 type_index = dex::ReadULeb128(pptr);
  dex::u4 elements_count = dex::ReadULeb128(pptr);

  ir_annotation->type = GetType(type_index);
  ir_annotation->visibility = dex::kVisibilityEncoded;

  for (dex::u4 i = 0; i < elements_count; ++i) {
    auto ir_element = dex_ir_->Alloc<ir::AnnotationElement>();

    ir_element->name = GetString(dex::ReadULeb128(pptr));
    ir_element->value = ParseEncodedValue(pptr);

    ir_annotation->elements.push_back(ir_element);
  }

  return ir_annotation;
}

// Parses an encoded array at *pptr (advancing *pptr): the values count
// followed by the encoded values
ir::EncodedArray* Reader::ParseEncodedArray(const dex::u1** pptr) {
  auto ir_encoded_array = dex_ir_->Alloc<ir::EncodedArray>();

  dex::u4 count = dex::ReadULeb128(pptr);
  for (dex::u4 i = 0; i < count; ++i) {
    ir_encoded_array->values.push_back(ParseEncodedValue(pptr));
  }

  return ir_encoded_array;
}

// Returns the IR for the encoded array at the specified offset
// (nullptr for offset 0). Results are cached per offset.
ir::EncodedArray* Reader::ExtractEncodedArray(dex::u4 offset) {
  if (offset == 0) {
    return nullptr;
  }

  // first check if we already extracted the same encoded array
  auto& ir_encoded_array = encoded_arrays_[offset];
  if (ir_encoded_array == nullptr) {
    auto ptr = dataPtr<dex::u1>(offset);
    ir_encoded_array = ParseEncodedArray(&ptr);
  }
  return ir_encoded_array;
}

// Builds the IR for the debug info stream at the specified offset
// (nullptr for offset 0): the start line, the parameter names and a view
// over the raw debug opcodes (up to and including DBG_END_SEQUENCE).
ir::DebugInfo* Reader::ExtractDebugInfo(dex::u4 offset) {
  if (offset == 0) {
    return nullptr;
  }

  auto ir_debug_info = dex_ir_->Alloc<ir::DebugInfo>();
  const dex::u1* ptr = dataPtr<dex::u1>(offset);

  ir_debug_info->line_start = dex::ReadULeb128(&ptr);

  // TODO: implicit this param for non-static methods?
  dex::u4 param_count = dex::ReadULeb128(&ptr);
  for (dex::u4 i = 0; i < param_count; ++i) {
    // the indexes are stored as (index + 1), so an encoded 0
    // decodes to kNoIndex (no name)
    dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
    auto ir_string =
        (name_index == dex::kNoIndex) ? nullptr : GetString(name_index);
    ir_debug_info->param_names.push_back(ir_string);
  }

  // parse the debug info opcodes and note the
  // references to strings and types (to make sure the IR
  // is the full closure of all referenced items)
  //
  // TODO: design a generic debug info iterator?
  //
  auto base_ptr = ptr;
  dex::u1 opcode = 0;
  while ((opcode = *ptr++) != dex::DBG_END_SEQUENCE) {
    switch (opcode) {
      case dex::DBG_ADVANCE_PC:
        // addr_diff
        dex::ReadULeb128(&ptr);
        break;

      case dex::DBG_ADVANCE_LINE:
        // line_diff
        dex::ReadSLeb128(&ptr);
        break;

      case dex::DBG_START_LOCAL: {
        // register_num
        dex::ReadULeb128(&ptr);

        dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
        if (name_index != dex::kNoIndex) {
          GetString(name_index);
        }

        dex::u4 type_index = dex::ReadULeb128(&ptr) - 1;
        if (type_index != dex::kNoIndex) {
          GetType(type_index);
        }
      } break;

      case dex::DBG_START_LOCAL_EXTENDED: {
        // register_num
        dex::ReadULeb128(&ptr);

        dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
        if (name_index != dex::kNoIndex) {
          GetString(name_index);
        }

        dex::u4 type_index = dex::ReadULeb128(&ptr) - 1;
        if (type_index != dex::kNoIndex) {
          GetType(type_index);
        }

        dex::u4 sig_index = dex::ReadULeb128(&ptr) - 1;
        if (sig_index != dex::kNoIndex) {
          GetString(sig_index);
        }
      } break;

      case dex::DBG_END_LOCAL:
      case dex::DBG_RESTART_LOCAL:
        // register_num
        dex::ReadULeb128(&ptr);
        break;

      case dex::DBG_SET_FILE: {
        dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
        if (name_index != dex::kNoIndex) {
          GetString(name_index);
        }
      } break;
    }
  }

  ir_debug_info->data = slicer::MemView(base_ptr, ptr - base_ptr);

  return ir_debug_info;
}

// Builds the IR for the code item at the specified offset (nullptr for
// offset 0): register counts, views over the instructions, try blocks and
// catch handlers, plus the debug info.
ir::Code* Reader::ExtractCode(dex::u4 offset) {
  if (offset == 0) {
    return nullptr;
  }

  SLICER_CHECK(offset % 4 == 0);

  auto dex_code = dataPtr<dex::Code>(offset);
  auto ir_code = dex_ir_->Alloc<ir::Code>();

  ir_code->registers = dex_code->registers_size;
  ir_code->ins_count = dex_code->ins_size;
  ir_code->outs_count = dex_code->outs_size;

  // instructions array
  ir_code->instructions =
      slicer::ArrayView<const dex::u2>(dex_code->insns, dex_code->insns_size);

  // parse the instructions to discover references to other
  // IR nodes (see debug info stream parsing too)
  ParseInstructions(ir_code->instructions);

  // try blocks & handlers
  //
  // TODO: a generic try/catch blocks iterator?
  //
  if (dex_code->tries_size != 0) {
    // the try blocks start after the instructions, with the instructions
    // count rounded up to an even number of code units
    dex::u4 aligned_count = (dex_code->insns_size + 1) / 2 * 2;
    auto tries =
        reinterpret_cast<const dex::TryBlock*>(dex_code->insns + aligned_count);
    auto handlers_list =
        reinterpret_cast<const dex::u1*>(tries + dex_code->tries_size);

    ir_code->try_blocks =
        slicer::ArrayView<const dex::TryBlock>(tries, dex_code->tries_size);

    // parse the handlers list (and discover embedded references)
    auto ptr = handlers_list;

    dex::u4 handlers_count = dex::ReadULeb128(&ptr);
    SLICER_WEAK_CHECK(handlers_count <= dex_code->tries_size);

    for (dex::u4 handler_index = 0; handler_index < handlers_count;
         ++handler_index) {
      // the magnitude is the number of typed catch entries, a value
      // less than one means a catch-all address follows them
      int catch_count = dex::ReadSLeb128(&ptr);
      const int typed_catch_count = std::abs(catch_count);

      for (int catch_index = 0; catch_index < typed_catch_count;
           ++catch_index) {
        dex::u4 type_index = dex::ReadULeb128(&ptr);
        GetType(type_index);

        // address
        dex::ReadULeb128(&ptr);
      }

      if (catch_count < 1) {
        // catch_all_addr
        dex::ReadULeb128(&ptr);
      }
    }

    ir_code->catch_handlers =
        slicer::MemView(handlers_list, ptr - handlers_list);
  }

  ir_code->debug_info = ExtractDebugInfo(dex_code->debug_info_off);

  return ir_code;
}

// Parses one encoded method at *pptr (advancing *pptr).
//
// *base_index is the index of the previous method in the same list, or
// kNoIndex for the first entry: the first entry stores an absolute index,
// the following ones a non-zero delta. *base_index is updated to the
// decoded index.
ir::EncodedMethod* Reader::ParseEncodedMethod(const dex::u1** pptr,
                                              dex::u4* base_index) {
  auto ir_encoded_method = dex_ir_->Alloc<ir::EncodedMethod>();

  auto method_index = dex::ReadULeb128(pptr);
  SLICER_CHECK(method_index != dex::kNoIndex);
  if (*base_index != dex::kNoIndex) {
    SLICER_CHECK(method_index != 0);
    method_index += *base_index;
  }
  *base_index = method_index;

  ir_encoded_method->decl = GetMethodDecl(method_index);
  ir_encoded_method->access_flags = dex::ReadULeb128(pptr);

  dex::u4 code_offset = dex::ReadULeb128(pptr);
  ir_encoded_method->code = ExtractCode(code_offset);

  // update the methods lookup table
  dex_ir_->methods_lookup.Insert(ir_encoded_method);

  return ir_encoded_method;
}

// Builds the IR node for the type id with the specified index
ir::Type* Reader::ParseType(dex::u4 index) {
  auto& dex_type = TypeIds()[index];
  auto ir_type = dex_ir_->Alloc<ir::Type>();

  ir_type->descriptor = GetString(dex_type.descriptor_idx);
  ir_type->orig_index = index;

  return ir_type;
}

// Builds the IR node for the field id with the specified index
ir::FieldDecl* Reader::ParseFieldDecl(dex::u4 index) {
  auto& dex_field = FieldIds()[index];
  auto ir_field = dex_ir_->Alloc<ir::FieldDecl>();

  ir_field->name = GetString(dex_field.name_idx);
  ir_field->type = GetType(dex_field.type_idx);
  ir_field->parent = GetType(dex_field.class_idx);
  ir_field->orig_index = index;

  return ir_field;
}

// Builds the IR node for the method id with the specified index
ir::MethodDecl* Reader::ParseMethodDecl(dex::u4 index) {
  auto& dex_method = MethodIds()[index];
  auto ir_method = dex_ir_->Alloc<ir::MethodDecl>();

  ir_method->name = GetString(dex_method.name_idx);
  ir_method->prototype = GetProto(dex_method.proto_idx);
  ir_method->parent = GetType(dex_method.class_idx);
  ir_method->orig_index = index;

  return ir_method;
}

// Returns the IR for the "type_list" at the specified offset
// (nullptr for offset 0). Results are cached per offset.
ir::TypeList* Reader::ExtractTypeList(dex::u4 offset) {
  if (offset == 0) {
    return nullptr;
  }

  // first check to see if we already extracted the same "type_list"
  auto& ir_type_list = type_lists_[offset];
  if (ir_type_list == nullptr) {
    ir_type_list = dex_ir_->Alloc<ir::TypeList>();

    auto dex_type_list = dataPtr<dex::TypeList>(offset);
    SLICER_WEAK_CHECK(dex_type_list->size > 0);

    for (dex::u4 i = 0; i < dex_type_list->size; ++i) {
      ir_type_list->types.push_back(GetType(dex_type_list->list[i].type_idx));
    }
  }

  return ir_type_list;
}

// Builds the IR node for the proto id with the specified index
// and registers it with the prototypes lookup table
ir::Proto* Reader::ParseProto(dex::u4 index) {
  auto& dex_proto = ProtoIds()[index];
  auto ir_proto = dex_ir_->Alloc<ir::Proto>();

  ir_proto->shorty = GetString(dex_proto.shorty_idx);
  ir_proto->return_type = GetType(dex_proto.return_type_idx);
  ir_proto->param_types = ExtractTypeList(dex_proto.parameters_off);
  ir_proto->orig_index = index;

  // update the prototypes lookup table
  dex_ir_->prototypes_lookup.Insert(ir_proto);

  return ir_proto;
}

// Builds the IR node for the string with the specified index
// and registers it with the strings lookup table
ir::String* Reader::ParseString(dex::u4 index) {
  auto ir_string = dex_ir_->Alloc<ir::String>();

  auto data = GetStringData(index);
  auto cstr = data;
  dex::ReadULeb128(&cstr);

  // the view covers the ULEB128 prefix, the characters
  // and the terminating NUL
  size_t size = (cstr - data) + ::strlen(reinterpret_cast<const char*>(cstr)) + 1;

  ir_string->data = slicer::MemView(data, size);
  ir_string->orig_index = index;

  // update the strings lookup table
  dex_ir_->strings_lookup.Insert(ir_string);

  return ir_string;
}

// Walks the bytecode and maps every string / type / field / method
// reference to the corresponding IR node (so the IR includes everything
// the instructions refer to). Checks that the last instruction ends
// exactly at the end of the code.
void Reader::ParseInstructions(slicer::ArrayView<const dex::u2> code) {
  const dex::u2* ptr = code.begin();
  while (ptr < code.end()) {
    auto dex_instr = dex::DecodeInstruction(ptr);

    // which operand holds the index depends on the instruction format
    dex::u4 index = dex::kNoIndex;
    switch (dex::GetFormatFromOpcode(dex_instr.opcode)) {
      case dex::k20bc:
      case dex::k21c:
      case dex::k31c:
      case dex::k35c:
      case dex::k3rc:
        index = dex_instr.vB;
        break;

      case dex::k22c:
        index = dex_instr.vC;
        break;

      default:
        break;
    }

    switch (GetIndexTypeFromOpcode(dex_instr.opcode)) {
      case dex::kIndexStringRef:
        GetString(index);
        break;

      case dex::kIndexTypeRef:
        GetType(index);
        break;

      case dex::kIndexFieldRef:
        GetFieldDecl(index);
        break;

      case dex::kIndexMethodRef:
        GetMethodDecl(index);
        break;

      default:
        break;
    }

    auto isize = dex::GetWidthFromBytecode(ptr);
    SLICER_CHECK(isize > 0);
    ptr += isize;
  }
  SLICER_CHECK(ptr == code.end());
}

// Basic .dex header structural checks
void Reader::ValidateHeader() {
  SLICER_CHECK(size_ > sizeof(dex::Header));

  // Known issue: For performance reasons the initial size_ passed to jvmti
  // events might be an estimate. b/72402467
  SLICER_CHECK(header_->file_size <= size_);
  SLICER_CHECK(header_->header_size == sizeof(dex::Header));
  SLICER_CHECK(header_->endian_tag == dex::kEndianConstant);
  SLICER_CHECK(header_->data_size % 4 == 0);

  // Known issue: The fields might be slightly corrupted b/65452964
  // SLICER_CHECK(header_->data_off + header_->data_size <= size_);

  SLICER_CHECK(header_->string_ids_off % 4 == 0);
  SLICER_CHECK(header_->type_ids_size < 65536);
  SLICER_CHECK(header_->type_ids_off % 4 == 0);
  SLICER_CHECK(header_->proto_ids_size < 65536);
  SLICER_CHECK(header_->proto_ids_off % 4 == 0);
  SLICER_CHECK(header_->field_ids_off % 4 == 0);
  SLICER_CHECK(header_->method_ids_off % 4 == 0);
  SLICER_CHECK(header_->class_defs_off % 4 == 0);
  SLICER_CHECK(header_->map_off >= header_->data_off &&
               header_->map_off < size_);
  SLICER_CHECK(header_->link_size == 0);
  SLICER_CHECK(header_->link_off == 0);
  SLICER_CHECK(header_->data_off % 4 == 0);
  SLICER_CHECK(header_->map_off % 4 == 0);

  // we seem to have .dex files with extra bytes at the end ...
  // Known issue: For performance reasons the initial size_ passed to jvmti
  // events might be an estimate. b/72402467
  SLICER_WEAK_CHECK(header_->data_off + header_->data_size <= size_);

  // but we should still have the whole data section

  // Known issue: The fields might be slightly corrupted b/65452964
  // Known issue: For performance reasons the initial size_ passed to jvmti
  // events might be an estimate. b/72402467
  // SLICER_CHECK(header_->data_off + header_->data_size <= size_);

  // validate the map
  // (map section size = sizeof(MapList::size) + sizeof(MapList::list[size])
  auto map_list = ptr<dex::MapList>(header_->map_off);
  SLICER_CHECK(map_list->size > 0);
  auto map_section_size =
      sizeof(dex::u4) + sizeof(dex::MapItem) * map_list->size;
  SLICER_CHECK(header_->map_off + map_section_size <= size_);
}

}  // namespace dex