普通文本  |  979行  |  29.42 KB

/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "slicer/reader.h"
#include "slicer/dex_bytecode.h"
#include "slicer/chronometer.h"
#include "slicer/dex_leb128.h"

#include <assert.h>
#include <string.h>
#include <type_traits>
#include <cstdlib>

namespace dex {

// Wraps an in-memory .dex image: binds the header, runs the basic
// header validation and sets up an "empty" .dex IR which only carries
// a view over the magic bytes at the start of the header.
Reader::Reader(const dex::u1* image, size_t size) : image_(image), size_(size) {
  // the header is located at offset 0 of the image
  header_ = ptr<dex::Header>(0);
  ValidateHeader();

  // the IR nodes are created later, on demand
  auto empty_ir = std::make_shared<ir::DexFile>();
  empty_ir->magic = slicer::MemView(header_, sizeof(dex::Header::magic));
  dex_ir_ = empty_ir;
}

// View over the class_defs section, as described by the header
// (class_defs_off / class_defs_size).
slicer::ArrayView<const dex::ClassDef> Reader::ClassDefs() const {
  const auto section_offset = header_->class_defs_off;
  const auto item_count = header_->class_defs_size;
  return section<dex::ClassDef>(section_offset, item_count);
}

// View over the string_ids section, as described by the header
// (string_ids_off / string_ids_size).
slicer::ArrayView<const dex::StringId> Reader::StringIds() const {
  const auto section_offset = header_->string_ids_off;
  const auto item_count = header_->string_ids_size;
  return section<dex::StringId>(section_offset, item_count);
}

// View over the type_ids section, as described by the header
// (type_ids_off / type_ids_size).
slicer::ArrayView<const dex::TypeId> Reader::TypeIds() const {
  const auto section_offset = header_->type_ids_off;
  const auto item_count = header_->type_ids_size;
  return section<dex::TypeId>(section_offset, item_count);
}

// View over the field_ids section, as described by the header
// (field_ids_off / field_ids_size).
slicer::ArrayView<const dex::FieldId> Reader::FieldIds() const {
  const auto section_offset = header_->field_ids_off;
  const auto item_count = header_->field_ids_size;
  return section<dex::FieldId>(section_offset, item_count);
}

// View over the method_ids section, as described by the header
// (method_ids_off / method_ids_size).
slicer::ArrayView<const dex::MethodId> Reader::MethodIds() const {
  const auto section_offset = header_->method_ids_off;
  const auto item_count = header_->method_ids_size;
  return section<dex::MethodId>(section_offset, item_count);
}

// View over the proto_ids section, as described by the header
// (proto_ids_off / proto_ids_size).
slicer::ArrayView<const dex::ProtoId> Reader::ProtoIds() const {
  const auto section_offset = header_->proto_ids_off;
  const auto item_count = header_->proto_ids_size;
  return section<dex::ProtoId>(section_offset, item_count);
}

// Pointer to the map list located at the header's map_off.
const dex::MapList* Reader::DexMapList() const {
  const auto map_offset = header_->map_off;
  return dataPtr<dex::MapList>(map_offset);
}

// Returns the character data of the string with the specified index
// (the bytes which follow the leading ULEB128 value of the string data),
// or the "<no_string>" placeholder if the index is kNoIndex.
const char* Reader::GetStringMUTF8(dex::u4 index) const {
  if (index != dex::kNoIndex) {
    const dex::u1* cursor = GetStringData(index);
    // skip over the ULEB128 prefix
    dex::ReadULeb128(&cursor);
    return reinterpret_cast<const char*>(cursor);
  }
  return "<no_string>";
}

// Creates the IR for every class definition in the .dex image.
void Reader::CreateFullIr() {
  const size_t total_classes = ClassDefs().size();
  size_t class_index = 0;
  while (class_index < total_classes) {
    CreateClassIr(class_index);
    ++class_index;
  }
}

// Creates (or looks up) the IR for the class definition with the
// specified index and checks that a node was produced.
void Reader::CreateClassIr(dex::u4 index) {
  ir::Class* const class_node = GetClass(index);
  SLICER_CHECK(class_node != nullptr);
}

// Linear search over the class definitions: returns the index of
// the first class whose type descriptor matches class_descriptor,
// or kNoIndex if there is no such class
dex::u4 Reader::FindClassIndex(const char* class_descriptor) const {
  auto class_defs = ClassDefs();
  auto type_ids = TypeIds();
  for (dex::u4 class_index = 0; class_index < class_defs.size(); ++class_index) {
    const auto type_index = class_defs[class_index].class_idx;
    const char* candidate = GetStringMUTF8(type_ids[type_index].descriptor_idx);
    if (strcmp(class_descriptor, candidate) != 0) {
      continue;
    }
    return class_index;
  }
  return dex::kNoIndex;
}

// Map a class_def index to the corresponding .dex IR node,
// parsing the class definition the first time the index is requested.
//
// NOTES:
//  1. the mapping between an index and the indexed
//     .dex IR node is 1:1
//  2. a single map lookup serves both the existing
//     nodes and the nodes created by this call
//  3. the "in progress" marker is an invalid, but non-null pointer
//     value stored in the map slot while the node is being parsed; it is
//     used to check that the mapping lookup/update is atomic
//  4. there should be no recursion with the same index
//     (the marker value guards against this too)
//
ir::Class* Reader::GetClass(dex::u4 index) {
  SLICER_CHECK(index != dex::kNoIndex);

  ir::Class* const in_progress = reinterpret_cast<ir::Class*>(1);
  auto& slot = dex_ir_->classes_map[index];

  if (slot != nullptr) {
    // already mapped: it must not be a node which is still being parsed
    SLICER_CHECK(slot != in_progress);
    return slot;
  }

  // first request for this index: parse it and publish the new node
  slot = in_progress;
  ir::Class* const parsed = ParseClass(index);
  SLICER_CHECK(slot == in_progress);
  slot = parsed;
  dex_ir_->classes_indexes.MarkUsedIndex(index);

  SLICER_CHECK(slot != in_progress);
  return slot;
}

// Map a type index to the corresponding .dex IR node,
// parsing the type the first time the index is requested
// (see the Reader::GetClass() comments)
ir::Type* Reader::GetType(dex::u4 index) {
  SLICER_CHECK(index != dex::kNoIndex);

  ir::Type* const in_progress = reinterpret_cast<ir::Type*>(1);
  auto& slot = dex_ir_->types_map[index];

  if (slot != nullptr) {
    // already mapped: it must not be a node which is still being parsed
    SLICER_CHECK(slot != in_progress);
    return slot;
  }

  // first request for this index: parse it and publish the new node
  slot = in_progress;
  ir::Type* const parsed = ParseType(index);
  SLICER_CHECK(slot == in_progress);
  slot = parsed;
  dex_ir_->types_indexes.MarkUsedIndex(index);

  SLICER_CHECK(slot != in_progress);
  return slot;
}

// Map a field index to the corresponding .dex IR node,
// parsing the field declaration the first time the index is requested
// (see the Reader::GetClass() comments)
ir::FieldDecl* Reader::GetFieldDecl(dex::u4 index) {
  SLICER_CHECK(index != dex::kNoIndex);

  ir::FieldDecl* const in_progress = reinterpret_cast<ir::FieldDecl*>(1);
  auto& slot = dex_ir_->fields_map[index];

  if (slot != nullptr) {
    // already mapped: it must not be a node which is still being parsed
    SLICER_CHECK(slot != in_progress);
    return slot;
  }

  // first request for this index: parse it and publish the new node
  slot = in_progress;
  ir::FieldDecl* const parsed = ParseFieldDecl(index);
  SLICER_CHECK(slot == in_progress);
  slot = parsed;
  dex_ir_->fields_indexes.MarkUsedIndex(index);

  SLICER_CHECK(slot != in_progress);
  return slot;
}

// Map a method index to the corresponding .dex IR node,
// parsing the method declaration the first time the index is requested
// (see the Reader::GetClass() comments)
ir::MethodDecl* Reader::GetMethodDecl(dex::u4 index) {
  SLICER_CHECK(index != dex::kNoIndex);

  ir::MethodDecl* const in_progress = reinterpret_cast<ir::MethodDecl*>(1);
  auto& slot = dex_ir_->methods_map[index];

  if (slot != nullptr) {
    // already mapped: it must not be a node which is still being parsed
    SLICER_CHECK(slot != in_progress);
    return slot;
  }

  // first request for this index: parse it and publish the new node
  slot = in_progress;
  ir::MethodDecl* const parsed = ParseMethodDecl(index);
  SLICER_CHECK(slot == in_progress);
  slot = parsed;
  dex_ir_->methods_indexes.MarkUsedIndex(index);

  SLICER_CHECK(slot != in_progress);
  return slot;
}

// Map a prototype index to the corresponding .dex IR node,
// parsing the prototype the first time the index is requested
// (see the Reader::GetClass() comments)
ir::Proto* Reader::GetProto(dex::u4 index) {
  SLICER_CHECK(index != dex::kNoIndex);

  ir::Proto* const in_progress = reinterpret_cast<ir::Proto*>(1);
  auto& slot = dex_ir_->protos_map[index];

  if (slot != nullptr) {
    // already mapped: it must not be a node which is still being parsed
    SLICER_CHECK(slot != in_progress);
    return slot;
  }

  // first request for this index: parse it and publish the new node
  slot = in_progress;
  ir::Proto* const parsed = ParseProto(index);
  SLICER_CHECK(slot == in_progress);
  slot = parsed;
  dex_ir_->protos_indexes.MarkUsedIndex(index);

  SLICER_CHECK(slot != in_progress);
  return slot;
}

// Map a string index to the corresponding .dex IR node,
// parsing the string the first time the index is requested
// (see the Reader::GetClass() comments)
ir::String* Reader::GetString(dex::u4 index) {
  SLICER_CHECK(index != dex::kNoIndex);

  ir::String* const in_progress = reinterpret_cast<ir::String*>(1);
  auto& slot = dex_ir_->strings_map[index];

  if (slot != nullptr) {
    // already mapped: it must not be a node which is still being parsed
    SLICER_CHECK(slot != in_progress);
    return slot;
  }

  // first request for this index: parse it and publish the new node
  slot = in_progress;
  ir::String* const parsed = ParseString(index);
  SLICER_CHECK(slot == in_progress);
  slot = parsed;
  dex_ir_->strings_indexes.MarkUsedIndex(index);

  SLICER_CHECK(slot != in_progress);
  return slot;
}

// Builds the IR node for the class definition with the specified index:
// links the class with its type, copies the access flags, resolves the
// interfaces / super class / source file references, then parses the
// class data (fields and methods), the static initializers and the
// annotations.
ir::Class* Reader::ParseClass(dex::u4 index) {
  auto& class_def = ClassDefs()[index];
  auto class_node = dex_ir_->Alloc<ir::Class>();

  // link the class and its type in both directions
  class_node->type = GetType(class_def.class_idx);
  assert(class_node->type->class_def == nullptr);
  class_node->type->class_def = class_node;

  class_node->access_flags = class_def.access_flags;
  class_node->interfaces = ExtractTypeList(class_def.interfaces_off);

  // optional references (kNoIndex means "not present")
  if (class_def.superclass_idx != dex::kNoIndex) {
    class_node->super_class = GetType(class_def.superclass_idx);
  }
  if (class_def.source_file_idx != dex::kNoIndex) {
    class_node->source_file = GetString(class_def.source_file_idx);
  }

  // optional class data (offset 0 means "not present")
  if (class_def.class_data_off != 0) {
    const dex::u1* cursor = dataPtr<dex::u1>(class_def.class_data_off);

    // the class data starts with four ULEB128 member counts ...
    const dex::u4 static_fields_left = dex::ReadULeb128(&cursor);
    const dex::u4 instance_fields_left = dex::ReadULeb128(&cursor);
    const dex::u4 direct_methods_left = dex::ReadULeb128(&cursor);
    const dex::u4 virtual_methods_left = dex::ReadULeb128(&cursor);

    // ... followed by the four member lists, in the same order
    // (each list restarts its running index from kNoIndex)
    dex::u4 running_index = dex::kNoIndex;
    for (dex::u4 left = static_fields_left; left > 0; --left) {
      class_node->static_fields.push_back(
          ParseEncodedField(&cursor, &running_index));
    }

    running_index = dex::kNoIndex;
    for (dex::u4 left = instance_fields_left; left > 0; --left) {
      class_node->instance_fields.push_back(
          ParseEncodedField(&cursor, &running_index));
    }

    running_index = dex::kNoIndex;
    for (dex::u4 left = direct_methods_left; left > 0; --left) {
      class_node->direct_methods.push_back(
          ParseEncodedMethod(&cursor, &running_index));
    }

    running_index = dex::kNoIndex;
    for (dex::u4 left = virtual_methods_left; left > 0; --left) {
      class_node->virtual_methods.push_back(
          ParseEncodedMethod(&cursor, &running_index));
    }
  }

  class_node->static_init = ExtractEncodedArray(class_def.static_values_off);
  class_node->annotations = ExtractAnnotations(class_def.annotations_off);
  class_node->orig_index = index;

  return class_node;
}

// Returns the IR for the "annotations_directory_item" at the specified
// offset (nullptr for offset 0). The result is cached by offset, so the
// same item is only parsed once.
ir::AnnotationsDirectory* Reader::ExtractAnnotations(dex::u4 offset) {
  if (offset == 0) {
    return nullptr;
  }

  SLICER_CHECK(offset % 4 == 0);

  // did we already extract this directory?
  auto& cached = annotations_directories_[offset];
  if (cached != nullptr) {
    return cached;
  }

  cached = dex_ir_->Alloc<ir::AnnotationsDirectory>();

  auto directory_item = dataPtr<dex::AnnotationsDirectoryItem>(offset);

  cached->class_annotation =
      ExtractAnnotationSet(directory_item->class_annotations_off);

  // the field, method and parameter annotation entries are laid out
  // back to back, right after the fixed part of the directory item
  const dex::u1* cursor = reinterpret_cast<const dex::u1*>(directory_item + 1);

  for (dex::u4 n = 0; n < directory_item->fields_size; ++n) {
    auto field_annotation = ParseFieldAnnotation(&cursor);
    cached->field_annotations.push_back(field_annotation);
  }

  for (dex::u4 n = 0; n < directory_item->methods_size; ++n) {
    auto method_annotation = ParseMethodAnnotation(&cursor);
    cached->method_annotations.push_back(method_annotation);
  }

  for (dex::u4 n = 0; n < directory_item->parameters_size; ++n) {
    auto param_annotation = ParseParamAnnotation(&cursor);
    cached->param_annotations.push_back(param_annotation);
  }

  return cached;
}

// Returns the IR for the "annotation_item" at the specified (non-zero)
// offset. The result is cached by offset, so the same item is only
// parsed once.
ir::Annotation* Reader::ExtractAnnotationItem(dex::u4 offset) {
  SLICER_CHECK(offset != 0);

  // did we already extract this item?
  auto& cached = annotations_[offset];
  if (cached != nullptr) {
    return cached;
  }

  auto annotation_item = dataPtr<dex::AnnotationItem>(offset);
  const dex::u1* cursor = annotation_item->annotation;
  cached = ParseAnnotation(&cursor);
  // the item's own visibility replaces the one set by ParseAnnotation()
  cached->visibility = annotation_item->visibility;
  return cached;
}

// Returns the IR for the "annotation_set_item" at the specified offset
// (nullptr for offset 0). The result is cached by offset, so the same
// set is only parsed once.
ir::AnnotationSet* Reader::ExtractAnnotationSet(dex::u4 offset) {
  if (offset == 0) {
    return nullptr;
  }

  SLICER_CHECK(offset % 4 == 0);

  // did we already extract this set?
  auto& cached = annotation_sets_[offset];
  if (cached != nullptr) {
    return cached;
  }

  cached = dex_ir_->Alloc<ir::AnnotationSet>();

  // each entry is the offset of an "annotation_item"
  auto set_item = dataPtr<dex::AnnotationSetItem>(offset);
  for (dex::u4 n = 0; n < set_item->size; ++n) {
    auto annotation = ExtractAnnotationItem(set_item->entries[n]);
    assert(annotation != nullptr);
    cached->annotations.push_back(annotation);
  }

  return cached;
}

// Builds the IR for the "annotation_set_ref_list" at the specified offset.
// Entries with a zero annotations offset are skipped, so the resulting
// list only contains non-null annotation sets.
ir::AnnotationSetRefList* Reader::ExtractAnnotationSetRefList(dex::u4 offset) {
  SLICER_CHECK(offset % 4 == 0);

  auto ref_list_item = dataPtr<dex::AnnotationSetRefList>(offset);
  auto ref_list_node = dex_ir_->Alloc<ir::AnnotationSetRefList>();

  for (dex::u4 n = 0; n < ref_list_item->size; ++n) {
    const dex::u4 set_offset = ref_list_item->list[n].annotations_off;
    if (set_offset == 0) {
      continue;
    }
    auto annotation_set = ExtractAnnotationSet(set_offset);
    SLICER_CHECK(annotation_set != nullptr);
    ref_list_node->annotations.push_back(annotation_set);
  }

  return ref_list_node;
}

// Parses one field annotations entry at *pptr (field index + annotation
// set offset) and advances *pptr past it. The annotation set must exist.
ir::FieldAnnotation* Reader::ParseFieldAnnotation(const dex::u1** pptr) {
  auto entry = reinterpret_cast<const dex::FieldAnnotationsItem*>(*pptr);
  auto field_annotation = dex_ir_->Alloc<ir::FieldAnnotation>();

  field_annotation->field_decl = GetFieldDecl(entry->field_idx);

  auto annotation_set = ExtractAnnotationSet(entry->annotations_off);
  field_annotation->annotations = annotation_set;
  SLICER_CHECK(field_annotation->annotations != nullptr);

  // step over the entry we just consumed
  *pptr = *pptr + sizeof(dex::FieldAnnotationsItem);
  return field_annotation;
}

// Parses one method annotations entry at *pptr (method index + annotation
// set offset) and advances *pptr past it. The annotation set must exist.
ir::MethodAnnotation* Reader::ParseMethodAnnotation(const dex::u1** pptr) {
  auto entry = reinterpret_cast<const dex::MethodAnnotationsItem*>(*pptr);
  auto method_annotation = dex_ir_->Alloc<ir::MethodAnnotation>();

  method_annotation->method_decl = GetMethodDecl(entry->method_idx);

  auto annotation_set = ExtractAnnotationSet(entry->annotations_off);
  method_annotation->annotations = annotation_set;
  SLICER_CHECK(method_annotation->annotations != nullptr);

  // step over the entry we just consumed
  *pptr = *pptr + sizeof(dex::MethodAnnotationsItem);
  return method_annotation;
}

// Parses one parameter annotations entry at *pptr (method index +
// annotation set ref list offset) and advances *pptr past it.
ir::ParamAnnotation* Reader::ParseParamAnnotation(const dex::u1** pptr) {
  auto entry = reinterpret_cast<const dex::ParameterAnnotationsItem*>(*pptr);
  auto param_annotation = dex_ir_->Alloc<ir::ParamAnnotation>();

  param_annotation->method_decl = GetMethodDecl(entry->method_idx);

  auto set_ref_list = ExtractAnnotationSetRefList(entry->annotations_off);
  param_annotation->annotations = set_ref_list;
  SLICER_CHECK(param_annotation->annotations != nullptr);

  // step over the entry we just consumed
  *pptr = *pptr + sizeof(dex::ParameterAnnotationsItem);
  return param_annotation;
}

// Parses one encoded field (ULEB128 field index + ULEB128 access flags)
// and advances *pptr past it.
//
// The first entry of a list (marked by *base_index == kNoIndex) stores an
// absolute field index; each following entry stores a non-zero increment
// over the previous index. *base_index is updated to the index just read.
ir::EncodedField* Reader::ParseEncodedField(const dex::u1** pptr, dex::u4* base_index) {
  auto encoded_field = dex_ir_->Alloc<ir::EncodedField>();

  auto index = dex::ReadULeb128(pptr);
  SLICER_CHECK(index != dex::kNoIndex);

  const bool first_in_list = (*base_index == dex::kNoIndex);
  if (!first_in_list) {
    // not the first entry: the encoded value is a delta
    SLICER_CHECK(index != 0);
    index += *base_index;
  }
  *base_index = index;

  encoded_field->decl = GetFieldDecl(index);
  encoded_field->access_flags = dex::ReadULeb128(pptr);

  return encoded_field;
}

// Parse an encoded variable-length integer value
// (sign-extend signed types, zero-extend unsigned types)
//
// Reads `size` bytes from *pptr, least significant byte first, and
// advances *pptr past them. `size` must be in the [1, sizeof(T)] range.
//
// The value is assembled in an unsigned 64-bit accumulator and the sign
// extension is done with explicit bit masks: shifting bits into (or out
// of) the sign bit of a signed type is undefined behavior before C++20.
template <class T>
static T ParseIntValue(const dex::u1** pptr, size_t size) {
  static_assert(std::is_integral<T>::value, "must be an integral type");
  static_assert(sizeof(T) <= sizeof(uint64_t), "integral type is too wide");

  SLICER_CHECK(size > 0);
  SLICER_CHECK(size <= sizeof(T));

  // little-endian: byte #i contributes the bits [i * 8, i * 8 + 7]
  uint64_t bits = 0;
  for (size_t i = 0; i < size; ++i) {
    bits |= static_cast<uint64_t>(*(*pptr)++) << (i * 8);
  }

  // sign-extend?
  // (only needed for a truncated encoding; here size < sizeof(T) <= 8,
  // so the shift counts below are always less than 64)
  if (std::is_signed<T>::value && size < sizeof(T)) {
    const uint64_t sign_bit = uint64_t(1) << (size * 8 - 1);
    if ((bits & sign_bit) != 0) {
      bits |= ~uint64_t(0) << (size * 8);
    }
  }

  return static_cast<T>(bits);
}

// Parse an encoded variable-length floating point value
// (zero-extend to the right)
//
// Copies `size` bytes from *pptr into the last `size` bytes (in memory
// order) of a zero-initialized T and advances *pptr past them.
// `size` must be in the [1, sizeof(T)] range.
// NOTE(review): this placement looks like it assumes a little-endian
// host - confirm.
template <class T>
static T ParseFloatValue(const dex::u1** pptr, size_t size) {
  SLICER_CHECK(size > 0);
  SLICER_CHECK(size <= sizeof(T));

  T value = 0;
  dex::u1* dst = reinterpret_cast<dex::u1*>(&value) + (sizeof(T) - size);
  for (size_t i = 0; i < size; ++i) {
    dst[i] = (*pptr)[i];
  }
  *pptr += size;
  return value;
}

// Parses one encoded value at *pptr and advances *pptr past it.
//
// The first byte is a header which packs the value type (the bits selected
// by kEncodedValueTypeMask) and a type-specific argument (the bits above
// kEncodedValueArgShift). For the numeric and index types the payload
// is (arg + 1) bytes long; arrays and annotations are parsed recursively.
ir::EncodedValue* Reader::ParseEncodedValue(const dex::u1** pptr) {
  auto value_node = dex_ir_->Alloc<ir::EncodedValue>();

  SLICER_EXTRA(auto base_ptr = *pptr);

  const dex::u1 header = *(*pptr)++;
  const dex::u1 type = header & dex::kEncodedValueTypeMask;
  const dex::u1 arg = header >> dex::kEncodedValueArgShift;

  // payload size for the numeric and index types
  const size_t payload_size = arg + 1;

  // reads the payload as an unsigned index
  auto read_index = [&]() {
    return ParseIntValue<dex::u4>(pptr, payload_size);
  };

  value_node->type = type;

  switch (type) {
    // integral values
    case dex::kEncodedByte:
      value_node->u.byte_value = ParseIntValue<int8_t>(pptr, payload_size);
      break;
    case dex::kEncodedShort:
      value_node->u.short_value = ParseIntValue<int16_t>(pptr, payload_size);
      break;
    case dex::kEncodedChar:
      value_node->u.char_value = ParseIntValue<uint16_t>(pptr, payload_size);
      break;
    case dex::kEncodedInt:
      value_node->u.int_value = ParseIntValue<int32_t>(pptr, payload_size);
      break;
    case dex::kEncodedLong:
      value_node->u.long_value = ParseIntValue<int64_t>(pptr, payload_size);
      break;

    // floating point values
    case dex::kEncodedFloat:
      value_node->u.float_value = ParseFloatValue<float>(pptr, payload_size);
      break;
    case dex::kEncodedDouble:
      value_node->u.double_value = ParseFloatValue<double>(pptr, payload_size);
      break;

    // references to other IR nodes
    case dex::kEncodedString:
      value_node->u.string_value = GetString(read_index());
      break;
    case dex::kEncodedType:
      value_node->u.type_value = GetType(read_index());
      break;
    case dex::kEncodedField:
      value_node->u.field_value = GetFieldDecl(read_index());
      break;
    case dex::kEncodedMethod:
      value_node->u.method_value = GetMethodDecl(read_index());
      break;
    case dex::kEncodedEnum:
      value_node->u.enum_value = GetFieldDecl(read_index());
      break;

    // nested values
    case dex::kEncodedArray:
      SLICER_CHECK(arg == 0);
      value_node->u.array_value = ParseEncodedArray(pptr);
      break;
    case dex::kEncodedAnnotation:
      SLICER_CHECK(arg == 0);
      value_node->u.annotation_value = ParseAnnotation(pptr);
      break;

    // values with no payload (a boolean is stored in the header argument)
    case dex::kEncodedNull:
      SLICER_CHECK(arg == 0);
      break;
    case dex::kEncodedBoolean:
      SLICER_CHECK(arg < 2);
      value_node->u.bool_value = (arg == 1);
      break;

    default:
      SLICER_CHECK(!"unexpected value type");
  }

  SLICER_EXTRA(value_node->original = slicer::MemView(base_ptr, *pptr - base_ptr));

  return value_node;
}

// Parses an encoded annotation at *pptr (ULEB128 type index, ULEB128
// element count, then the name/value pairs) and advances *pptr past it.
// The visibility of the new node is set to kVisibilityEncoded.
ir::Annotation* Reader::ParseAnnotation(const dex::u1** pptr) {
  auto annotation = dex_ir_->Alloc<ir::Annotation>();

  const dex::u4 type_index = dex::ReadULeb128(pptr);
  const dex::u4 element_count = dex::ReadULeb128(pptr);

  annotation->type = GetType(type_index);
  annotation->visibility = dex::kVisibilityEncoded;

  for (dex::u4 left = element_count; left > 0; --left) {
    auto element = dex_ir_->Alloc<ir::AnnotationElement>();

    // each element is a ULEB128 name index followed by an encoded value
    const dex::u4 name_index = dex::ReadULeb128(pptr);
    element->name = GetString(name_index);
    element->value = ParseEncodedValue(pptr);

    annotation->elements.push_back(element);
  }

  return annotation;
}

// Parses an encoded array at *pptr (ULEB128 element count followed by
// that many encoded values) and advances *pptr past it.
ir::EncodedArray* Reader::ParseEncodedArray(const dex::u1** pptr) {
  auto encoded_array = dex_ir_->Alloc<ir::EncodedArray>();

  const dex::u4 value_count = dex::ReadULeb128(pptr);
  for (dex::u4 left = value_count; left > 0; --left) {
    auto value = ParseEncodedValue(pptr);
    encoded_array->values.push_back(value);
  }

  return encoded_array;
}

// Returns the IR for the encoded array at the specified offset
// (nullptr for offset 0). The result is cached by offset, so the same
// array is only parsed once.
ir::EncodedArray* Reader::ExtractEncodedArray(dex::u4 offset) {
  if (offset == 0) {
    return nullptr;
  }

  // did we already extract this encoded array?
  auto& cached = encoded_arrays_[offset];
  if (cached != nullptr) {
    return cached;
  }

  const dex::u1* cursor = dataPtr<dex::u1>(offset);
  cached = ParseEncodedArray(&cursor);
  return cached;
}

// Builds the IR for the debug info stream at the specified offset
// (nullptr for offset 0): reads the starting line and the parameter
// names, then walks the debug opcodes up to (and including)
// DBG_END_SEQUENCE. The opcodes are kept as a raw memory view; they are
// only decoded to discover the strings and types they reference.
ir::DebugInfo* Reader::ExtractDebugInfo(dex::u4 offset) {
  if (offset == 0) {
    return nullptr;
  }

  auto debug_info = dex_ir_->Alloc<ir::DebugInfo>();
  const dex::u1* cursor = dataPtr<dex::u1>(offset);

  // reads a ULEB128 value and returns it minus one
  // (the result is compared against kNoIndex to detect "no index")
  auto read_index_p1 = [&cursor]() -> dex::u4 {
    return dex::ReadULeb128(&cursor) - 1;
  };

  // reads an optional string index and makes sure the string is in the IR
  auto touch_string = [&]() {
    const dex::u4 string_index = read_index_p1();
    if (string_index != dex::kNoIndex) {
      GetString(string_index);
    }
  };

  // reads an optional type index and makes sure the type is in the IR
  auto touch_type = [&]() {
    const dex::u4 type_index = read_index_p1();
    if (type_index != dex::kNoIndex) {
      GetType(type_index);
    }
  };

  debug_info->line_start = dex::ReadULeb128(&cursor);

  // TODO: implicit this param for non-static methods?
  const dex::u4 param_count = dex::ReadULeb128(&cursor);
  for (dex::u4 left = param_count; left > 0; --left) {
    const dex::u4 name_index = read_index_p1();
    ir::String* param_name = nullptr;
    if (name_index != dex::kNoIndex) {
      param_name = GetString(name_index);
    }
    debug_info->param_names.push_back(param_name);
  }

  // parse the debug info opcodes and note the
  // references to strings and types (to make sure the IR
  // is the full closure of all referenced items)
  //
  // TODO: design a generic debug info iterator?
  //
  const dex::u1* const opcodes_start = cursor;
  for (;;) {
    const dex::u1 opcode = *cursor++;
    if (opcode == dex::DBG_END_SEQUENCE) {
      break;
    }

    switch (opcode) {
      case dex::DBG_ADVANCE_PC:
        // addr_diff
        dex::ReadULeb128(&cursor);
        break;

      case dex::DBG_ADVANCE_LINE:
        // line_diff
        dex::ReadSLeb128(&cursor);
        break;

      case dex::DBG_START_LOCAL:
      case dex::DBG_START_LOCAL_EXTENDED:
        // register_num
        dex::ReadULeb128(&cursor);
        // name
        touch_string();
        // type
        touch_type();
        // the extended form also carries a signature string
        if (opcode == dex::DBG_START_LOCAL_EXTENDED) {
          touch_string();
        }
        break;

      case dex::DBG_END_LOCAL:
      case dex::DBG_RESTART_LOCAL:
        // register_num
        dex::ReadULeb128(&cursor);
        break;

      case dex::DBG_SET_FILE:
        // file name
        touch_string();
        break;

      default:
        // every other opcode has no arguments to skip
        break;
    }
  }

  // the view includes the terminating DBG_END_SEQUENCE opcode
  debug_info->data = slicer::MemView(opcodes_start, cursor - opcodes_start);

  return debug_info;
}

// Builds the IR for the code item at the specified offset (nullptr for
// offset 0): register counts, the instructions array, the try blocks,
// the catch handlers and the debug info. The instructions and the
// handlers are kept as views over the image; they are only decoded to
// discover the IR nodes they reference.
ir::Code* Reader::ExtractCode(dex::u4 offset) {
  if (offset == 0) {
    return nullptr;
  }

  SLICER_CHECK(offset % 4 == 0);

  auto code_item = dataPtr<dex::Code>(offset);
  auto code = dex_ir_->Alloc<ir::Code>();

  code->registers = code_item->registers_size;
  code->ins_count = code_item->ins_size;
  code->outs_count = code_item->outs_size;

  // instructions array
  code->instructions =
      slicer::ArrayView<const dex::u2>(code_item->insns, code_item->insns_size);

  // parse the instructions to discover references to other
  // IR nodes (see debug info stream parsing too)
  ParseInstructions(code->instructions);

  // try blocks & handlers
  //
  // TODO: a generic try/catch blocks iterator?
  //
  if (code_item->tries_size != 0) {
    // the try blocks start after the instructions, with the
    // instructions count rounded up to an even number of code units
    const dex::u4 padded_insns_count = (code_item->insns_size + 1) / 2 * 2;
    auto try_blocks = reinterpret_cast<const dex::TryBlock*>(
        code_item->insns + padded_insns_count);

    // the handlers list starts right after the last try block
    auto handlers_start =
        reinterpret_cast<const dex::u1*>(try_blocks + code_item->tries_size);

    code->try_blocks =
        slicer::ArrayView<const dex::TryBlock>(try_blocks, code_item->tries_size);

    // parse the handlers list (and discover embedded references)
    auto cursor = handlers_start;

    const dex::u4 handlers_count = dex::ReadULeb128(&cursor);
    SLICER_WEAK_CHECK(handlers_count <= code_item->tries_size);

    for (dex::u4 left = handlers_count; left > 0; --left) {
      // the magnitude is the number of typed catches; a value
      // less than one means that a catch-all address follows them
      const int catch_count = dex::ReadSLeb128(&cursor);
      const int typed_catches = std::abs(catch_count);

      for (int n = 0; n < typed_catches; ++n) {
        // exception type
        const dex::u4 type_index = dex::ReadULeb128(&cursor);
        GetType(type_index);

        // address
        dex::ReadULeb128(&cursor);
      }

      if (catch_count < 1) {
        // catch_all_addr
        dex::ReadULeb128(&cursor);
      }
    }

    code->catch_handlers = slicer::MemView(handlers_start, cursor - handlers_start);
  }

  code->debug_info = ExtractDebugInfo(code_item->debug_info_off);

  return code;
}

// Parses one encoded method (ULEB128 method index, ULEB128 access flags,
// ULEB128 code offset), advances *pptr past it and registers the new
// node with the methods lookup table.
//
// The first entry of a list (marked by *base_index == kNoIndex) stores an
// absolute method index; each following entry stores a non-zero increment
// over the previous index. *base_index is updated to the index just read.
ir::EncodedMethod* Reader::ParseEncodedMethod(const dex::u1** pptr, dex::u4* base_index) {
  auto encoded_method = dex_ir_->Alloc<ir::EncodedMethod>();

  auto index = dex::ReadULeb128(pptr);
  SLICER_CHECK(index != dex::kNoIndex);

  const bool first_in_list = (*base_index == dex::kNoIndex);
  if (!first_in_list) {
    // not the first entry: the encoded value is a delta
    SLICER_CHECK(index != 0);
    index += *base_index;
  }
  *base_index = index;

  encoded_method->decl = GetMethodDecl(index);
  encoded_method->access_flags = dex::ReadULeb128(pptr);

  // the method body, if any (ExtractCode() returns nullptr for offset 0)
  const dex::u4 code_offset = dex::ReadULeb128(pptr);
  encoded_method->code = ExtractCode(code_offset);

  // update the methods lookup table
  dex_ir_->methods_lookup.Insert(encoded_method);

  return encoded_method;
}

// Builds the IR node for the type with the specified index
// (the descriptor string plus the original index).
ir::Type* Reader::ParseType(dex::u4 index) {
  auto& type_id = TypeIds()[index];
  auto type_node = dex_ir_->Alloc<ir::Type>();

  auto descriptor = GetString(type_id.descriptor_idx);
  type_node->descriptor = descriptor;
  type_node->orig_index = index;

  return type_node;
}

// Builds the IR node for the field declaration with the specified index
// (name, field type, parent type plus the original index).
ir::FieldDecl* Reader::ParseFieldDecl(dex::u4 index) {
  auto& field_id = FieldIds()[index];
  auto field_node = dex_ir_->Alloc<ir::FieldDecl>();

  // resolve the references in this order: name, type, parent
  auto name = GetString(field_id.name_idx);
  field_node->name = name;
  auto type = GetType(field_id.type_idx);
  field_node->type = type;
  auto parent = GetType(field_id.class_idx);
  field_node->parent = parent;

  field_node->orig_index = index;

  return field_node;
}

// Builds the IR node for the method declaration with the specified index
// (name, prototype, parent type plus the original index).
ir::MethodDecl* Reader::ParseMethodDecl(dex::u4 index) {
  auto& method_id = MethodIds()[index];
  auto method_node = dex_ir_->Alloc<ir::MethodDecl>();

  // resolve the references in this order: name, prototype, parent
  auto name = GetString(method_id.name_idx);
  method_node->name = name;
  auto prototype = GetProto(method_id.proto_idx);
  method_node->prototype = prototype;
  auto parent = GetType(method_id.class_idx);
  method_node->parent = parent;

  method_node->orig_index = index;

  return method_node;
}

// Returns the IR for the "type_list" at the specified offset
// (nullptr for offset 0). The result is cached by offset, so the same
// list is only parsed once.
ir::TypeList* Reader::ExtractTypeList(dex::u4 offset) {
  if (offset == 0) {
    return nullptr;
  }

  // did we already extract this type list?
  auto& cached = type_lists_[offset];
  if (cached != nullptr) {
    return cached;
  }

  cached = dex_ir_->Alloc<ir::TypeList>();

  auto type_list_item = dataPtr<dex::TypeList>(offset);
  // an empty list is unexpected, but only reported as a weak check
  SLICER_WEAK_CHECK(type_list_item->size > 0);

  for (dex::u4 n = 0; n < type_list_item->size; ++n) {
    auto type = GetType(type_list_item->list[n].type_idx);
    cached->types.push_back(type);
  }

  return cached;
}

// Builds the IR node for the prototype with the specified index
// (shorty string, return type, parameter types plus the original index)
// and registers it with the prototypes lookup table.
ir::Proto* Reader::ParseProto(dex::u4 index) {
  auto& proto_id = ProtoIds()[index];
  auto proto_node = dex_ir_->Alloc<ir::Proto>();

  // resolve the references in this order: shorty, return type, parameters
  auto shorty = GetString(proto_id.shorty_idx);
  proto_node->shorty = shorty;
  auto return_type = GetType(proto_id.return_type_idx);
  proto_node->return_type = return_type;
  auto param_types = ExtractTypeList(proto_id.parameters_off);
  proto_node->param_types = param_types;

  proto_node->orig_index = index;

  // update the prototypes lookup table
  dex_ir_->prototypes_lookup.Insert(proto_node);

  return proto_node;
}

// Builds the IR node for the string with the specified index and
// registers it with the strings lookup table. The node's data is a view
// over the whole string data: the ULEB128 prefix, the characters and
// the terminating zero byte.
ir::String* Reader::ParseString(dex::u4 index) {
  auto string_node = dex_ir_->Alloc<ir::String>();

  const dex::u1* const item_start = GetStringData(index);

  // locate the characters (right after the ULEB128 prefix)
  const dex::u1* chars = item_start;
  dex::ReadULeb128(&chars);

  const size_t prefix_size = chars - item_start;
  const size_t chars_size = ::strlen(reinterpret_cast<const char*>(chars)) + 1;

  string_node->data = slicer::MemView(item_start, prefix_size + chars_size);
  string_node->orig_index = index;

  // update the strings lookup table
  dex_ir_->strings_lookup.Insert(string_node);

  return string_node;
}

// Walks the instructions array and, for every instruction which
// references a string, type, field or method, makes sure that the
// referenced IR node exists. The walk must end exactly at the end
// of the array.
void Reader::ParseInstructions(slicer::ArrayView<const dex::u2> code) {
  const dex::u2* cursor = code.begin();
  while (cursor < code.end()) {
    auto instr = dex::DecodeInstruction(cursor);

    // the operand which carries the index depends on the instruction format
    dex::u4 ref_index = dex::kNoIndex;
    const auto format = dex::GetFormatFromOpcode(instr.opcode);
    if (format == dex::k22c) {
      ref_index = instr.vC;
    } else if (format == dex::k20bc || format == dex::k21c ||
               format == dex::k31c || format == dex::k35c ||
               format == dex::k3rc) {
      ref_index = instr.vB;
    }

    // touch the referenced IR node, if any
    switch (GetIndexTypeFromOpcode(instr.opcode)) {
      case dex::kIndexStringRef:
        GetString(ref_index);
        break;

      case dex::kIndexTypeRef:
        GetType(ref_index);
        break;

      case dex::kIndexFieldRef:
        GetFieldDecl(ref_index);
        break;

      case dex::kIndexMethodRef:
        GetMethodDecl(ref_index);
        break;

      default:
        break;
    }

    // advance to the next instruction
    const auto width = dex::GetWidthFromBytecode(cursor);
    SLICER_CHECK(width > 0);
    cursor += width;
  }
  SLICER_CHECK(cursor == code.end());
}

// Basic .dex header structural checks
//
// Verifies the sizes, alignments and offsets recorded in the header
// against the size of the image, then checks that the whole map list
// fits inside the image.
//
// The "offset + size" sums are evaluated in 64-bit arithmetic: with
// 32-bit operands an oversized field could wrap around and slip past
// the bounds checks.
void Reader::ValidateHeader() {
  SLICER_CHECK(size_ > sizeof(dex::Header));

  // Known issue: For performance reasons the initial size_ passed to jvmti events might be an
  // estimate. b/72402467
  SLICER_CHECK(header_->file_size <= size_);
  SLICER_CHECK(header_->header_size == sizeof(dex::Header));
  SLICER_CHECK(header_->endian_tag == dex::kEndianConstant);
  SLICER_CHECK(header_->data_size % 4 == 0);

  // Known issue: The fields might be slightly corrupted b/65452964
  // SLICER_CHECK(header_->data_off + header_->data_size <= size_);

  SLICER_CHECK(header_->string_ids_off % 4 == 0);
  SLICER_CHECK(header_->type_ids_size < 65536);
  SLICER_CHECK(header_->type_ids_off % 4 == 0);
  SLICER_CHECK(header_->proto_ids_size < 65536);
  SLICER_CHECK(header_->proto_ids_off % 4 == 0);
  SLICER_CHECK(header_->field_ids_off % 4 == 0);
  SLICER_CHECK(header_->method_ids_off % 4 == 0);
  SLICER_CHECK(header_->class_defs_off % 4 == 0);
  SLICER_CHECK(header_->map_off >= header_->data_off && header_->map_off < size_);
  SLICER_CHECK(header_->link_size == 0);
  SLICER_CHECK(header_->link_off == 0);
  SLICER_CHECK(header_->data_off % 4 == 0);
  SLICER_CHECK(header_->map_off % 4 == 0);

  // we seem to have .dex files with extra bytes at the end ...
  // Known issue: For performance reasons the initial size_ passed to jvmti events might be an
  // estimate. b/72402467
  // (the sum is widened to 64 bits so it can't wrap around)
  SLICER_WEAK_CHECK(static_cast<uint64_t>(header_->data_off) + header_->data_size <= size_);

  // but we should still have the whole data section

  // Known issue: The fields might be slightly corrupted b/65452964
  // Known issue: For performance reasons the initial size_ passed to jvmti events might be an
  // estimate. b/72402467
  // SLICER_CHECK(header_->data_off + header_->data_size <= size_);

  // validate the map
  // (map section size = sizeof(MapList::size) + sizeof(MapList::list[size])

  // the map list's size field must be inside the image before we read it
  SLICER_CHECK(static_cast<uint64_t>(header_->map_off) + sizeof(dex::u4) <= size_);

  auto map_list = ptr<dex::MapList>(header_->map_off);
  SLICER_CHECK(map_list->size > 0);

  // 64-bit arithmetic: neither the product nor the sum below can wrap around
  const uint64_t map_section_size =
      sizeof(dex::u4) + static_cast<uint64_t>(sizeof(dex::MapItem)) * map_list->size;
  SLICER_CHECK(static_cast<uint64_t>(header_->map_off) + map_section_size <= size_);
}

}  // namespace dex