Android ART 虚拟机 - dex 文件格式要旨(下)

class_def_item、class_data_item

// art11/libdexfile/dex/dex_file_structs.h
namespace art {
namespace dex {

// Raw class_def_item.
struct ClassDef {
  dex::TypeIndex class_idx_;  // index into type_ids_ array for this class
  uint16_t pad1_;  // padding = 0
  uint32_t access_flags_;
  dex::TypeIndex superclass_idx_;  // index into type_ids_ array for superclass
  uint16_t pad2_;  // padding = 0
  uint32_t interfaces_off_;  // file offset to TypeList
  dex::StringIndex source_file_idx_;  // index into string_ids_ for source file name
  uint32_t annotations_off_;  // file offset to annotations_directory_item
  uint32_t class_data_off_;  // file offset to class_data_item
  uint32_t static_values_off_;  // file offset to EncodedArray

  // Returns the valid access flags, that is, Java modifier bits relevant to the ClassDef type
  // (class or interface). These are all in the lower 16b and do not contain runtime flags.
  uint32_t GetJavaAccessFlags() const {
    // Make sure that none of our runtime-only flags are set.
    static_assert((kAccValidClassFlags & kAccJavaFlagsMask) == kAccValidClassFlags,
                  "Valid class flags not a subset of Java flags");
    static_assert((kAccValidInterfaceFlags & kAccJavaFlagsMask) == kAccValidInterfaceFlags,
                  "Valid interface flags not a subset of Java flags");

    if ((access_flags_ & kAccInterface) != 0) {
      // Interface.
      return access_flags_ & kAccValidInterfaceFlags;
    } else {
      // Class.
      return access_flags_ & kAccValidClassFlags;
    }
  }

};

}   // namespace dex
}   // namespace art
  • dex::TypeIndex 两个字节,所以其后跟着一个 2 bytes 的 padding。这样才是 4 字节对齐的
  • class_data_item 在代码里没有直接对应的定义。通过 DexFile 的 GetClassData 来获取:
// art11/libdexfile/dex/dex_file.h
class DexFile {
 public:
  // Returns a pointer to the raw memory mapped class_data_item
  const uint8_t* GetClassData(const dex::ClassDef& class_def) const {
    return DataPointer<uint8_t>(class_def.class_data_off_);
  }

  template <typename T>
  const T* DataPointer(size_t offset) const {
    DCHECK_LT(offset, DataSize()) << "Offset past end of data section";
    return (offset != 0u) ? reinterpret_cast<const T*>(DataBegin() + offset) : nullptr;
  }
}
  • encoded_array_item 在代码里也没有相应的定义。跟 class_data_item 类似,获取它的方法为:
// art11/libdexfile/dex/dex_file.h
class DexFile {
 public:
  const uint8_t* GetEncodedStaticFieldValuesArray(const dex::ClassDef& class_def) const {
    return DataPointer<uint8_t>(class_def.static_values_off_);
  }
}

由于篇幅和精力关系,接下来我们只详细看 class_data_item,encoded_array_item 不再讲述。

class_data_item

class_data_item {
    uleb128         static_fields_size;
    uleb128         instance_fields_size;
    uleb128         direct_methods_size;
    uleb128         virtual_methods_size;
    encoded_field   static_fields[static_fields_size];
    encoded_field   instance_fields[instance_fields_size];
    encoded_method  direct_methods[direct_methods_size];
    encoded_method  virtual_methods[virtual_methods_size];
}

encoded_field {
    uleb128 field_idx_diff;
    uleb128 access_flags;
}

encoded_method {
    uleb128 method_idx_diff;
    uleb128 access_flags;
    uleb128 code_off;   // 0 if abstract or native, point to code_item if present
}
  • xxx_idx_diff 除第一个外,后续的 index 的取值为 前一个 + diff

ClassAccessor

虽然代码里没有 class_data_item 对应的定义,但提供了 ClassAccessor 给用户遍历 methods/fields:

// art11/libdexfile/dex/class_accessor-inl.h
inline ClassAccessor::ClassAccessor(const DexFile& dex_file,
                                    const uint8_t* class_data,
                                    uint32_t class_def_index,
                                    bool parse_hiddenapi_class_data)
    : dex_file_(dex_file),
      class_def_index_(class_def_index),
      ptr_pos_(class_data),
      hiddenapi_ptr_pos_(nullptr),
      num_static_fields_(ptr_pos_ != nullptr ? DecodeUnsignedLeb128(&ptr_pos_) : 0u),
      num_instance_fields_(ptr_pos_ != nullptr ? DecodeUnsignedLeb128(&ptr_pos_) : 0u),
      num_direct_methods_(ptr_pos_ != nullptr ? DecodeUnsignedLeb128(&ptr_pos_) : 0u),
      num_virtual_methods_(ptr_pos_ != nullptr ? DecodeUnsignedLeb128(&ptr_pos_) : 0u) {
  if (parse_hiddenapi_class_data && class_def_index != DexFile::kDexNoIndex32) {
    const dex::HiddenapiClassData* hiddenapi_class_data = dex_file.GetHiddenapiClassData();
    if (hiddenapi_class_data != nullptr) {
      hiddenapi_ptr_pos_ = hiddenapi_class_data->GetFlagsPointer(class_def_index);
    }
  }
}
  • DecodeUnsignedLeb128 会递增传递进去的指针,所以如前所述,class_data 的前面 4 个字段分别是 static_fields_size、instance_fields_size、direct_methods_size、virtual_methods_size

为了对类成员进行遍历,使用 VisitFieldsAndMethods 函数:

// art11/libdexfile/dex/class_accessor-inl.h
template <typename StaticFieldVisitor,
          typename InstanceFieldVisitor,
          typename DirectMethodVisitor,
          typename VirtualMethodVisitor>
inline void ClassAccessor::VisitFieldsAndMethods(
    const StaticFieldVisitor& static_field_visitor,
    const InstanceFieldVisitor& instance_field_visitor,
    const DirectMethodVisitor& direct_method_visitor,
    const VirtualMethodVisitor& virtual_method_visitor) const {
  Field field(dex_file_, ptr_pos_, hiddenapi_ptr_pos_);
  VisitMembers(num_static_fields_, static_field_visitor, &field);
  field.NextSection();
  VisitMembers(num_instance_fields_, instance_field_visitor, &field);

  Method method(dex_file_, field.ptr_pos_, field.hiddenapi_ptr_pos_, /*is_static_or_direct*/ true);
  VisitMembers(num_direct_methods_, direct_method_visitor, &method);
  method.NextSection();
  VisitMembers(num_virtual_methods_, virtual_method_visitor, &method);
}
  • Field、Method 是 ClassAccessor 的两个内部类:
// art11/libdexfile/dex/class_accessor.h
// Classes to access Dex data.
class ClassAccessor {
 public:
  class BaseItem {
    // ...
   protected:
    // Internal data pointer for reading.
    const DexFile& dex_file_;
    const uint8_t* ptr_pos_ = nullptr;
    const uint8_t* hiddenapi_ptr_pos_ = nullptr;
    uint32_t index_ = 0u;
    uint32_t access_flags_ = 0u;
    uint32_t hiddenapi_flags_ = 0u;
  };

  class Method : public BaseItem {
    // ...
   private:
    bool is_static_or_direct_ = true;
    uint32_t code_off_ = 0u;
  }

  class Field : public BaseItem {
    // ...
   private:
    bool is_static_ = true;
  }
}
  • VisitMembers 调用 Field 或 Method 类的 Read 方法读取一个 Field/Method,然后把自己传递给 visitor:
// art11/libdexfile/dex/class_accessor-inl.h
template <typename DataType, typename Visitor>
inline void ClassAccessor::VisitMembers(size_t count,
                                        const Visitor& visitor,
                                        DataType* data) const {
  for ( ; count != 0; --count) {
    data->Read();
    visitor(*data);
  }
}
  • 前面我们说 DecodeUnsignedLeb128 会递增传入的指针的值,所以在 VisitFieldsAndMethods 里创建 field 变量的时候,ptr_pos_ 就指向 static_fields 的第一个字节。跟着,VisitMembers 调用 Field 的 Read 方法:
// art11/libdexfile/dex/class_accessor-inl.h
inline void ClassAccessor::Field::Read() {
  index_ += DecodeUnsignedLeb128(&ptr_pos_);
  access_flags_ = DecodeUnsignedLeb128(&ptr_pos_);
  if (hiddenapi_ptr_pos_ != nullptr) {
    hiddenapi_flags_ = DecodeUnsignedLeb128(&hiddenapi_ptr_pos_);
    DCHECK(hiddenapi::ApiList(hiddenapi_flags_).IsValid());
  }
}
  • 刚开始的时候,index_ 的值为 0 ,所以第一个 encoded_field 的 field_idx_diff 存放的就是真正的 index 值。
  • field_idx_diff 的下一个是 access_flags
  • 当 VisitMembers 第二次调用 Field::Read 的时候,index_ 累加。这印证了前面我们对 field_idx_diff 的说明。

遍历完 static fields 后是 instance fields。这里没有创建新的 Field 对象,而是调用了 Field::NextSection。按照前面我们对 Field::Read 的理解,NextSection 至少会把 index_ 置为 0:

// art11/libdexfile/dex/class_accessor.h
class ClassAccessor {
 public:
  class Field : public BaseItem {
    void NextSection() {
      index_ = 0u;
      is_static_ = false;
    }
  }
}

这里还有一个细节—— is_static_ 原先一直是 true,在这里被设置为了 false;反映了接下来的字段都是 instance field 这一事实。

Method 的实现类似:

// art11/libdexfile/dex/class_accessor-inl.h
inline void ClassAccessor::Method::Read() {
  index_ += DecodeUnsignedLeb128(&ptr_pos_);
  access_flags_ = DecodeUnsignedLeb128(&ptr_pos_);
  code_off_ = DecodeUnsignedLeb128(&ptr_pos_);
  if (hiddenapi_ptr_pos_ != nullptr) {
    hiddenapi_flags_ = DecodeUnsignedLeb128(&hiddenapi_ptr_pos_);
    DCHECK(hiddenapi::ApiList(hiddenapi_flags_).IsValid());
  }
}

// art11/libdexfile/dex/class_accessor.h
class ClassAccessor {
 public:
  class Method : public BaseItem {
    // Move to virtual method section.
    void NextSection() {
      DCHECK(is_static_or_direct_) << "Already in the virtual methods section";
      is_static_or_direct_ = false;
      index_ = 0u;
    }
  }
}
  • 这里还可以看出,direct method 包括了静态方法

code_item

前面我们通过 ClassAccessor 可以得到类里面的 ClassAccessor::Method 对象。为了继续遍历方法的指令,可以通过 GetCodeItem 函数获取对应的 CodeItem:

// art11/libdexfile/dex/class_accessor-inl.h
inline const dex::CodeItem* ClassAccessor::Method::GetCodeItem() const {
  return dex_file_.GetCodeItem(code_off_);
}

// art11/libdexfile/dex/dex_file_structs.h
// Base code_item, compact dex and standard dex have different code item layouts.
struct CodeItem {
 protected:
  CodeItem() = default;
};
  • 从这里我们可以知道, dex file 有两种:compact dex 和 standard dex。

再看看 DexFile 的 GetCodeItem:

// art11/libdexfile/dex/dex_file.h
class DexFile {
 public:

  // Return the code item for a provided offset.
  const dex::CodeItem* GetCodeItem(const uint32_t code_off) const {
    // May be null for native or abstract methods.
    return DataPointer<dex::CodeItem>(code_off);
  }

  template <typename T>
  const T* DataPointer(size_t offset) const {
    return (offset != 0u) ? reinterpret_cast<const T*>(DataBegin() + offset) : nullptr;
  }
}

可以看到,这里只是直接返回了 code_off 处的数据,跟前面我们对 encoded_method 的描述一致。只是这里还是没有 code_item 的定义。

再回到前面的 compact dex 和 standard dex。DexFile 有这样的定义:

// art11/libdexfile/dex/dex_file.h
class DexFile {
 public:
  // Not virtual for performance reasons.
  ALWAYS_INLINE bool IsCompactDexFile() const {
    return is_compact_dex_;
  }
  ALWAYS_INLINE bool IsStandardDexFile() const {
    return !is_compact_dex_;
  }

 protected:
  // If the dex file is a compact dex file. If false then the dex file is a standard dex file.
  const bool is_compact_dex_;
}

is_compact_dex_ 是在 DexFile 构造的时候赋值的,搜索代码,发现是 DexFile 两个子类 StandardDexFile 和 CompactDexFile 在构造的时候,分别传入了 false/true。

而判断一个 dex 文件是不是 compact,是在 DexFileLoader 里执行的:

// art11/libdexfile/dex/dex_file_loader.cc
std::unique_ptr<DexFile> DexFileLoader::OpenCommon(const uint8_t* base,
                                                   size_t size,
                                                   const uint8_t* data_base,
                                                   size_t data_size,
                                                   const std::string& location,
                                                   uint32_t location_checksum,
                                                   const OatDexFile* oat_dex_file,
                                                   bool verify,
                                                   bool verify_checksum,
                                                   std::string* error_msg,
                                                   std::unique_ptr<DexFileContainer> container,
                                                   VerifyResult* verify_result) {
  if (verify_result != nullptr) {
    *verify_result = VerifyResult::kVerifyNotAttempted;
  }
  std::unique_ptr<DexFile> dex_file;
  if (size >= sizeof(StandardDexFile::Header) && StandardDexFile::IsMagicValid(base)) {
    if (data_size != 0) {
      CHECK_EQ(base, data_base) << "Unsupported for standard dex";
    }
    dex_file.reset(new StandardDexFile(base,
                                       size,
                                       location,
                                       location_checksum,
                                       oat_dex_file,
                                       std::move(container)));
  } else if (size >= sizeof(CompactDexFile::Header) && CompactDexFile::IsMagicValid(base)) {
    if (data_base == nullptr) {
      // TODO: Is there a clean way to support both an explicit data section and reading the one
      // from the header.
      CHECK_EQ(data_size, 0u);
      const CompactDexFile::Header* const header = CompactDexFile::Header::At(base);
      data_base = base + header->data_off_;
      data_size = header->data_size_;
    }
    dex_file.reset(new CompactDexFile(base,
                                      size,
                                      data_base,
                                      data_size,
                                      location,
                                      location_checksum,
                                      oat_dex_file,
                                      std::move(container)));
    // Disable verification for CompactDex input.
    verify = false;
  } else {
    *error_msg = "Invalid or truncated dex file";
  }
    // ...
}
  • size 是 dex 文件的大小,所以在正常情况下,size >= sizeof(XxxDexFile::Header) 这个判断基本是会成功的
  • 一个 dex 文件是 standard 还是 compact,主要取决于 magic:
// art11/libdexfile/dex/standard_dex_file.cc
const uint8_t StandardDexFile::kDexMagic[] = { 'd', 'e', 'x', '\n' };

bool StandardDexFile::IsMagicValid(const uint8_t* magic) {
  return (memcmp(magic, kDexMagic, sizeof(kDexMagic)) == 0);
}


// art11/libdexfile/dex/compact_dex_file.h
static constexpr uint8_t kDexMagic[kDexMagicSize] = { 'c', 'd', 'e', 'x' };
// art11/libdexfile/dex/compact_dex_file.cc
bool CompactDexFile::IsMagicValid(const uint8_t* magic) {
  return (memcmp(magic, kDexMagic, sizeof(kDexMagic)) == 0);
}

这里说明了一个 dex 文件的 magic 不仅可以是 “dex\n”,还可以是 “cdex”

对于 StandardDexFile,CodeItem 的定义是:

class StandardDexFile : public DexFile {
 public:
  struct CodeItem : public dex::CodeItem {
      // ...
   private:
    uint16_t registers_size_;            // the number of registers used by this code
                                         //   (locals + parameters)
    uint16_t ins_size_;                  // the number of words of incoming arguments to the method
                                         //   that this code is for
    uint16_t outs_size_;                 // the number of words of outgoing argument space required
                                         //   by this code for method invocation
    uint16_t tries_size_;                // the number of try_items for this instance. If non-zero,
                                         //   then these appear as the tries array just after the
                                         //   insns in this instance.
    uint32_t debug_info_off_;            // Holds file offset to debug info stream.

    uint32_t insns_size_in_code_units_;  // size of the insns array, in 2 byte code units
    uint16_t insns_[1];                  // actual array of bytecode.
  };

}

而前面我们知道,ClassAccessor 返回的 code item 实际上只是 dex::CodeItem 。accessor 的用户为了读取 code item,势必需要把 dex::CodeItem 转换为 StandardDexFile::CodeItem 或 CompactDexFile::CodeItem。我们继续往下看,这个问题的答案很快就会出现。

和访问 class_def_item 时候一样,访问 code_item 也有对应的 accessor。它们是 CodeItemInstructionAccessor、CodeItemDataAccessor 和 CodeItemDebugInfoAccessor,继承关系如下:

class CodeItemInstructionAccessor;
class CodeItemDataAccessor : public CodeItemInstructionAccessor;
class CodeItemDebugInfoAccessor : public CodeItemDataAccessor

在 CodeItemInstructionAccessor 的 Init 方法里,我们可以找到预期的 CodeItem 的类型转换:

// art11/libdexfile/dex/code_item_accessor-inl.h
inline void CodeItemInstructionAccessor::Init(const DexFile& dex_file,
                                              const dex::CodeItem* code_item) {
  if (code_item != nullptr) {
    DCHECK(dex_file.IsInDataSection(code_item));
    if (dex_file.IsCompactDexFile()) {
      Init(down_cast<const CompactDexFile::CodeItem&>(*code_item));
    } else {
      DCHECK(dex_file.IsStandardDexFile());
      Init(down_cast<const StandardDexFile::CodeItem&>(*code_item));
    }
  }
}

以 StandardDexFile 为例,这里的直接强转说明 code_item 的结构跟 StandardDexFile::CodeItem 类的成员变量的布局是一致的。即 code_item 的数据依次是 uint16_t register size、ins size、outs size、tries size……

code_item 里指令的访问

code_item 的访问,我们可以直接看 ClassAccessor::Method::GetInstructionsAndData

// art11/libdexfile/dex/class_accessor-inl.h
inline CodeItemDataAccessor ClassAccessor::Method::GetInstructionsAndData() const {
  return CodeItemDataAccessor(dex_file_, dex_file_.GetCodeItem(GetCodeItemOffset()));
}

CodeItemInstructionAccessor 实现了 begin 和 end 函数,可以通过迭代器访问 code_item 的各个指令:

// art11/libdexfile/dex/code_item_accessor-inl.h
inline DexInstructionIterator CodeItemInstructionAccessor::begin() const {
  return DexInstructionIterator(insns_, 0u);
}

inline DexInstructionIterator CodeItemInstructionAccessor::end() const {
  return DexInstructionIterator(insns_, insns_size_in_code_units_);
}

// art11/libdexfile/dex/dex_instruction_iterator.h
class DexInstructionIterator : public DexInstructionIteratorBase {
 public:

  explicit DexInstructionIterator(const uint16_t* inst, uint32_t dex_pc)
      : DexInstructionIteratorBase(inst != nullptr ? Instruction::At(inst) : nullptr, dex_pc) {}

  // Value after modification.
  DexInstructionIterator& operator++() {
    data_.dex_pc_ += Inst().SizeInCodeUnits();
    return *this;
  }

    // ...
};
  • CodeItemInstructionAccessor 返回的迭代器是 DexInstructionIterator
  • data_ 是存储在父类 DexInstructionIteratorBase 的 DexInstructionPcPair:
// art11/libdexfile/dex/dex_instruction_iterator.h
class DexInstructionPcPair {
    // ...

  ALWAYS_INLINE const Instruction& Inst() const {
    return *Instruction::At(instructions_ + DexPc());
  }

  const uint16_t* instructions_ = nullptr;
  uint32_t dex_pc_ = 0;
};

总结一下:

  • CodeItemInstructionAccessor::begin() 所返回的迭代器的 data_.instructions_ 指向 StandardDexFile::CodeItem.insns_,即第一个指令。每次迭代器递增,data_.dex_pc_ 增加当前指令的长度(SizeInCodeUnits)
  • dex 指令由 Instruction 类表示。Instruction 类其实没有成员变量,指令的内容隐含地由 this 指针指向的内容块构成:
// art11/libdexfile/dex/dex_instruction.h
class Instruction {
 public:
    // ...

  // Reads an instruction out of the stream at the specified address.
  static const Instruction* At(const uint16_t* code) {
    return reinterpret_cast<const Instruction*>(code);
  }

    // 跟着,在需要读取指令数据时,会把 this 强制转换为 const uint16_t*。比方说 Opcode 的实现是:

  // Returns the opcode field of the instruction from the first 16 bits of instruction.
  Code Opcode() const {
    return Opcode(Fetch16(0));
  }
  uint16_t Fetch16(size_t offset) const {
    const uint16_t* insns = reinterpret_cast<const uint16_t*>(this);
    return insns[offset];
  }
};

异常信息的访问

前面我们提到,StandardDexFile::CodeItem 的最后就是一系列的指令。但其实在 insns_ 的后面,还跟着可选的异常相关的信息。为了访问这些异常信息,可以使用 CodeItemInstructionAccessor 的直接子类 CodeItemDataAccessor

// art11/libdexfile/dex/code_item_accessor.h
class CodeItemDataAccessor : public CodeItemInstructionAccessor {
 public:
  IterationRange<const dex::TryItem*> TryItems() const;
  const uint8_t* GetCatchHandlerData(size_t offset = 0) const;
}

// art11/libdexfile/dex/code_item_accessor-inl.h
inline IterationRange<const dex::TryItem*> CodeItemDataAccessor::TryItems() const {
  const dex::TryItem* try_items = DexFile::GetTryItems(end(), 0u);
  return {
    try_items,
    try_items + TriesSize() };
}

inline const dex::TryItem* DexFile::GetTryItems(const DexInstructionIterator& code_item_end,
                                                uint32_t offset) {
  return reinterpret_cast<const dex::TryItem*>
      (RoundUp(reinterpret_cast<uintptr_t>(&code_item_end.Inst()), dex::TryItem::kAlignment)) +
          offset;
}
  • 这里返回的 &end().Inst() 指向最后一条指令的下一个位置。由于指令 16 位对齐而 TryItem 是 32 位对齐的,所以这里把 &end().Inst() 向上对齐到 32 位后,就指向第一个 try_item。
    • 这里也说明了,在 insns 和 try_items 之间可能存在 16 bit 的 padding。
                   | insn0 |
                   | insn1 |
                   | insn2 |
                   |  ...  |
                   |       |
                   |       |
                   |       |
                   | insn_n|
  &end().Inst() -> |       |
                   |       |
  • try_item 由 dex::TryItem 表示:
// art11/libdexfile/dex/dex_file_structs.h
// Raw try_item.
struct TryItem {
  static constexpr size_t kAlignment = sizeof(uint32_t);
  uint32_t start_addr_;
  uint16_t insn_count_;
  uint16_t handler_off_;
};
  • start_addr_ 是 try 块的起始地址(try 块里第一个指令的 dex pc)
  • handler_off_ 是该 try block 对应的 encoded_catch_handler 相对于 encoded_catch_handler_list 起始处的 offset(以字节为单位)。CodeItemDataAccessor::GetCatchHandlerData 函数的参数 offset 即是这个 handler_off_

遍历完 try_item 后,可以继续用 accessor 的 GetCatchHandlerData 访问对应的 catch handler:

// art11/libdexfile/dex/code_item_accessor-inl.h
inline const uint8_t* CodeItemDataAccessor::GetCatchHandlerData(size_t offset) const {
  return DexFile::GetCatchHandlerData(end(), TriesSize(), offset);
}

// art11/libdexfile/dex/dex_file-inl.h
// Get the base of the encoded data for the given DexCode.
inline const uint8_t* DexFile::GetCatchHandlerData(const DexInstructionIterator& code_item_end,
                                                   uint32_t tries_size,
                                                   uint32_t offset) {
  const uint8_t* handler_data =
      reinterpret_cast<const uint8_t*>(GetTryItems(code_item_end, tries_size));
  return handler_data + offset;
}
  • GetTryItems(code_item_end, tries_size) 返回的是最后一个 try_item 之后的第一个字节(也就是 try_items 数组的末尾):
                   | insn0 |
                   | insn1 |
                   | insn2 |
                   |  ...  |
                   |       |
                   |       |
                   | insn_n|
  &end().Inst() -> |       |
                   |  try0 |
                   |  try1 |
                   |  ...  |
                   |       |
                   |  try_n|
   handler_data -> |       |
                   |       |
                   |       |

参考文档,handler_data 其实是 encoded_catch_handler_list

encoded_catch_handler_list {
  uleb128               handler_size;
    encoded_catch_handler   list[handler_size];
}

encoded_catch_handler {
    sleb128                 size;
  encoded_type_addr_pair    handlers[abs(size)];
  uleb128                   catch_all_addr;     // optional, only present if size is non-positive
}

encoded_type_addr_pair {
    uleb128     type_idx;
    uleb128     addr;
}
  • 一个方法体可以有多个 try block,每个 try block 通过 handler_off_ 对应着一个 encoded_catch_handler
  • 一个 try block 里面可以有多个 handler,每个 handler 对应一个 encoded_type_addr_pair

为了了解 handler_data 的数据格式,我们还可以到源码里使用了 GetCatchHandlerData 的地方寻找线索。这里我们选择参考 MethodVerifier 的代码:

// art11/runtime/verifier/method_verifier.cc
template <bool kVerifierDebug>
bool MethodVerifier<kVerifierDebug>::ScanTryCatchBlocks() {
  // ...

  // 有删减,仅保留迭代 handlers 相关的代码
  const uint8_t* handlers_ptr = code_item_accessor_.GetCatchHandlerData();
  const uint32_t handlers_size = DecodeUnsignedLeb128(&handlers_ptr);
  for (uint32_t idx = 0; idx < handlers_size; idx++) {
    CatchHandlerIterator iterator(handlers_ptr);
    for (; iterator.HasNext(); iterator.Next()) {
      uint32_t dex_pc = iterator.GetHandlerAddress();
    }
    handlers_ptr = iterator.EndDataPointer();
  }
}
  • handler_data 的第一个字段是 uleb128 类型的 handler_size
  • 跟着是 encoded_catch_handler 列表,使用 CatchHandlerIterator 来遍历
// art11/libdexfile/dex/dex_file_exception_helpers.h
class CatchHandlerIterator {
 public:
  explicit CatchHandlerIterator(const uint8_t* handler_data) {
    Init(handler_data);
  }
}

// art11/libdexfile/dex/dex_file_exception_helpers.cc
void CatchHandlerIterator::Init(const uint8_t* handler_data) {
  current_data_ = handler_data;
  remaining_count_ = DecodeSignedLeb128(&current_data_);

  // If remaining_count_ is non-positive, then it is the negative of
  // the number of catch types, and the catches are followed by a
  // catch-all handler.
  if (remaining_count_ <= 0) {
    catch_all_ = true;
    remaining_count_ = -remaining_count_;
  } else {
    catch_all_ = false;
  }
  Next();
}
  • encoded_catch_handler 第一个字段 size 即是这里的 remaining_count_
  • remaining_count_ 的绝对值是带类型的 handler(encoded_type_addr_pair)的个数;当 size 非正时,这些 handler 之后还跟着一个 catch-all handler

下面我们看看 Next 函数:

// art11/libdexfile/dex/dex_file_exception_helpers.cc
void CatchHandlerIterator::Next() {
  if (remaining_count_ > 0) {
    handler_.type_idx_ = dex::TypeIndex(DecodeUnsignedLeb128(&current_data_));
    handler_.address_  = DecodeUnsignedLeb128(&current_data_);
    remaining_count_--;
    return;
  }

  if (catch_all_) {
    handler_.type_idx_ = dex::TypeIndex(DexFile::kDexNoIndex16);
    handler_.address_  = DecodeUnsignedLeb128(&current_data_);
    catch_all_ = false;
    return;
  }

  // no more handler
  remaining_count_ = -1;
}

// art11/libdexfile/dex/dex_file_exception_helpers.h
class CatchHandlerIterator {
 private:
  struct CatchHandlerItem {
    dex::TypeIndex type_idx_;  // type index of the caught exception type
    uint32_t address_;  // handler address
  } handler_;
}
  • 每个 encoded_type_addr_pair 有两个字段,uleb128 的 type index 和 address。address 是 handler 代码的位置
  • catch all 的 handler 没有对应的 type index,它可以捕获任意类型的异常(其实就是 finally)

异常相关的信息到这里就结束了。在异常信息的后面跟着的是调试相关的信息,通过 CodeItemDebugInfoAccessor 访问。

推荐阅读更多精彩内容