1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/zucchini/disassembler_dex.h"
6 
7 #include <stddef.h>
8 #include <stdlib.h>
9 
10 #include <algorithm>
11 #include <cctype>
12 #include <cmath>
13 #include <iterator>
14 #include <set>
15 #include <utility>
16 
17 #include "base/bind.h"
18 #include "base/callback.h"
19 #include "base/logging.h"
20 #include "base/numerics/safe_conversions.h"
21 #include "base/optional.h"
22 #include "base/strings/stringprintf.h"
23 #include "components/zucchini/buffer_source.h"
24 #include "components/zucchini/buffer_view.h"
25 #include "components/zucchini/io_utils.h"
26 
27 namespace zucchini {
28 
29 namespace {
30 
31 // A DEX item specified by an offset, if absent, has a sentinel value of 0 since
32 // 0 is never a valid item offset (it points to magic at start of DEX).
33 constexpr offset_t kDexSentinelOffset = 0U;
34 
35 // A DEX item specified by an index, if absent, has a sentinel value of
36 // NO_INDEX = 0xFFFFFFFF. This is represented as an offset_t for uniformity.
37 constexpr offset_t kDexSentinelIndexAsOffset = 0xFFFFFFFFU;
38 
39 static_assert(kDexSentinelIndexAsOffset != kInvalidOffset,
40               "Sentinel should not be confused with invalid offset.");
41 
42 // Size of a Dalvik instruction unit. Need to cast to signed int because
43 // sizeof() gives size_t, which dominates when operated on ptrdiff_t, then
44 // wrecks havoc for base::checked_cast<int16_t>().
45 constexpr int kInstrUnitSize = static_cast<int>(sizeof(uint16_t));
46 
47 // Checks if |offset| is byte aligned to 32 bits or 4 bytes.
Is32BitAligned(offset_t offset)48 bool Is32BitAligned(offset_t offset) {
49   return offset % 4 == 0;
50 }
51 
52 // Returns a lower bound for the size of an item of type |type_item_code|.
53 // - For fixed-length items (e.g., kTypeFieldIdItem) this is the exact size.
54 // - For variant-length items (e.g., kTypeCodeItem), returns a value that is
55 //   known to be less than the item length (e.g., header size).
56 // - For items not handled by this function, returns 1 for sanity check.
GetItemBaseSize(uint16_t type_item_code)57 size_t GetItemBaseSize(uint16_t type_item_code) {
58   switch (type_item_code) {
59     case dex::kTypeStringIdItem:
60       return sizeof(dex::StringIdItem);
61     case dex::kTypeTypeIdItem:
62       return sizeof(dex::TypeIdItem);
63     case dex::kTypeProtoIdItem:
64       return sizeof(dex::ProtoIdItem);
65     case dex::kTypeFieldIdItem:
66       return sizeof(dex::FieldIdItem);
67     case dex::kTypeMethodIdItem:
68       return sizeof(dex::MethodIdItem);
69     case dex::kTypeClassDefItem:
70       return sizeof(dex::ClassDefItem);
71     // No need to handle dex::kTypeMapList.
72     case dex::kTypeTypeList:
73       return sizeof(uint32_t);  // Variable-length.
74     case dex::kTypeAnnotationSetRefList:
75       return sizeof(uint32_t);  // Variable-length.
76     case dex::kTypeAnnotationSetItem:
77       return sizeof(uint32_t);  // Variable-length.
78     case dex::kTypeCodeItem:
79       return sizeof(dex::CodeItem);  // Variable-length.
80     case dex::kTypeAnnotationsDirectoryItem:
81       return sizeof(dex::AnnotationsDirectoryItem);  // Variable-length.
82     default:
83       return 1U;  // Unhandled item. For sanity check assume size >= 1.
84   }
85 }
86 
87 /******** CodeItemParser ********/
88 
89 // A parser to extract successive code items from a DEX image whose header has
90 // been parsed.
91 class CodeItemParser {
92  public:
93   using size_type = BufferSource::size_type;
94 
CodeItemParser(ConstBufferView image)95   explicit CodeItemParser(ConstBufferView image) : image_(image) {}
96 
97   // Initializes the parser, returns true on success and false on error.
Init(const dex::MapItem & code_map_item)98   bool Init(const dex::MapItem& code_map_item) {
99     // Sanity check to quickly fail if |code_map_item.offset| or
100     // |code_map_item.size| is too large. This is a heuristic because code item
101     // sizes need to be parsed (sizeof(dex::CodeItem) is a lower bound).
102     if (!image_.covers_array(code_map_item.offset, code_map_item.size,
103                              sizeof(dex::CodeItem))) {
104       return false;
105     }
106     source_ = std::move(BufferSource(image_).Skip(code_map_item.offset));
107     return true;
108   }
109 
110   // Extracts the header of the next code item, and skips the variable-length
111   // data. Returns the offset of the code item if successful. Otherwise returns
112   // kInvalidOffset, and thereafter the parser becomes valid. For reference,
113   // here's a pseudo-struct of a complete code item:
114   //
115   // struct code_item {
116   //   // 4-byte aligned here.
117   //   // 16-byte header defined (dex::CodeItem).
118   //   uint16_t registers_size;
119   //   uint16_t ins_size;
120   //   uint16_t outs_size;
121   //   uint16_t tries_size;
122   //   uint32_t debug_info_off;
123   //   uint32_t insns_size;
124   //
125   //   // Variable-length data follow.
126   //   uint16_t insns[insns_size];  // Instruction bytes.
127   //   uint16_t padding[(tries_size > 0 && insns_size % 2 == 1) ? 1 : 0];
128   //
129   //   if (tries_size > 0) {
130   //     // 4-byte aligned here.
131   //     struct try_item {  // dex::TryItem.
132   //       uint32_t start_addr;
133   //       uint16_t insn_count;
134   //       uint16_t handler_off;
135   //     } tries[tries_size];
136   //
137   //     struct encoded_catch_handler_list {
138   //       uleb128 handlers_size;
139   //       struct encoded_catch_handler {
140   //         sleb128 encoded_catch_handler_size;
141   //         struct encoded_type_addr_pair {
142   //           uleb128 type_idx;
143   //           uleb128 addr;
144   //         } handlers[abs(encoded_catch_handler_size)];
145   //         if (encoded_catch_handler_size <= 0) {
146   //           uleb128 catch_all_addr;
147   //         }
148   //       } handlers_list[handlers_size];
149   //     } handlers_group;  // Confusingly called "handlers" in DEX doc.
150   //   }
151   //
152   //   // Padding to 4-bytes align next code_item *only if more exist*.
153   // }
GetNext()154   offset_t GetNext() {
155     // Read header CodeItem.
156     if (!source_.AlignOn(image_, 4U))
157       return kInvalidOffset;
158     const offset_t code_item_offset =
159         base::checked_cast<offset_t>(source_.begin() - image_.begin());
160     const auto* code_item = source_.GetPointer<const dex::CodeItem>();
161     if (!code_item)
162       return kInvalidOffset;
163     DCHECK(Is32BitAligned(code_item_offset));
164 
165     // TODO(huangs): Fail if |code_item->insns_size == 0| (Constraint A1).
166     // Skip instruction bytes.
167     if (!source_.GetArray<uint16_t>(code_item->insns_size))
168       return kInvalidOffset;
169     // Skip padding if present.
170     if (code_item->tries_size > 0 && !source_.AlignOn(image_, 4U))
171       return kInvalidOffset;
172 
173     // Skip tries[] and handlers_group to arrive at the next code item. Parsing
174     // is nontrivial due to use of uleb128 / sleb128.
175     if (code_item->tries_size > 0) {
176       // Skip (try_item) tries[].
177       if (!source_.GetArray<dex::TryItem>(code_item->tries_size))
178         return kInvalidOffset;
179 
180       // Skip handlers_group.
181       uint32_t handlers_size = 0;
182       if (!source_.GetUleb128(&handlers_size))
183         return kInvalidOffset;
184       // Sanity check to quickly reject excessively large |handlers_size|.
185       if (source_.Remaining() < static_cast<size_type>(handlers_size))
186         return kInvalidOffset;
187 
188       // Skip (encoded_catch_handler) handlers_list[].
189       for (uint32_t k = 0; k < handlers_size; ++k) {
190         int32_t encoded_catch_handler_size = 0;
191         if (!source_.GetSleb128(&encoded_catch_handler_size))
192           return kInvalidOffset;
193         const size_type abs_size = std::abs(encoded_catch_handler_size);
194         if (source_.Remaining() < abs_size)  // Sanity check.
195           return kInvalidOffset;
196         // Skip (encoded_type_addr_pair) handlers[].
197         for (size_type j = 0; j < abs_size; ++j) {
198           if (!source_.SkipLeb128() || !source_.SkipLeb128())
199             return kInvalidOffset;
200         }
201         // Skip catch_all_addr.
202         if (encoded_catch_handler_size <= 0) {
203           if (!source_.SkipLeb128())
204             return kInvalidOffset;
205         }
206       }
207     }
208     // Success! |code_item->insns_size| is validated, but its content is still
209     // considered unsafe and requires validation.
210     return code_item_offset;
211   }
212 
213   // Given |code_item_offset| that points to the start of a valid code item in
214   // |image|, returns |insns| bytes as ConstBufferView.
GetCodeItemInsns(ConstBufferView image,offset_t code_item_offset)215   static ConstBufferView GetCodeItemInsns(ConstBufferView image,
216                                           offset_t code_item_offset) {
217     BufferSource source(BufferSource(image).Skip(code_item_offset));
218     const auto* code_item = source.GetPointer<const dex::CodeItem>();
219     DCHECK(code_item);
220     BufferRegion insns{0, code_item->insns_size * kInstrUnitSize};
221     DCHECK(source.covers(insns));
222     return source[insns];
223   }
224 
225  private:
226   ConstBufferView image_;
227   BufferSource source_;
228 };
229 
230 /******** InstructionParser ********/
231 
232 // A class that successively reads |code_item| for Dalvik instructions, which
233 // are found at |insns|, spanning |insns_size| uint16_t "units". These units
234 // store instructions followed by optional non-instruction "payload". Finding
235 // payload boundary requires parsing: On finding an instruction that uses (and
236 // points to) payload, the boundary is updated.
237 class InstructionParser {
238  public:
239   struct Value {
240     offset_t instr_offset;
241     const dex::Instruction* instr = nullptr;  // null for unknown instructions.
242   };
243 
244   // Returns pointer to DEX Instruction data for |opcode|, or null if |opcode|
245   // is unknown. An internal initialize-on-first-use table is used for fast
246   // lookup.
FindDalvikInstruction(uint8_t opcode)247   const dex::Instruction* FindDalvikInstruction(uint8_t opcode) {
248     static bool is_init = false;
249     static const dex::Instruction* instruction_table[256];
250     if (!is_init) {
251       is_init = true;
252       std::fill(std::begin(instruction_table), std::end(instruction_table),
253                 nullptr);
254       for (const dex::Instruction& instr : dex::kByteCode) {
255         std::fill(instruction_table + instr.opcode,
256                   instruction_table + instr.opcode + instr.variant, &instr);
257       }
258     }
259     return instruction_table[opcode];
260   }
261 
262   InstructionParser() = default;
263 
InstructionParser(ConstBufferView image,offset_t base_offset)264   InstructionParser(ConstBufferView image, offset_t base_offset)
265       : image_begin_(image.begin()),
266         insns_(CodeItemParser::GetCodeItemInsns(image, base_offset)),
267         payload_boundary_(insns_.end()) {}
268 
269   // Reads the next instruction. On success, makes the data read available via
270   // value() and returns true. Otherwise (done or found error) returns false.
ReadNext()271   bool ReadNext() {
272     // Do not scan past payload boundary.
273     if (insns_.begin() >= payload_boundary_)
274       return false;
275 
276     const offset_t instr_offset =
277         base::checked_cast<offset_t>(insns_.begin() - image_begin_);
278     const uint8_t op = insns_.read<uint8_t>(0);
279     const dex::Instruction* instr = FindDalvikInstruction(op);
280 
281     // Stop on finding unknown instructions. ODEX files might trigger this.
282     if (!instr) {
283       LOG(WARNING) << "Unknown Dalvik instruction detected at "
284                    << AsHex<8>(instr_offset) << ".";
285       return false;
286     }
287 
288     const int instr_length_units = instr->layout;
289     const size_t instr_length_bytes = instr_length_units * kInstrUnitSize;
290     if (insns_.size() < instr_length_bytes)
291       return false;
292 
293     // Handle instructions with variable-length data payload (31t).
294     if (instr->opcode == 0x26 ||  // fill-array-data
295         instr->opcode == 0x2B ||  // packed-switch
296         instr->opcode == 0x2C) {  // sparse-switch
297       const int32_t unsafe_payload_rel_units = insns_.read<int32_t>(2);
298       // Payload must be in current code item, after current instruction.
299       if (unsafe_payload_rel_units < instr_length_units ||
300           static_cast<uint32_t>(unsafe_payload_rel_units) >=
301               insns_.size() / kInstrUnitSize) {
302         LOG(WARNING) << "Invalid payload found.";
303         return false;
304       }
305       // Update boundary between instructions and payload.
306       const ConstBufferView::const_iterator payload_it =
307           insns_.begin() + unsafe_payload_rel_units * kInstrUnitSize;
308       payload_boundary_ = std::min(payload_boundary_, payload_it);
309     }
310 
311     insns_.remove_prefix(instr_length_bytes);
312     value_ = {instr_offset, instr};
313     return true;
314   }
315 
value() const316   const Value& value() const { return value_; }
317 
318  private:
319   ConstBufferView::const_iterator image_begin_;
320   ConstBufferView insns_;
321   ConstBufferView::const_iterator payload_boundary_;
322   Value value_;
323 };
324 
325 /******** InstructionReferenceReader ********/
326 
327 // A class to visit |code_items|, parse instructions, and emit embedded
328 // References of a type determined by |filter_| and |mapper_|. Only References
329 // located in |[lo, hi)| are emitted. |lo| and |hi| are assumed to never
330 // straddle the body of a Reference.
331 class InstructionReferenceReader : public ReferenceReader {
332  public:
333   // A function that takes a parsed Dalvik instruction and decides whether it
334   // contains a specific type of Reference. If true, then returns the Reference
335   // location. Otherwise returns kInvalidOffset.
336   using Filter =
337       base::RepeatingCallback<offset_t(const InstructionParser::Value&)>;
338   // A function that takes Reference location from |filter_| to extract the
339   // stored target. If valid, returns it. Otherwise returns kInvalidOffset.
340   using Mapper = base::RepeatingCallback<offset_t(offset_t)>;
341 
InstructionReferenceReader(ConstBufferView image,offset_t lo,offset_t hi,const std::vector<offset_t> & code_item_offsets,Filter && filter,Mapper && mapper)342   InstructionReferenceReader(ConstBufferView image,
343                              offset_t lo,
344                              offset_t hi,
345                              const std::vector<offset_t>& code_item_offsets,
346                              Filter&& filter,
347                              Mapper&& mapper)
348       : image_(image),
349         lo_(lo),
350         hi_(hi),
351         end_it_(code_item_offsets.end()),
352         filter_(std::move(filter)),
353         mapper_(std::move(mapper)) {
354     const auto begin_it = code_item_offsets.begin();
355     // Use binary search to find the code item that contains |lo_|.
356     auto comp = [](offset_t test_offset, offset_t code_item_offset) {
357       return test_offset < code_item_offset;
358     };
359     cur_it_ = std::upper_bound(begin_it, end_it_, lo_, comp);
360     if (cur_it_ != begin_it)
361       --cur_it_;
362     parser_ = InstructionParser(image_, *cur_it_);
363   }
364 
365   // ReferenceReader:
GetNext()366   base::Optional<Reference> GetNext() override {
367     while (true) {
368       while (parser_.ReadNext()) {
369         const auto& v = parser_.value();
370         DCHECK_NE(v.instr, nullptr);
371         if (v.instr_offset >= hi_)
372           return base::nullopt;
373         const offset_t location = filter_.Run(v);
374         if (location == kInvalidOffset || location < lo_)
375           continue;
376         // The general check is |location + reference_width > hi_|. However, by
377         // assumption |hi_| and |lo_| do not straddle the body of a Reference.
378         // So |reference_width| is unneeded.
379         if (location >= hi_)
380           return base::nullopt;
381         offset_t target = mapper_.Run(location);
382         if (target != kInvalidOffset)
383           return Reference{location, target};
384         else
385           LOG(WARNING) << "Invalid target at " << AsHex<8>(location) << ".";
386       }
387       ++cur_it_;
388       if (cur_it_ == end_it_)
389         return base::nullopt;
390       parser_ = InstructionParser(image_, *cur_it_);
391     }
392   }
393 
394  private:
395   const ConstBufferView image_;
396   const offset_t lo_;
397   const offset_t hi_;
398   const std::vector<offset_t>::const_iterator end_it_;
399   const Filter filter_;
400   const Mapper mapper_;
401   std::vector<offset_t>::const_iterator cur_it_;
402   InstructionParser parser_;
403 };
404 
405 /******** ItemReferenceReader ********/
406 
407 // A class to visit fixed-size item elements (determined by |item_size|) and
408 // emit a "member variable of interest" (MVI, determined by |rel_location| and
409 // |mapper|) as Reference. Only MVIs lying in |[lo, hi)| are emitted. |lo| and
410 // |hi| are assumed to never straddle the body of a Reference.
411 class ItemReferenceReader : public ReferenceReader {
412  public:
413   // A function that takes an MVI's location and emit its target offset.
414   using Mapper = base::RepeatingCallback<offset_t(offset_t)>;
415 
416   // |item_size| is the size of a fixed-size item. |rel_location| is the
417   // relative location of MVI from the start of the item containing it.
ItemReferenceReader(offset_t lo,offset_t hi,const dex::MapItem & map_item,size_t item_size,size_t rel_location,Mapper && mapper)418   ItemReferenceReader(offset_t lo,
419                       offset_t hi,
420                       const dex::MapItem& map_item,
421                       size_t item_size,
422                       size_t rel_location,
423                       Mapper&& mapper)
424       : hi_(hi),
425         item_base_offset_(base::checked_cast<offset_t>(map_item.offset)),
426         num_items_(base::checked_cast<uint32_t>(map_item.size)),
427         item_size_(base::checked_cast<uint32_t>(item_size)),
428         rel_location_(base::checked_cast<uint32_t>(rel_location)),
429         mapper_(std::move(mapper)) {
430     static_assert(sizeof(decltype(map_item.offset)) <= sizeof(offset_t),
431                   "map_item.offset too large.");
432     static_assert(sizeof(decltype(map_item.size)) <= sizeof(offset_t),
433                   "map_item.size too large.");
434     if (!item_base_offset_) {
435       // Empty item: Assign |cur_idx| to |num_items_| to skip everything.
436       cur_idx_ = num_items_;
437     } else if (lo < item_base_offset_) {
438       cur_idx_ = 0;
439     } else if (lo < OffsetOfIndex(num_items_)) {
440       cur_idx_ = (lo - item_base_offset_) / item_size_;
441       // Fine-tune: Advance if |lo| lies beyond the MVI.
442       if (lo > OffsetOfIndex(cur_idx_) + rel_location_)
443         ++cur_idx_;
444     } else {
445       cur_idx_ = num_items_;
446     }
447   }
448 
449   // ReferenceReader:
GetNext()450   base::Optional<Reference> GetNext() override {
451     while (cur_idx_ < num_items_) {
452       const offset_t item_offset = OffsetOfIndex(cur_idx_);
453       const offset_t location = item_offset + rel_location_;
454       // The general check is |location + reference_width > hi_|. However, by
455       // assumption |hi_| and |lo_| do not straddle the body of a Reference. So
456       // |reference_width| is unneeded.
457       if (location >= hi_)
458         break;
459       const offset_t target = mapper_.Run(location);
460 
461       // kDexSentinelOffset (0) may appear for the following:
462       // - ProtoIdItem: parameters_off.
463       // - ClassDefItem: interfaces_off, annotations_off, class_data_off,
464       //   static_values_off.
465       // - AnnotationsDirectoryItem: class_annotations_off.
466       // - AnnotationSetRefItem: annotations_off.
467       // kDexSentinelIndexAsOffset (0xFFFFFFFF) may appear for the following:
468       // - ClassDefItem: superclass_idx, source_file_idx.
469       if (target == kDexSentinelOffset || target == kDexSentinelIndexAsOffset) {
470         ++cur_idx_;
471         continue;
472       }
473 
474       if (target == kInvalidOffset) {
475         LOG(WARNING) << "Invalid item target at " << AsHex<8>(location) << ".";
476         break;
477       }
478       ++cur_idx_;
479       return Reference{location, target};
480     }
481     return base::nullopt;
482   }
483 
484  private:
OffsetOfIndex(uint32_t idx)485   offset_t OffsetOfIndex(uint32_t idx) {
486     return base::checked_cast<uint32_t>(item_base_offset_ + idx * item_size_);
487   }
488 
489   const offset_t hi_;
490   const offset_t item_base_offset_;
491   const uint32_t num_items_;
492   const uint32_t item_size_;
493   const uint32_t rel_location_;
494   const Mapper mapper_;
495   offset_t cur_idx_ = 0;
496 };
497 
498 // Parses a flattened jagged list of lists of items that looks like:
499 //   NTTT|NTT|NTTTT|N|NTT...
500 // where |N| is an uint32_t representing the number of items in each sub-list,
501 // and "T" is a fixed-size item (|item_width|) of type "T". On success, stores
502 // the offset of each |T| into |item_offsets|, and returns true. Otherwise
503 // (e.g., on finding any structural problem) returns false.
ParseItemOffsets(ConstBufferView image,const dex::MapItem & map_item,size_t item_width,std::vector<offset_t> * item_offsets)504 bool ParseItemOffsets(ConstBufferView image,
505                       const dex::MapItem& map_item,
506                       size_t item_width,
507                       std::vector<offset_t>* item_offsets) {
508   // Sanity check: |image| should at least fit |map_item.size| copies of "N".
509   if (!image.covers_array(map_item.offset, map_item.size, sizeof(uint32_t)))
510     return false;
511   BufferSource source = std::move(BufferSource(image).Skip(map_item.offset));
512   item_offsets->clear();
513   for (uint32_t i = 0; i < map_item.size; ++i) {
514     if (!source.AlignOn(image, 4U))
515       return false;
516     uint32_t unsafe_size;
517     if (!source.GetValue<uint32_t>(&unsafe_size))
518       return false;
519     DCHECK(Is32BitAligned(
520         base::checked_cast<offset_t>(source.begin() - image.begin())));
521     if (!source.covers_array(0, unsafe_size, item_width))
522       return false;
523     for (uint32_t j = 0; j < unsafe_size; ++j) {
524       item_offsets->push_back(
525           base::checked_cast<offset_t>(source.begin() - image.begin()));
526       source.Skip(item_width);
527     }
528   }
529   return true;
530 }
531 
532 // Parses AnnotationDirectoryItems of the format (using RegEx) "(AF*M*P*)*",
533 // where:
534 //   A = AnnotationsDirectoryItem (contains class annotation),
535 //   F = FieldAnnotation,
536 //   M = MethodAnnotation,
537 //   P = ParameterAnnotation.
538 // On success, stores the offsets of each class, field, method and parameter
539 // annotation for each item into |*_annotation_offsets|. Otherwise on finding
540 // structural issues returns false.
ParseAnnotationsDirectoryItems(ConstBufferView image,const dex::MapItem & annotations_directory_map_item,std::vector<offset_t> * annotations_directory_item_offsets,std::vector<offset_t> * field_annotation_offsets,std::vector<offset_t> * method_annotation_offsets,std::vector<offset_t> * parameter_annotation_offsets)541 bool ParseAnnotationsDirectoryItems(
542     ConstBufferView image,
543     const dex::MapItem& annotations_directory_map_item,
544     std::vector<offset_t>* annotations_directory_item_offsets,
545     std::vector<offset_t>* field_annotation_offsets,
546     std::vector<offset_t>* method_annotation_offsets,
547     std::vector<offset_t>* parameter_annotation_offsets) {
548   // Sanity check: |image| should at least fit
549   // |annotations_directory_map_item.size| copies of "A".
550   if (!image.covers_array(annotations_directory_map_item.offset,
551                           annotations_directory_map_item.size,
552                           sizeof(dex::AnnotationsDirectoryItem))) {
553     return false;
554   }
555   BufferSource source = std::move(
556       BufferSource(image).Skip(annotations_directory_map_item.offset));
557   annotations_directory_item_offsets->clear();
558   field_annotation_offsets->clear();
559   method_annotation_offsets->clear();
560   parameter_annotation_offsets->clear();
561 
562   // Helper to process sublists.
563   auto parse_list = [&source, image](uint32_t unsafe_size, size_t item_width,
564                                      std::vector<offset_t>* item_offsets) {
565     DCHECK(Is32BitAligned(
566         base::checked_cast<offset_t>(source.begin() - image.begin())));
567     if (!source.covers_array(0, unsafe_size, item_width))
568       return false;
569     item_offsets->reserve(item_offsets->size() + unsafe_size);
570     for (uint32_t i = 0; i < unsafe_size; ++i) {
571       item_offsets->push_back(
572           base::checked_cast<offset_t>(source.begin() - image.begin()));
573       source.Skip(item_width);
574     }
575     return true;
576   };
577 
578   annotations_directory_item_offsets->reserve(
579       annotations_directory_map_item.size);
580   for (uint32_t i = 0; i < annotations_directory_map_item.size; ++i) {
581     if (!source.AlignOn(image, 4U))
582       return false;
583     // Parse header.
584     annotations_directory_item_offsets->push_back(
585         base::checked_cast<offset_t>(source.begin() - image.begin()));
586     dex::AnnotationsDirectoryItem unsafe_annotations_directory_item;
587     if (!source.GetValue(&unsafe_annotations_directory_item))
588       return false;
589     // Parse sublists.
590     if (!(parse_list(unsafe_annotations_directory_item.fields_size,
591                      sizeof(dex::FieldAnnotation), field_annotation_offsets) &&
592           parse_list(unsafe_annotations_directory_item.annotated_methods_size,
593                      sizeof(dex::MethodAnnotation),
594                      method_annotation_offsets) &&
595           parse_list(
596               unsafe_annotations_directory_item.annotated_parameters_size,
597               sizeof(dex::ParameterAnnotation),
598               parameter_annotation_offsets))) {
599       return false;
600     }
601   }
602   return true;
603 }
604 
605 /******** CachedItemListReferenceReader ********/
606 
607 // A class that takes sorted |item_offsets|, and emits all member variable of
608 // interest (MVIs) that fall inside |[lo, hi)|. The MVI of each item has
609 // location of |rel_location| from item offset, and has target extracted with
610 // |mapper| (which performs validation). By the "atomicity assumption",
611 // [|lo, hi)| never cut across an MVI.
612 class CachedItemListReferenceReader : public ReferenceReader {
613  public:
614   // A function that takes an MVI's location and emit its target offset.
615   using Mapper = base::RepeatingCallback<offset_t(offset_t)>;
616 
CachedItemListReferenceReader(offset_t lo,offset_t hi,uint32_t rel_location,const std::vector<offset_t> & item_offsets,Mapper && mapper)617   CachedItemListReferenceReader(offset_t lo,
618                                 offset_t hi,
619                                 uint32_t rel_location,
620                                 const std::vector<offset_t>& item_offsets,
621                                 Mapper&& mapper)
622       : hi_(hi),
623         rel_location_(rel_location),
624         end_it_(item_offsets.cend()),
625         mapper_(mapper) {
626     cur_it_ = std::upper_bound(item_offsets.cbegin(), item_offsets.cend(), lo);
627     // Adding |rel_location_| is necessary as references can be offset from the
628     // start of the item.
629     if (cur_it_ != item_offsets.begin() && *(cur_it_ - 1) + rel_location_ >= lo)
630       --cur_it_;
631   }
632 
633   // ReferenceReader:
GetNext()634   base::Optional<Reference> GetNext() override {
635     while (cur_it_ < end_it_) {
636       const offset_t location = *cur_it_ + rel_location_;
637       if (location >= hi_)  // Check is simplified by atomicity assumption.
638         break;
639       const offset_t target = mapper_.Run(location);
640       if (target == kInvalidOffset) {
641         LOG(WARNING) << "Invalid item target at " << AsHex<8>(location) << ".";
642         break;
643       }
644       ++cur_it_;
645 
646       // kDexSentinelOffset is a sentinel for;
647       // - AnnotationsDirectoryItem: class_annotations_off
648       if (target == kDexSentinelOffset)
649         continue;
650       return Reference{location, target};
651     }
652     return base::nullopt;
653   }
654 
655  private:
656   const offset_t hi_;
657   const uint32_t rel_location_;
658   const std::vector<offset_t>::const_iterator end_it_;
659   const Mapper mapper_;
660   std::vector<offset_t>::const_iterator cur_it_;
661 
662   DISALLOW_COPY_AND_ASSIGN(CachedItemListReferenceReader);
663 };
664 
665 // Reads an INT index at |location| in |image| and translates the index to the
666 // offset of a fixed-size item specified by |target_map_item| and
667 // |target_item_size|. Returns the target offset if valid, or kInvalidOffset
668 // otherwise. This is compatible with
669 // CachedReferenceListReferenceReader::Mapper,
670 // InstructionReferenceReader::Mapper, and ItemReferenceReader::Mapper.
671 template <typename INT>
ReadTargetIndex(ConstBufferView image,const dex::MapItem & target_map_item,size_t target_item_size,offset_t location)672 static offset_t ReadTargetIndex(ConstBufferView image,
673                                 const dex::MapItem& target_map_item,
674                                 size_t target_item_size,
675                                 offset_t location) {
676   static_assert(sizeof(INT) <= sizeof(offset_t),
677                 "INT may not fit into offset_t.");
678   const offset_t unsafe_idx = image.read<INT>(location);
679   // kDexSentinalIndexAsOffset (0xFFFFFFFF) is a sentinel for
680   // - ClassDefItem: superclass_idx, source_file_idx.
681   if (unsafe_idx == kDexSentinelIndexAsOffset)
682     return unsafe_idx;
683   if (unsafe_idx >= target_map_item.size)
684     return kInvalidOffset;
685   return target_map_item.offset +
686          base::checked_cast<offset_t>(unsafe_idx * target_item_size);
687 }
688 
689 // Reads uint32_t value in |image| at (valid) |location| and checks whether it
690 // is a safe offset of a fixed-size item. Returns the target offset (possibly a
691 // sentinel) if valid, or kInvalidOffset otherwise. This is compatible with
692 // CachedReferenceListReferenceReader::Mapper,
693 // InstructionReferenceReader::Mapper, and ItemReferenceReader::Mapper.
ReadTargetOffset32(ConstBufferView image,offset_t location)694 static offset_t ReadTargetOffset32(ConstBufferView image, offset_t location) {
695   const offset_t unsafe_target =
696       static_cast<offset_t>(image.read<uint32_t>(location));
697   // Skip and don't validate kDexSentinelOffset as it is indicative of an
698   // empty reference.
699   if (unsafe_target == kDexSentinelOffset)
700     return unsafe_target;
701 
702   // TODO(huangs): Check that |unsafe_target| is within the correct data
703   // section.
704   if (unsafe_target >= image.size())
705     return kInvalidOffset;
706   return unsafe_target;
707 }
708 
709 /******** ReferenceWriterAdaptor ********/
710 
// A ReferenceWriter that adapts a callback that performs type-specific
// Reference writes. The adaptor stores an image view and a callback; every
// PutNext() call runs the callback with the given Reference and that view.
class ReferenceWriterAdaptor : public ReferenceWriter {
 public:
  // Callback type invoked for each Reference; receives the Reference and the
  // image view held by this adaptor.
  using Writer = base::RepeatingCallback<void(Reference, MutableBufferView)>;

  // Stores |image| by value and takes over |writer| via move.
  ReferenceWriterAdaptor(MutableBufferView image, Writer&& writer)
      : image_(image), writer_(std::move(writer)) {}

  // ReferenceWriter:
  // Forwards |ref| and the stored image view to the wrapped callback.
  void PutNext(Reference ref) override { writer_.Run(ref, image_); }

 private:
  // View handed to |writer_| on each PutNext() call.
  MutableBufferView image_;
  // The type-specific write callback.
  Writer writer_;
};
727 
728 // Helper that's compatible with ReferenceWriterAdaptor::Writer.
729 // Given that |ref.target| points to the start of a fixed size DEX item (e.g.,
730 // FieldIdItem), translates |ref.target| to item index, and writes the result to
731 // |ref.location| as |INT|.
732 template <typename INT>
WriteTargetIndex(const dex::MapItem & target_map_item,size_t target_item_size,Reference ref,MutableBufferView image)733 static void WriteTargetIndex(const dex::MapItem& target_map_item,
734                              size_t target_item_size,
735                              Reference ref,
736                              MutableBufferView image) {
737   const size_t unsafe_idx =
738       (ref.target - target_map_item.offset) / target_item_size;
739   // Verify that index is within bound.
740   if (unsafe_idx >= target_map_item.size) {
741     LOG(ERROR) << "Target index out of bounds at: " << AsHex<8>(ref.location)
742                << ".";
743     return;
744   }
745   // Verify that |ref.target| points to start of item.
746   DCHECK_EQ(ref.target, target_map_item.offset + unsafe_idx * target_item_size);
747   image.write<INT>(ref.location, base::checked_cast<INT>(unsafe_idx));
748 }
749 
// Buffer for ReadDexHeader() to optionally return results. All fields are
// assigned together by ReadDexHeader() when it succeeds and a non-null results
// pointer was supplied.
struct ReadDexHeaderResults {
  // Copy of the BufferSource that ReadDexHeader() constructed over the image
  // and used to obtain |header|.
  BufferSource source;
  // Raw (non-owning) pointer to the header, as returned by
  // source.GetPointer<dex::HeaderItem>() inside ReadDexHeader().
  const dex::HeaderItem* header;
  // DEX version parsed from the three decimal digits in magic[4..6].
  // ReadDexHeader() only succeeds for 35 and 37.
  int dex_version;
};
756 
757 // Returns whether |image| points to a DEX file. If this is a possibility and
758 // |opt_results| is not null, then uses it to pass extracted data to enable
759 // further parsing.
ReadDexHeader(ConstBufferView image,ReadDexHeaderResults * opt_results)760 bool ReadDexHeader(ConstBufferView image, ReadDexHeaderResults* opt_results) {
761   // This part needs to be fairly efficient since it may be called many times.
762   BufferSource source(image);
763   const dex::HeaderItem* header = source.GetPointer<dex::HeaderItem>();
764   if (!header)
765     return false;
766   if (header->magic[0] != 'd' || header->magic[1] != 'e' ||
767       header->magic[2] != 'x' || header->magic[3] != '\n' ||
768       header->magic[7] != '\0') {
769     return false;
770   }
771 
772   // Magic matches: More detailed tests can be conducted.
773   int dex_version = 0;
774   for (int i = 4; i < 7; ++i) {
775     if (!isdigit(header->magic[i]))
776       return false;
777     dex_version = dex_version * 10 + (header->magic[i] - '0');
778   }
779 
780   // Only support DEX versions 35 and 37.
781   // TODO(huangs): Handle version 38.
782   if (dex_version != 35 && dex_version != 37)
783     return false;
784 
785   if (header->file_size > image.size() ||
786       header->file_size < sizeof(dex::HeaderItem) ||
787       header->map_off < sizeof(dex::HeaderItem)) {
788     return false;
789   }
790 
791   if (opt_results)
792     *opt_results = {source, header, dex_version};
793   return true;
794 }
795 
796 }  // namespace
797 
798 /******** DisassemblerDex ********/
799 
// NOTE(review): the meaning of the literal 4 is defined by the Disassembler
// base-class constructor, which is declared elsewhere — confirm there before
// changing it.
DisassemblerDex::DisassemblerDex() : Disassembler(4) {}

// No custom teardown; members clean up through their own destructors.
DisassemblerDex::~DisassemblerDex() = default;
803 
804 // static.
QuickDetect(ConstBufferView image)805 bool DisassemblerDex::QuickDetect(ConstBufferView image) {
806   return ReadDexHeader(image, nullptr);
807 }
808 
// Identifies this disassembler's executable type; always kExeTypeDex.
ExecutableType DisassemblerDex::GetExeType() const {
  return kExeTypeDex;
}
812 
GetExeTypeString() const813 std::string DisassemblerDex::GetExeTypeString() const {
814   return base::StringPrintf("DEX (version %d)", dex_version_);
815 }
816 
// Returns the table of reference groups for DEX. A new vector is built and
// returned by value on each call.
std::vector<ReferenceGroup> DisassemblerDex::MakeReferenceGroups() const {
  // Must follow DisassemblerDex::ReferenceType order.
  // Each entry has the shape
  //   {{N, TypeTag(type), PoolTag(pool)}, reader factory, writer factory}
  // where, throughout this table, N matches the numeric suffix of the writer:
  // 4 for *32 (including MakeWriteAbs32), 2 for *16 and 1 for *8.
  // Several types may share one pool (e.g., all "...ToXxxStringId" types use
  // PoolTag(kStringId)).
  return {
      {{4, TypeTag(kTypeIdToDescriptorStringId), PoolTag(kStringId)},
       &DisassemblerDex::MakeReadTypeIdToDescriptorStringId32,
       &DisassemblerDex::MakeWriteStringId32},
      {{4, TypeTag(kProtoIdToShortyStringId), PoolTag(kStringId)},
       &DisassemblerDex::MakeReadProtoIdToShortyStringId32,
       &DisassemblerDex::MakeWriteStringId32},
      {{4, TypeTag(kFieldIdToNameStringId), PoolTag(kStringId)},
       &DisassemblerDex::MakeReadFieldToNameStringId32,
       &DisassemblerDex::MakeWriteStringId32},
      {{4, TypeTag(kMethodIdToNameStringId), PoolTag(kStringId)},
       &DisassemblerDex::MakeReadMethodIdToNameStringId32,
       &DisassemblerDex::MakeWriteStringId32},
      {{4, TypeTag(kClassDefToSourceFileStringId), PoolTag(kStringId)},
       &DisassemblerDex::MakeReadClassDefToSourceFileStringId32,
       &DisassemblerDex::MakeWriteStringId32},
      {{2, TypeTag(kCodeToStringId16), PoolTag(kStringId)},
       &DisassemblerDex::MakeReadCodeToStringId16,
       &DisassemblerDex::MakeWriteStringId16},
      {{4, TypeTag(kCodeToStringId32), PoolTag(kStringId)},
       &DisassemblerDex::MakeReadCodeToStringId32,
       &DisassemblerDex::MakeWriteStringId32},
      {{4, TypeTag(kProtoIdToReturnTypeId), PoolTag(kTypeId)},
       &DisassemblerDex::MakeReadProtoIdToReturnTypeId32,
       &DisassemblerDex::MakeWriteTypeId32},
      {{2, TypeTag(kFieldIdToClassTypeId), PoolTag(kTypeId)},
       &DisassemblerDex::MakeReadFieldToClassTypeId16,
       &DisassemblerDex::MakeWriteTypeId16},
      {{2, TypeTag(kFieldIdToTypeId), PoolTag(kTypeId)},
       &DisassemblerDex::MakeReadFieldToTypeId16,
       &DisassemblerDex::MakeWriteTypeId16},
      {{2, TypeTag(kMethodIdToClassTypeId), PoolTag(kTypeId)},
       &DisassemblerDex::MakeReadMethodIdToClassTypeId16,
       &DisassemblerDex::MakeWriteTypeId16},
      {{4, TypeTag(kClassDefToClassTypeId), PoolTag(kTypeId)},
       &DisassemblerDex::MakeReadClassDefToClassTypeId32,
       &DisassemblerDex::MakeWriteTypeId32},
      {{4, TypeTag(kClassDefToSuperClassTypeId), PoolTag(kTypeId)},
       &DisassemblerDex::MakeReadClassDefToSuperClassTypeId32,
       &DisassemblerDex::MakeWriteTypeId32},
      {{2, TypeTag(kTypeListToTypeId), PoolTag(kTypeId)},
       &DisassemblerDex::MakeReadTypeListToTypeId16,
       &DisassemblerDex::MakeWriteTypeId16},
      {{2, TypeTag(kCodeToTypeId), PoolTag(kTypeId)},
       &DisassemblerDex::MakeReadCodeToTypeId16,
       &DisassemblerDex::MakeWriteTypeId16},
      {{2, TypeTag(kMethodIdToProtoId), PoolTag(kProtoId)},
       &DisassemblerDex::MakeReadMethodIdToProtoId16,
       &DisassemblerDex::MakeWriteProtoId16},
      {{2, TypeTag(kCodeToFieldId), PoolTag(kFieldId)},
       &DisassemblerDex::MakeReadCodeToFieldId16,
       &DisassemblerDex::MakeWriteFieldId16},
      {{4, TypeTag(kAnnotationsDirectoryToFieldId), PoolTag(kFieldId)},
       &DisassemblerDex::MakeReadAnnotationsDirectoryToFieldId32,
       &DisassemblerDex::MakeWriteFieldId32},
      {{2, TypeTag(kCodeToMethodId), PoolTag(kMethodId)},
       &DisassemblerDex::MakeReadCodeToMethodId16,
       &DisassemblerDex::MakeWriteMethodId16},
      {{4, TypeTag(kAnnotationsDirectoryToMethodId), PoolTag(kMethodId)},
       &DisassemblerDex::MakeReadAnnotationsDirectoryToMethodId32,
       &DisassemblerDex::MakeWriteMethodId32},
      {{4, TypeTag(kAnnotationsDirectoryToParameterMethodId),
        PoolTag(kMethodId)},
       &DisassemblerDex::MakeReadAnnotationsDirectoryToParameterMethodId32,
       &DisassemblerDex::MakeWriteMethodId32},
      {{4, TypeTag(kProtoIdToParametersTypeList), PoolTag(kTypeList)},
       &DisassemblerDex::MakeReadProtoIdToParametersTypeList,
       &DisassemblerDex::MakeWriteAbs32},
      {{4, TypeTag(kClassDefToInterfacesTypeList), PoolTag(kTypeList)},
       &DisassemblerDex::MakeReadClassDefToInterfacesTypeList,
       &DisassemblerDex::MakeWriteAbs32},
      {{4, TypeTag(kAnnotationsDirectoryToParameterAnnotationSetRef),
        PoolTag(kAnnotationSetRefList)},
       &DisassemblerDex::
           MakeReadAnnotationsDirectoryToParameterAnnotationSetRef,
       &DisassemblerDex::MakeWriteAbs32},
      {{4, TypeTag(kAnnotationSetRefListToAnnotationSet),
        PoolTag(kAnnotionSet)},
       &DisassemblerDex::MakeReadAnnotationSetRefListToAnnotationSet,
       &DisassemblerDex::MakeWriteAbs32},
      {{4, TypeTag(kAnnotationsDirectoryToClassAnnotationSet),
        PoolTag(kAnnotionSet)},
       &DisassemblerDex::MakeReadAnnotationsDirectoryToClassAnnotationSet,
       &DisassemblerDex::MakeWriteAbs32},
      {{4, TypeTag(kAnnotationsDirectoryToFieldAnnotationSet),
        PoolTag(kAnnotionSet)},
       &DisassemblerDex::MakeReadAnnotationsDirectoryToFieldAnnotationSet,
       &DisassemblerDex::MakeWriteAbs32},
      {{4, TypeTag(kAnnotationsDirectoryToMethodAnnotationSet),
        PoolTag(kAnnotionSet)},
       &DisassemblerDex::MakeReadAnnotationsDirectoryToMethodAnnotationSet,
       &DisassemblerDex::MakeWriteAbs32},
      {{4, TypeTag(kClassDefToClassData), PoolTag(kClassData)},
       &DisassemblerDex::MakeReadClassDefToClassData,
       &DisassemblerDex::MakeWriteAbs32},
      {{1, TypeTag(kCodeToRelCode8), PoolTag(kCode)},
       &DisassemblerDex::MakeReadCodeToRelCode8,
       &DisassemblerDex::MakeWriteRelCode8},
      {{2, TypeTag(kCodeToRelCode16), PoolTag(kCode)},
       &DisassemblerDex::MakeReadCodeToRelCode16,
       &DisassemblerDex::MakeWriteRelCode16},
      {{4, TypeTag(kCodeToRelCode32), PoolTag(kCode)},
       &DisassemblerDex::MakeReadCodeToRelCode32,
       &DisassemblerDex::MakeWriteRelCode32},
      {{4, TypeTag(kStringIdToStringData), PoolTag(kStringData)},
       &DisassemblerDex::MakeReadStringIdToStringData,
       &DisassemblerDex::MakeWriteAbs32},
      {{4, TypeTag(kAnnotationSetToAnnotation), PoolTag(kAnnotation)},
       &DisassemblerDex::MakeReadAnnotationSetToAnnotation,
       &DisassemblerDex::MakeWriteAbs32},
      {{4, TypeTag(kClassDefToStaticValuesEncodedArray),
        PoolTag(kEncodedArray)},
       &DisassemblerDex::MakeReadClassDefToStaticValuesEncodedArray,
       &DisassemblerDex::MakeWriteAbs32},
      {{4, TypeTag(kClassDefToAnnotationDirectory),
        PoolTag(kAnnotationsDirectory)},
       &DisassemblerDex::MakeReadClassDefToAnnotationDirectory,
       &DisassemblerDex::MakeWriteAbs32},
  };
}
939 
MakeReadStringIdToStringData(offset_t lo,offset_t hi)940 std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadStringIdToStringData(
941     offset_t lo,
942     offset_t hi) {
943   // dex::StringIdItem::string_data_off mapper.
944   auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
945   return std::make_unique<ItemReferenceReader>(
946       lo, hi, string_map_item_, sizeof(dex::StringIdItem),
947       offsetof(dex::StringIdItem, string_data_off), std::move(mapper));
948 }
949 
950 std::unique_ptr<ReferenceReader>
MakeReadTypeIdToDescriptorStringId32(offset_t lo,offset_t hi)951 DisassemblerDex::MakeReadTypeIdToDescriptorStringId32(offset_t lo,
952                                                       offset_t hi) {
953   auto mapper = base::BindRepeating(
954       ReadTargetIndex<decltype(dex::TypeIdItem::descriptor_idx)>, image_,
955       string_map_item_, sizeof(dex::StringIdItem));
956   return std::make_unique<ItemReferenceReader>(
957       lo, hi, type_map_item_, sizeof(dex::TypeIdItem),
958       offsetof(dex::TypeIdItem, descriptor_idx), std::move(mapper));
959 }
960 
961 std::unique_ptr<ReferenceReader>
MakeReadProtoIdToShortyStringId32(offset_t lo,offset_t hi)962 DisassemblerDex::MakeReadProtoIdToShortyStringId32(offset_t lo, offset_t hi) {
963   auto mapper = base::BindRepeating(
964       ReadTargetIndex<decltype(dex::ProtoIdItem::shorty_idx)>, image_,
965       string_map_item_, sizeof(dex::StringIdItem));
966   return std::make_unique<ItemReferenceReader>(
967       lo, hi, proto_map_item_, sizeof(dex::ProtoIdItem),
968       offsetof(dex::ProtoIdItem, shorty_idx), std::move(mapper));
969 }
970 
971 std::unique_ptr<ReferenceReader>
MakeReadProtoIdToReturnTypeId32(offset_t lo,offset_t hi)972 DisassemblerDex::MakeReadProtoIdToReturnTypeId32(offset_t lo, offset_t hi) {
973   auto mapper = base::BindRepeating(
974       ReadTargetIndex<decltype(dex::ProtoIdItem::return_type_idx)>, image_,
975       type_map_item_, sizeof(dex::TypeIdItem));
976   return std::make_unique<ItemReferenceReader>(
977       lo, hi, proto_map_item_, sizeof(dex::ProtoIdItem),
978       offsetof(dex::ProtoIdItem, return_type_idx), std::move(mapper));
979 }
980 
981 std::unique_ptr<ReferenceReader>
MakeReadProtoIdToParametersTypeList(offset_t lo,offset_t hi)982 DisassemblerDex::MakeReadProtoIdToParametersTypeList(offset_t lo, offset_t hi) {
983   // dex::ProtoIdItem::parameters_off mapper.
984   auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
985   return std::make_unique<ItemReferenceReader>(
986       lo, hi, proto_map_item_, sizeof(dex::ProtoIdItem),
987       offsetof(dex::ProtoIdItem, parameters_off), std::move(mapper));
988 }
989 
MakeReadFieldToClassTypeId16(offset_t lo,offset_t hi)990 std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadFieldToClassTypeId16(
991     offset_t lo,
992     offset_t hi) {
993   auto mapper = base::BindRepeating(
994       ReadTargetIndex<decltype(dex::FieldIdItem::class_idx)>, image_,
995       type_map_item_, sizeof(dex::TypeIdItem));
996   return std::make_unique<ItemReferenceReader>(
997       lo, hi, field_map_item_, sizeof(dex::FieldIdItem),
998       offsetof(dex::FieldIdItem, class_idx), std::move(mapper));
999 }
1000 
MakeReadFieldToTypeId16(offset_t lo,offset_t hi)1001 std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadFieldToTypeId16(
1002     offset_t lo,
1003     offset_t hi) {
1004   auto mapper =
1005       base::BindRepeating(ReadTargetIndex<decltype(dex::FieldIdItem::type_idx)>,
1006                           image_, type_map_item_, sizeof(dex::TypeIdItem));
1007   return std::make_unique<ItemReferenceReader>(
1008       lo, hi, field_map_item_, sizeof(dex::FieldIdItem),
1009       offsetof(dex::FieldIdItem, type_idx), std::move(mapper));
1010 }
1011 
MakeReadFieldToNameStringId32(offset_t lo,offset_t hi)1012 std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadFieldToNameStringId32(
1013     offset_t lo,
1014     offset_t hi) {
1015   auto mapper =
1016       base::BindRepeating(ReadTargetIndex<decltype(dex::FieldIdItem::name_idx)>,
1017                           image_, string_map_item_, sizeof(dex::StringIdItem));
1018   return std::make_unique<ItemReferenceReader>(
1019       lo, hi, field_map_item_, sizeof(dex::FieldIdItem),
1020       offsetof(dex::FieldIdItem, name_idx), std::move(mapper));
1021 }
1022 
1023 std::unique_ptr<ReferenceReader>
MakeReadMethodIdToClassTypeId16(offset_t lo,offset_t hi)1024 DisassemblerDex::MakeReadMethodIdToClassTypeId16(offset_t lo, offset_t hi) {
1025   auto mapper = base::BindRepeating(
1026       ReadTargetIndex<decltype(dex::MethodIdItem::class_idx)>, image_,
1027       type_map_item_, sizeof(dex::TypeIdItem));
1028   return std::make_unique<ItemReferenceReader>(
1029       lo, hi, method_map_item_, sizeof(dex::MethodIdItem),
1030       offsetof(dex::MethodIdItem, class_idx), std::move(mapper));
1031 }
1032 
MakeReadMethodIdToProtoId16(offset_t lo,offset_t hi)1033 std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadMethodIdToProtoId16(
1034     offset_t lo,
1035     offset_t hi) {
1036   auto mapper = base::BindRepeating(
1037       ReadTargetIndex<decltype(dex::MethodIdItem::proto_idx)>, image_,
1038       proto_map_item_, sizeof(dex::ProtoIdItem));
1039   return std::make_unique<ItemReferenceReader>(
1040       lo, hi, method_map_item_, sizeof(dex::MethodIdItem),
1041       offsetof(dex::MethodIdItem, proto_idx), std::move(mapper));
1042 }
1043 
1044 std::unique_ptr<ReferenceReader>
MakeReadMethodIdToNameStringId32(offset_t lo,offset_t hi)1045 DisassemblerDex::MakeReadMethodIdToNameStringId32(offset_t lo, offset_t hi) {
1046   auto mapper = base::BindRepeating(
1047       ReadTargetIndex<decltype(dex::MethodIdItem::name_idx)>, image_,
1048       string_map_item_, sizeof(dex::StringIdItem));
1049   return std::make_unique<ItemReferenceReader>(
1050       lo, hi, method_map_item_, sizeof(dex::MethodIdItem),
1051       offsetof(dex::MethodIdItem, name_idx), std::move(mapper));
1052 }
1053 
1054 std::unique_ptr<ReferenceReader>
MakeReadClassDefToClassTypeId32(offset_t lo,offset_t hi)1055 DisassemblerDex::MakeReadClassDefToClassTypeId32(offset_t lo, offset_t hi) {
1056   auto mapper = base::BindRepeating(
1057       ReadTargetIndex<decltype(dex::ClassDefItem::superclass_idx)>, image_,
1058       type_map_item_, sizeof(dex::TypeIdItem));
1059   return std::make_unique<ItemReferenceReader>(
1060       lo, hi, class_def_map_item_, sizeof(dex::ClassDefItem),
1061       offsetof(dex::ClassDefItem, class_idx), std::move(mapper));
1062 }
1063 
1064 std::unique_ptr<ReferenceReader>
MakeReadClassDefToSuperClassTypeId32(offset_t lo,offset_t hi)1065 DisassemblerDex::MakeReadClassDefToSuperClassTypeId32(offset_t lo,
1066                                                       offset_t hi) {
1067   auto mapper = base::BindRepeating(
1068       ReadTargetIndex<decltype(dex::ClassDefItem::superclass_idx)>, image_,
1069       type_map_item_, sizeof(dex::TypeIdItem));
1070   return std::make_unique<ItemReferenceReader>(
1071       lo, hi, class_def_map_item_, sizeof(dex::ClassDefItem),
1072       offsetof(dex::ClassDefItem, superclass_idx), std::move(mapper));
1073 }
1074 
1075 std::unique_ptr<ReferenceReader>
MakeReadClassDefToInterfacesTypeList(offset_t lo,offset_t hi)1076 DisassemblerDex::MakeReadClassDefToInterfacesTypeList(offset_t lo,
1077                                                       offset_t hi) {
1078   // dex::ClassDefItem::interfaces_off mapper.
1079   auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
1080   return std::make_unique<ItemReferenceReader>(
1081       lo, hi, class_def_map_item_, sizeof(dex::ClassDefItem),
1082       offsetof(dex::ClassDefItem, interfaces_off), std::move(mapper));
1083 }
1084 
1085 std::unique_ptr<ReferenceReader>
MakeReadClassDefToSourceFileStringId32(offset_t lo,offset_t hi)1086 DisassemblerDex::MakeReadClassDefToSourceFileStringId32(offset_t lo,
1087                                                         offset_t hi) {
1088   auto mapper = base::BindRepeating(
1089       ReadTargetIndex<decltype(dex::ClassDefItem::source_file_idx)>, image_,
1090       string_map_item_, sizeof(dex::StringIdItem));
1091   return std::make_unique<ItemReferenceReader>(
1092       lo, hi, class_def_map_item_, sizeof(dex::ClassDefItem),
1093       offsetof(dex::ClassDefItem, source_file_idx), std::move(mapper));
1094 }
1095 
1096 std::unique_ptr<ReferenceReader>
MakeReadClassDefToAnnotationDirectory(offset_t lo,offset_t hi)1097 DisassemblerDex::MakeReadClassDefToAnnotationDirectory(offset_t lo,
1098                                                        offset_t hi) {
1099   // dex::ClassDefItem::annotations_off mapper.
1100   auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
1101   return std::make_unique<ItemReferenceReader>(
1102       lo, hi, class_def_map_item_, sizeof(dex::ClassDefItem),
1103       offsetof(dex::ClassDefItem, annotations_off), std::move(mapper));
1104 }
1105 
MakeReadClassDefToClassData(offset_t lo,offset_t hi)1106 std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadClassDefToClassData(
1107     offset_t lo,
1108     offset_t hi) {
1109   // dex::ClassDefItem::class_data_off mapper.
1110   auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
1111   return std::make_unique<ItemReferenceReader>(
1112       lo, hi, class_def_map_item_, sizeof(dex::ClassDefItem),
1113       offsetof(dex::ClassDefItem, class_data_off), std::move(mapper));
1114 }
1115 
1116 std::unique_ptr<ReferenceReader>
MakeReadClassDefToStaticValuesEncodedArray(offset_t lo,offset_t hi)1117 DisassemblerDex::MakeReadClassDefToStaticValuesEncodedArray(offset_t lo,
1118                                                             offset_t hi) {
1119   // dex::ClassDefItem::static_values_off mapper.
1120   auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
1121   return std::make_unique<ItemReferenceReader>(
1122       lo, hi, class_def_map_item_, sizeof(dex::ClassDefItem),
1123       offsetof(dex::ClassDefItem, static_values_off), std::move(mapper));
1124 }
1125 
MakeReadTypeListToTypeId16(offset_t lo,offset_t hi)1126 std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadTypeListToTypeId16(
1127     offset_t lo,
1128     offset_t hi) {
1129   auto mapper =
1130       base::BindRepeating(ReadTargetIndex<decltype(dex::TypeItem::type_idx)>,
1131                           image_, type_map_item_, sizeof(dex::TypeIdItem));
1132   return std::make_unique<CachedItemListReferenceReader>(
1133       lo, hi, offsetof(dex::TypeItem, type_idx), type_list_offsets_,
1134       std::move(mapper));
1135 }
1136 
1137 std::unique_ptr<ReferenceReader>
MakeReadAnnotationSetToAnnotation(offset_t lo,offset_t hi)1138 DisassemblerDex::MakeReadAnnotationSetToAnnotation(offset_t lo, offset_t hi) {
1139   // dex::AnnotationOffItem::annotation_off mapper.
1140   auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
1141   return std::make_unique<CachedItemListReferenceReader>(
1142       lo, hi, offsetof(dex::AnnotationOffItem, annotation_off),
1143       annotation_set_offsets_, std::move(mapper));
1144 }
1145 
1146 std::unique_ptr<ReferenceReader>
MakeReadAnnotationSetRefListToAnnotationSet(offset_t lo,offset_t hi)1147 DisassemblerDex::MakeReadAnnotationSetRefListToAnnotationSet(offset_t lo,
1148                                                              offset_t hi) {
1149   // dex::AnnotationSetRefItem::annotations_off mapper.
1150   auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
1151   return std::make_unique<CachedItemListReferenceReader>(
1152       lo, hi, offsetof(dex::AnnotationSetRefItem, annotations_off),
1153       annotation_set_ref_list_offsets_, std::move(mapper));
1154 }
1155 
1156 std::unique_ptr<ReferenceReader>
MakeReadAnnotationsDirectoryToClassAnnotationSet(offset_t lo,offset_t hi)1157 DisassemblerDex::MakeReadAnnotationsDirectoryToClassAnnotationSet(offset_t lo,
1158                                                                   offset_t hi) {
1159   // dex::AnnotationsDirectoryItem::class_annotations_off mapper.
1160   auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
1161   return std::make_unique<CachedItemListReferenceReader>(
1162       lo, hi, offsetof(dex::AnnotationsDirectoryItem, class_annotations_off),
1163       annotations_directory_item_offsets_, std::move(mapper));
1164 }
1165 
1166 std::unique_ptr<ReferenceReader>
MakeReadAnnotationsDirectoryToFieldId32(offset_t lo,offset_t hi)1167 DisassemblerDex::MakeReadAnnotationsDirectoryToFieldId32(offset_t lo,
1168                                                          offset_t hi) {
1169   auto mapper = base::BindRepeating(
1170       ReadTargetIndex<decltype(dex::FieldAnnotation::field_idx)>, image_,
1171       field_map_item_, sizeof(dex::FieldIdItem));
1172   return std::make_unique<CachedItemListReferenceReader>(
1173       lo, hi, offsetof(dex::FieldAnnotation, field_idx),
1174       annotations_directory_item_field_annotation_offsets_, std::move(mapper));
1175 }
1176 
1177 std::unique_ptr<ReferenceReader>
MakeReadAnnotationsDirectoryToFieldAnnotationSet(offset_t lo,offset_t hi)1178 DisassemblerDex::MakeReadAnnotationsDirectoryToFieldAnnotationSet(offset_t lo,
1179                                                                   offset_t hi) {
1180   // dex::FieldAnnotation::annotations_off mapper.
1181   auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
1182   return std::make_unique<CachedItemListReferenceReader>(
1183       lo, hi, offsetof(dex::FieldAnnotation, annotations_off),
1184       annotations_directory_item_field_annotation_offsets_, std::move(mapper));
1185 }
1186 
1187 std::unique_ptr<ReferenceReader>
MakeReadAnnotationsDirectoryToMethodId32(offset_t lo,offset_t hi)1188 DisassemblerDex::MakeReadAnnotationsDirectoryToMethodId32(offset_t lo,
1189                                                           offset_t hi) {
1190   auto mapper = base::BindRepeating(
1191       ReadTargetIndex<decltype(dex::MethodAnnotation::method_idx)>, image_,
1192       method_map_item_, sizeof(dex::MethodIdItem));
1193   return std::make_unique<CachedItemListReferenceReader>(
1194       lo, hi, offsetof(dex::MethodAnnotation, method_idx),
1195       annotations_directory_item_method_annotation_offsets_, std::move(mapper));
1196 }
1197 
1198 std::unique_ptr<ReferenceReader>
MakeReadAnnotationsDirectoryToMethodAnnotationSet(offset_t lo,offset_t hi)1199 DisassemblerDex::MakeReadAnnotationsDirectoryToMethodAnnotationSet(
1200     offset_t lo,
1201     offset_t hi) {
1202   // dex::MethodAnnotation::annotations_off mapper.
1203   auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
1204   return std::make_unique<CachedItemListReferenceReader>(
1205       lo, hi, offsetof(dex::MethodAnnotation, annotations_off),
1206       annotations_directory_item_method_annotation_offsets_, std::move(mapper));
1207 }
1208 
1209 std::unique_ptr<ReferenceReader>
MakeReadAnnotationsDirectoryToParameterMethodId32(offset_t lo,offset_t hi)1210 DisassemblerDex::MakeReadAnnotationsDirectoryToParameterMethodId32(
1211     offset_t lo,
1212     offset_t hi) {
1213   auto mapper = base::BindRepeating(
1214       ReadTargetIndex<decltype(dex::ParameterAnnotation::method_idx)>, image_,
1215       method_map_item_, sizeof(dex::MethodIdItem));
1216   return std::make_unique<CachedItemListReferenceReader>(
1217       lo, hi, offsetof(dex::ParameterAnnotation, method_idx),
1218       annotations_directory_item_parameter_annotation_offsets_,
1219       std::move(mapper));
1220 }
1221 
1222 std::unique_ptr<ReferenceReader>
MakeReadAnnotationsDirectoryToParameterAnnotationSetRef(offset_t lo,offset_t hi)1223 DisassemblerDex::MakeReadAnnotationsDirectoryToParameterAnnotationSetRef(
1224     offset_t lo,
1225     offset_t hi) {
1226   // dex::ParameterAnnotation::annotations_off mapper.
1227   auto mapper = base::BindRepeating(ReadTargetOffset32, image_);
1228   return std::make_unique<CachedItemListReferenceReader>(
1229       lo, hi, offsetof(dex::ParameterAnnotation, annotations_off),
1230       annotations_directory_item_parameter_annotation_offsets_,
1231       std::move(mapper));
1232 }
1233 
MakeReadCodeToStringId16(offset_t lo,offset_t hi)1234 std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToStringId16(
1235     offset_t lo,
1236     offset_t hi) {
1237   auto filter = base::BindRepeating(
1238       [](const InstructionParser::Value& value) -> offset_t {
1239         if (value.instr->format == dex::FormatId::c &&
1240             (value.instr->opcode == 0x1A)) {  // const-string
1241           // BBBB from e.g., const-string vAA, string@BBBB.
1242           return value.instr_offset + 2;
1243         }
1244         return kInvalidOffset;
1245       });
1246   auto mapper =
1247       base::BindRepeating(ReadTargetIndex<uint16_t>, image_, string_map_item_,
1248                           sizeof(dex::StringIdItem));
1249   return std::make_unique<InstructionReferenceReader>(
1250       image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper));
1251 }
1252 
MakeReadCodeToStringId32(offset_t lo,offset_t hi)1253 std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToStringId32(
1254     offset_t lo,
1255     offset_t hi) {
1256   auto filter = base::BindRepeating(
1257       [](const InstructionParser::Value& value) -> offset_t {
1258         if (value.instr->format == dex::FormatId::c &&
1259             (value.instr->opcode == 0x1B)) {  // const-string/jumbo
1260           // BBBBBBBB from e.g., const-string/jumbo vAA, string@BBBBBBBB.
1261           return value.instr_offset + 2;
1262         }
1263         return kInvalidOffset;
1264       });
1265   auto mapper =
1266       base::BindRepeating(ReadTargetIndex<uint32_t>, image_, string_map_item_,
1267                           sizeof(dex::StringIdItem));
1268   return std::make_unique<InstructionReferenceReader>(
1269       image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper));
1270 }
1271 
MakeReadCodeToTypeId16(offset_t lo,offset_t hi)1272 std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToTypeId16(
1273     offset_t lo,
1274     offset_t hi) {
1275   auto filter = base::BindRepeating(
1276       [](const InstructionParser::Value& value) -> offset_t {
1277         if (value.instr->format == dex::FormatId::c &&
1278             (value.instr->opcode == 0x1C ||   // const-class
1279              value.instr->opcode == 0x1F ||   // check-cast
1280              value.instr->opcode == 0x20 ||   // instance-of
1281              value.instr->opcode == 0x22 ||   // new-instance
1282              value.instr->opcode == 0x23 ||   // new-array
1283              value.instr->opcode == 0x24 ||   // filled-new-array
1284              value.instr->opcode == 0x25)) {  // filled-new-array/range
1285           // BBBB from e.g., const-class vAA, type@BBBB.
1286           return value.instr_offset + 2;
1287         }
1288         return kInvalidOffset;
1289       });
1290   auto mapper = base::BindRepeating(ReadTargetIndex<uint16_t>, image_,
1291                                     type_map_item_, sizeof(dex::TypeIdItem));
1292   return std::make_unique<InstructionReferenceReader>(
1293       image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper));
1294 }
1295 
MakeReadCodeToFieldId16(offset_t lo,offset_t hi)1296 std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToFieldId16(
1297     offset_t lo,
1298     offset_t hi) {
1299   auto filter = base::BindRepeating(
1300       [](const InstructionParser::Value& value) -> offset_t {
1301         if (value.instr->format == dex::FormatId::c &&
1302             (value.instr->opcode == 0x52 ||   // iinstanceop (iget-*, iput-*)
1303              value.instr->opcode == 0x60)) {  // sstaticop (sget-*, sput-*)
1304           // CCCC from e.g., iget vA, vB, field@CCCC.
1305           return value.instr_offset + 2;
1306         }
1307         return kInvalidOffset;
1308       });
1309   auto mapper = base::BindRepeating(ReadTargetIndex<uint16_t>, image_,
1310                                     field_map_item_, sizeof(dex::FieldIdItem));
1311   return std::make_unique<InstructionReferenceReader>(
1312       image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper));
1313 }
1314 
MakeReadCodeToMethodId16(offset_t lo,offset_t hi)1315 std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToMethodId16(
1316     offset_t lo,
1317     offset_t hi) {
1318   auto filter = base::BindRepeating(
1319       [](const InstructionParser::Value& value) -> offset_t {
1320         if (value.instr->format == dex::FormatId::c &&
1321             (value.instr->opcode == 0x6E ||   // invoke-kind
1322              value.instr->opcode == 0x74)) {  // invoke-kind/range
1323           // BBBB from e.g., invoke-virtual {vC, vD, vE, vF, vG}, meth@BBBB.
1324           return value.instr_offset + 2;
1325         }
1326         return kInvalidOffset;
1327       });
1328   auto mapper =
1329       base::BindRepeating(ReadTargetIndex<uint16_t>, image_, method_map_item_,
1330                           sizeof(dex::MethodIdItem));
1331   return std::make_unique<InstructionReferenceReader>(
1332       image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper));
1333 }
1334 
MakeReadCodeToRelCode8(offset_t lo,offset_t hi)1335 std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToRelCode8(
1336     offset_t lo,
1337     offset_t hi) {
1338   auto filter = base::BindRepeating(
1339       [](const InstructionParser::Value& value) -> offset_t {
1340         if (value.instr->format == dex::FormatId::t &&
1341             value.instr->opcode == 0x28) {  // goto
1342           // +AA from e.g., goto +AA.
1343           return value.instr_offset + 1;
1344         }
1345         return kInvalidOffset;
1346       });
1347   auto mapper = base::BindRepeating(
1348       [](DisassemblerDex* dis, offset_t location) {
1349         // Address is relative to the current instruction, which begins 1 unit
1350         // before |location|. This needs to be subtracted out. Also, store as
1351         // int32_t so |unsafe_delta - 1| won't underflow!
1352         int32_t unsafe_delta = dis->image_.read<int8_t>(location);
1353         offset_t unsafe_target = static_cast<offset_t>(
1354             location + (unsafe_delta - 1) * kInstrUnitSize);
1355         // TODO(huangs): Check that |unsafe_target| stays within code item.
1356         return unsafe_target;
1357       },
1358       base::Unretained(this));
1359   return std::make_unique<InstructionReferenceReader>(
1360       image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper));
1361 }
1362 
MakeReadCodeToRelCode16(offset_t lo,offset_t hi)1363 std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToRelCode16(
1364     offset_t lo,
1365     offset_t hi) {
1366   auto filter = base::BindRepeating(
1367       [](const InstructionParser::Value& value) -> offset_t {
1368         if (value.instr->format == dex::FormatId::t &&
1369             (value.instr->opcode == 0x29 ||   // goto/16
1370              value.instr->opcode == 0x32 ||   // if-test
1371              value.instr->opcode == 0x38)) {  // if-testz
1372           // +AAAA from e.g., goto/16 +AAAA.
1373           return value.instr_offset + 2;
1374         }
1375         return kInvalidOffset;
1376       });
1377   auto mapper = base::BindRepeating(
1378       [](DisassemblerDex* dis, offset_t location) {
1379         // Address is relative to the current instruction, which begins 1 unit
1380         // before |location|. This needs to be subtracted out. Also, store as
1381         // int32_t so |unsafe_delta - 1| won't underflow!
1382         int32_t unsafe_delta = dis->image_.read<int16_t>(location);
1383         offset_t unsafe_target = static_cast<offset_t>(
1384             location + (unsafe_delta - 1) * kInstrUnitSize);
1385         // TODO(huangs): Check that |unsafe_target| stays within code item.
1386         return unsafe_target;
1387       },
1388       base::Unretained(this));
1389   return std::make_unique<InstructionReferenceReader>(
1390       image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper));
1391 }
1392 
MakeReadCodeToRelCode32(offset_t lo,offset_t hi)1393 std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToRelCode32(
1394     offset_t lo,
1395     offset_t hi) {
1396   auto filter = base::BindRepeating(
1397       [](const InstructionParser::Value& value) -> offset_t {
1398         if (value.instr->format == dex::FormatId::t &&
1399             (value.instr->opcode == 0x26 ||   // fill-array-data
1400              value.instr->opcode == 0x2A ||   // goto/32
1401              value.instr->opcode == 0x2B ||   // packed-switch
1402              value.instr->opcode == 0x2C)) {  // sparse-switch
1403           // +BBBBBBBB from e.g., fill-array-data vAA, +BBBBBBBB.
1404           // +AAAAAAAA from e.g., goto/32 +AAAAAAAA.
1405           return value.instr_offset + 2;
1406         }
1407         return kInvalidOffset;
1408       });
1409   auto mapper = base::BindRepeating(
1410       [](DisassemblerDex* dis, offset_t location) {
1411         // Address is relative to the current instruction, which begins 1 unit
1412         // before |location|. This needs to be subtracted out. Use int64_t to
1413         // avoid underflow and overflow.
1414         int64_t unsafe_delta = dis->image_.read<int32_t>(location);
1415         int64_t unsafe_target = location + (unsafe_delta - 1) * kInstrUnitSize;
1416 
1417         // TODO(huangs): Check that |unsafe_target| stays within code item.
1418         offset_t checked_unsafe_target =
1419             static_cast<offset_t>(base::CheckedNumeric<offset_t>(unsafe_target)
1420                                       .ValueOrDefault(kInvalidOffset));
1421         return checked_unsafe_target < kOffsetBound ? checked_unsafe_target
1422                                                     : kInvalidOffset;
1423       },
1424       base::Unretained(this));
1425   return std::make_unique<InstructionReferenceReader>(
1426       image_, lo, hi, code_item_offsets_, std::move(filter), std::move(mapper));
1427 }
1428 
MakeWriteStringId16(MutableBufferView image)1429 std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteStringId16(
1430     MutableBufferView image) {
1431   auto writer = base::BindRepeating(
1432       WriteTargetIndex<uint16_t>, string_map_item_, sizeof(dex::StringIdItem));
1433   return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
1434 }
1435 
MakeWriteStringId32(MutableBufferView image)1436 std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteStringId32(
1437     MutableBufferView image) {
1438   auto writer = base::BindRepeating(
1439       WriteTargetIndex<uint32_t>, string_map_item_, sizeof(dex::StringIdItem));
1440   return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
1441 }
1442 
MakeWriteTypeId16(MutableBufferView image)1443 std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteTypeId16(
1444     MutableBufferView image) {
1445   auto writer = base::BindRepeating(WriteTargetIndex<uint16_t>, type_map_item_,
1446                                     sizeof(dex::TypeIdItem));
1447   return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
1448 }
1449 
MakeWriteTypeId32(MutableBufferView image)1450 std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteTypeId32(
1451     MutableBufferView image) {
1452   auto writer = base::BindRepeating(WriteTargetIndex<uint32_t>, type_map_item_,
1453                                     sizeof(dex::TypeIdItem));
1454   return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
1455 }
1456 
MakeWriteProtoId16(MutableBufferView image)1457 std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteProtoId16(
1458     MutableBufferView image) {
1459   auto writer = base::BindRepeating(WriteTargetIndex<uint16_t>, proto_map_item_,
1460                                     sizeof(dex::ProtoIdItem));
1461   return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
1462 }
1463 
MakeWriteFieldId16(MutableBufferView image)1464 std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteFieldId16(
1465     MutableBufferView image) {
1466   auto writer = base::BindRepeating(WriteTargetIndex<uint16_t>, field_map_item_,
1467                                     sizeof(dex::FieldIdItem));
1468   return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
1469 }
1470 
MakeWriteFieldId32(MutableBufferView image)1471 std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteFieldId32(
1472     MutableBufferView image) {
1473   auto writer = base::BindRepeating(WriteTargetIndex<uint32_t>, field_map_item_,
1474                                     sizeof(dex::FieldIdItem));
1475   return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
1476 }
1477 
MakeWriteMethodId16(MutableBufferView image)1478 std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteMethodId16(
1479     MutableBufferView image) {
1480   auto writer = base::BindRepeating(
1481       WriteTargetIndex<uint16_t>, method_map_item_, sizeof(dex::MethodIdItem));
1482   return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
1483 }
1484 
MakeWriteMethodId32(MutableBufferView image)1485 std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteMethodId32(
1486     MutableBufferView image) {
1487   auto writer = base::BindRepeating(
1488       WriteTargetIndex<uint32_t>, method_map_item_, sizeof(dex::MethodIdItem));
1489   return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
1490 }
1491 
MakeWriteRelCode8(MutableBufferView image)1492 std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteRelCode8(
1493     MutableBufferView image) {
1494   auto writer = base::BindRepeating([](Reference ref, MutableBufferView image) {
1495     ptrdiff_t unsafe_byte_diff =
1496         static_cast<ptrdiff_t>(ref.target) - ref.location;
1497     DCHECK_EQ(0, unsafe_byte_diff % kInstrUnitSize);
1498     // |delta| is relative to start of instruction, which is 1 unit before
1499     // |ref.location|. The subtraction above removed too much, so +1 to fix.
1500     base::CheckedNumeric<int8_t> delta((unsafe_byte_diff / kInstrUnitSize) + 1);
1501     if (!delta.IsValid()) {
1502       LOG(ERROR) << "Invalid reference at: " << AsHex<8>(ref.location) << ".";
1503       return;
1504     }
1505     image.write<int8_t>(ref.location, delta.ValueOrDie());
1506   });
1507   return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
1508 }
1509 
MakeWriteRelCode16(MutableBufferView image)1510 std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteRelCode16(
1511     MutableBufferView image) {
1512   auto writer = base::BindRepeating([](Reference ref, MutableBufferView image) {
1513     ptrdiff_t unsafe_byte_diff =
1514         static_cast<ptrdiff_t>(ref.target) - ref.location;
1515     DCHECK_EQ(0, unsafe_byte_diff % kInstrUnitSize);
1516     // |delta| is relative to start of instruction, which is 1 unit before
1517     // |ref.location|. The subtraction above removed too much, so +1 to fix.
1518     base::CheckedNumeric<int16_t> delta((unsafe_byte_diff / kInstrUnitSize) +
1519                                         1);
1520     if (!delta.IsValid()) {
1521       LOG(ERROR) << "Invalid reference at: " << AsHex<8>(ref.location) << ".";
1522       return;
1523     }
1524     image.write<int16_t>(ref.location, delta.ValueOrDie());
1525   });
1526   return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
1527 }
1528 
MakeWriteRelCode32(MutableBufferView image)1529 std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteRelCode32(
1530     MutableBufferView image) {
1531   auto writer = base::BindRepeating([](Reference ref, MutableBufferView image) {
1532     ptrdiff_t unsafe_byte_diff =
1533         static_cast<ptrdiff_t>(ref.target) - ref.location;
1534     DCHECK_EQ(0, unsafe_byte_diff % kInstrUnitSize);
1535     // |delta| is relative to start of instruction, which is 1 unit before
1536     // |ref.location|. The subtraction above removed too much, so +1 to fix.
1537     base::CheckedNumeric<int32_t> delta((unsafe_byte_diff / kInstrUnitSize) +
1538                                         1);
1539     if (!delta.IsValid()) {
1540       LOG(ERROR) << "Invalid reference at: " << AsHex<8>(ref.location) << ".";
1541       return;
1542     }
1543     image.write<int32_t>(ref.location, delta.ValueOrDie());
1544   });
1545   return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
1546 }
1547 
MakeWriteAbs32(MutableBufferView image)1548 std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteAbs32(
1549     MutableBufferView image) {
1550   auto writer = base::BindRepeating([](Reference ref, MutableBufferView image) {
1551     image.write<uint32_t>(ref.location, ref.target);
1552   });
1553   return std::make_unique<ReferenceWriterAdaptor>(image, std::move(writer));
1554 }
1555 
Parse(ConstBufferView image)1556 bool DisassemblerDex::Parse(ConstBufferView image) {
1557   image_ = image;
1558   return ParseHeader();
1559 }
1560 
ParseHeader()1561 bool DisassemblerDex::ParseHeader() {
1562   ReadDexHeaderResults results;
1563   if (!ReadDexHeader(image_, &results))
1564     return false;
1565 
1566   header_ = results.header;
1567   dex_version_ = results.dex_version;
1568   BufferSource source = results.source;
1569 
1570   // DEX header contains file size, so use it to resize |image_| right away.
1571   image_.shrink(header_->file_size);
1572 
1573   // Read map list. This is not a fixed-size array, so instead of reading
1574   // MapList directly, read |MapList::size| first, then visit elements in
1575   // |MapList::list|.
1576   static_assert(
1577       offsetof(dex::MapList, list) == sizeof(decltype(dex::MapList::size)),
1578       "MapList size error.");
1579   source = std::move(BufferSource(image_).Skip(header_->map_off));
1580   decltype(dex::MapList::size) list_size = 0;
1581   if (!source.GetValue(&list_size) || list_size > dex::kMaxItemListSize)
1582     return false;
1583   const auto* item_list = source.GetArray<const dex::MapItem>(list_size);
1584   if (!item_list)
1585     return false;
1586 
1587   // Read and validate map list, ensuring that required item types are present.
1588   // - GetItemBaseSize() should have an entry for each item.
1589   // - dex::kTypeCodeItem is actually not required; it's possible to have a DEX
1590   //   file with classes that have no code. However, this is unlikely to appear
1591   //   in application, so for simplicity we require DEX files to have code.
1592   std::set<uint16_t> required_item_types = {
1593       dex::kTypeStringIdItem, dex::kTypeTypeIdItem,   dex::kTypeProtoIdItem,
1594       dex::kTypeFieldIdItem,  dex::kTypeMethodIdItem, dex::kTypeClassDefItem,
1595       dex::kTypeTypeList,     dex::kTypeCodeItem,
1596   };
1597   for (offset_t i = 0; i < list_size; ++i) {
1598     const dex::MapItem* item = &item_list[i];
1599     // Reject unreasonably large |item->size|.
1600     size_t item_size = GetItemBaseSize(item->type);
1601     // Confusing name: |item->size| is actually the number of items.
1602     if (!image_.covers_array(item->offset, item->size, item_size))
1603       return false;
1604     if (!map_item_map_.insert(std::make_pair(item->type, item)).second)
1605       return false;  // A given type must appear at most once.
1606     required_item_types.erase(item->type);
1607   }
1608   // TODO(huangs): Replace this with guards throughout file.
1609   if (!required_item_types.empty())
1610     return false;
1611 
1612   // Make local copies of main map items.
1613   string_map_item_ = *map_item_map_[dex::kTypeStringIdItem];
1614   type_map_item_ = *map_item_map_[dex::kTypeTypeIdItem];
1615   proto_map_item_ = *map_item_map_[dex::kTypeProtoIdItem];
1616   field_map_item_ = *map_item_map_[dex::kTypeFieldIdItem];
1617   method_map_item_ = *map_item_map_[dex::kTypeMethodIdItem];
1618   class_def_map_item_ = *map_item_map_[dex::kTypeClassDefItem];
1619   type_list_map_item_ = *map_item_map_[dex::kTypeTypeList];
1620   code_map_item_ = *map_item_map_[dex::kTypeCodeItem];
1621 
1622   // The following types are optional and may not be present in every DEX file.
1623   if (map_item_map_.count(dex::kTypeAnnotationSetRefList)) {
1624     annotation_set_ref_list_map_item_ =
1625         *map_item_map_[dex::kTypeAnnotationSetRefList];
1626   }
1627   if (map_item_map_.count(dex::kTypeAnnotationSetItem))
1628     annotation_set_map_item_ = *map_item_map_[dex::kTypeAnnotationSetItem];
1629   if (map_item_map_.count(dex::kTypeAnnotationsDirectoryItem)) {
1630     annotations_directory_map_item_ =
1631         *map_item_map_[dex::kTypeAnnotationsDirectoryItem];
1632   }
1633 
1634   // Iteratively parse variable length lists, annotations directory items, and
1635   // code items blocks. Any failure would indicate invalid DEX. Success
1636   // indicates that no structural problem is found. However, contained
1637   // references data read from parsed items still require validation.
1638   if (!(ParseItemOffsets(image_, type_list_map_item_, sizeof(dex::TypeItem),
1639                          &type_list_offsets_) &&
1640         ParseItemOffsets(image_, annotation_set_ref_list_map_item_,
1641                          sizeof(dex::AnnotationSetRefItem),
1642                          &annotation_set_ref_list_offsets_) &&
1643         ParseItemOffsets(image_, annotation_set_map_item_,
1644                          sizeof(dex::AnnotationOffItem),
1645                          &annotation_set_offsets_) &&
1646         ParseAnnotationsDirectoryItems(
1647             image_, annotations_directory_map_item_,
1648             &annotations_directory_item_offsets_,
1649             &annotations_directory_item_field_annotation_offsets_,
1650             &annotations_directory_item_method_annotation_offsets_,
1651             &annotations_directory_item_parameter_annotation_offsets_))) {
1652     return false;
1653   }
1654   CodeItemParser code_item_parser(image_);
1655   if (!code_item_parser.Init(code_map_item_))
1656     return false;
1657   code_item_offsets_.resize(code_map_item_.size);
1658   for (size_t i = 0; i < code_map_item_.size; ++i) {
1659     const offset_t code_item_offset = code_item_parser.GetNext();
1660     if (code_item_offset == kInvalidOffset)
1661       return false;
1662     code_item_offsets_[i] = code_item_offset;
1663   }
1664   // DEX files are required to have parsable code items.
1665   return !code_item_offsets_.empty();
1666 }
1667 
1668 }  // namespace zucchini
1669