1 //===-- DataExtractor.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Utility/DataExtractor.h"
10 
11 #include "lldb/lldb-defines.h"
12 #include "lldb/lldb-enumerations.h"
13 #include "lldb/lldb-forward.h"
14 #include "lldb/lldb-types.h"
15 
16 #include "lldb/Utility/DataBuffer.h"
17 #include "lldb/Utility/DataBufferHeap.h"
18 #include "lldb/Utility/LLDBAssert.h"
19 #include "lldb/Utility/Log.h"
20 #include "lldb/Utility/Stream.h"
21 #include "lldb/Utility/StreamString.h"
22 #include "lldb/Utility/UUID.h"
23 
24 #include "llvm/ADT/ArrayRef.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/Support/LEB128.h"
27 #include "llvm/Support/MD5.h"
28 #include "llvm/Support/MathExtras.h"
29 
30 #include <algorithm>
31 #include <array>
32 #include <cassert>
33 #include <cstdint>
34 #include <string>
35 
36 #include <cctype>
37 #include <cinttypes>
38 #include <cstring>
39 
40 using namespace lldb;
41 using namespace lldb_private;
42 
43 static inline uint16_t ReadInt16(const unsigned char *ptr, offset_t offset) {
44   uint16_t value;
45   memcpy(&value, ptr + offset, 2);
46   return value;
47 }
48 
49 static inline uint32_t ReadInt32(const unsigned char *ptr,
50                                  offset_t offset = 0) {
51   uint32_t value;
52   memcpy(&value, ptr + offset, 4);
53   return value;
54 }
55 
56 static inline uint64_t ReadInt64(const unsigned char *ptr,
57                                  offset_t offset = 0) {
58   uint64_t value;
59   memcpy(&value, ptr + offset, 8);
60   return value;
61 }
62 
// Load a 16-bit value, in host byte order, from the address "ptr". The bytes
// are copied with memcpy, so "ptr" needs no alignment.
static inline uint16_t ReadInt16(const void *ptr) {
  uint16_t result;
  std::memcpy(&result, ptr, sizeof(result));
  return result;
}
68 
69 static inline uint16_t ReadSwapInt16(const unsigned char *ptr,
70                                      offset_t offset) {
71   uint16_t value;
72   memcpy(&value, ptr + offset, 2);
73   return llvm::ByteSwap_16(value);
74 }
75 
76 static inline uint32_t ReadSwapInt32(const unsigned char *ptr,
77                                      offset_t offset) {
78   uint32_t value;
79   memcpy(&value, ptr + offset, 4);
80   return llvm::ByteSwap_32(value);
81 }
82 
83 static inline uint64_t ReadSwapInt64(const unsigned char *ptr,
84                                      offset_t offset) {
85   uint64_t value;
86   memcpy(&value, ptr + offset, 8);
87   return llvm::ByteSwap_64(value);
88 }
89 
90 static inline uint16_t ReadSwapInt16(const void *ptr) {
91   uint16_t value;
92   memcpy(&value, ptr, 2);
93   return llvm::ByteSwap_16(value);
94 }
95 
96 static inline uint32_t ReadSwapInt32(const void *ptr) {
97   uint32_t value;
98   memcpy(&value, ptr, 4);
99   return llvm::ByteSwap_32(value);
100 }
101 
102 static inline uint64_t ReadSwapInt64(const void *ptr) {
103   uint64_t value;
104   memcpy(&value, ptr, 8);
105   return llvm::ByteSwap_64(value);
106 }
107 
108 static inline uint64_t ReadMaxInt64(const uint8_t *data, size_t byte_size,
109                                     ByteOrder byte_order) {
110   uint64_t res = 0;
111   if (byte_order == eByteOrderBig)
112     for (size_t i = 0; i < byte_size; ++i)
113       res = (res << 8) | data[i];
114   else {
115     assert(byte_order == eByteOrderLittle);
116     for (size_t i = 0; i < byte_size; ++i)
117       res = (res << 8) | data[byte_size - 1 - i];
118   }
119   return res;
120 }
121 
// Default constructor: host byte order, an address size equal to the host
// pointer size, and no shared data buffer.
DataExtractor::DataExtractor()
    : m_byte_order(endian::InlHostByteOrder()), m_addr_size(sizeof(void *)),
      m_data_sp() {}
125 
// Construct an extractor over the "length" bytes starting at "data", using
// byte order "endian" and address size "addr_size". The data is owned by
// someone else (no shared buffer reference is taken), so it must stay around
// as long as this object is valid.
DataExtractor::DataExtractor(const void *data, offset_t length,
                             ByteOrder endian, uint32_t addr_size,
                             uint32_t target_byte_size /*=1*/)
    : m_start(const_cast<uint8_t *>(static_cast<const uint8_t *>(data))),
      m_end(const_cast<uint8_t *>(static_cast<const uint8_t *>(data)) + length),
      m_byte_order(endian), m_addr_size(addr_size), m_data_sp(),
      m_target_byte_size(target_byte_size) {
  // Address sizes are expected to be between 1 and 8 bytes.
  assert(addr_size >= 1 && addr_size <= 8);
}
137 
// Construct an extractor that refers to the shared data in "data_sp", with
// byte order "endian" and address size "addr_size". The data pointers and the
// shared reference are set up by SetData(data_sp); holding that reference
// keeps the data alive as long as any DataExtractor refers to it.
DataExtractor::DataExtractor(const DataBufferSP &data_sp, ByteOrder endian,
                             uint32_t addr_size,
                             uint32_t target_byte_size /*=1*/)
    : m_byte_order(endian), m_addr_size(addr_size), m_data_sp(),
      m_target_byte_size(target_byte_size) {
  // Address sizes are expected to be between 1 and 8 bytes.
  assert(addr_size >= 1 && addr_size <= 8);
  SetData(data_sp);
}
150 
151 // Initialize this object with a subset of the data bytes in "data". If "data"
152 // contains shared data, then a reference to this shared data will added and
153 // the shared data will stay around as long as any object contains a reference
154 // to that data. The endian swap and address size settings are copied from
155 // "data".
156 DataExtractor::DataExtractor(const DataExtractor &data, offset_t offset,
157                              offset_t length, uint32_t target_byte_size /*=1*/)
158     : m_byte_order(data.m_byte_order), m_addr_size(data.m_addr_size),
159       m_data_sp(), m_target_byte_size(target_byte_size) {
160   assert(m_addr_size >= 1 && m_addr_size <= 8);
161   if (data.ValidOffset(offset)) {
162     offset_t bytes_available = data.GetByteSize() - offset;
163     if (length > bytes_available)
164       length = bytes_available;
165     SetData(data, offset, length);
166   }
167 }
168 
// Copy constructor: refers to the same bytes as "rhs", shares its data buffer
// reference (if any), and copies its byte order, address size and target byte
// size.
DataExtractor::DataExtractor(const DataExtractor &rhs)
    : m_start(rhs.m_start), m_end(rhs.m_end), m_byte_order(rhs.m_byte_order),
      m_addr_size(rhs.m_addr_size), m_data_sp(rhs.m_data_sp),
      m_target_byte_size(rhs.m_target_byte_size) {
  // Address sizes are expected to be between 1 and 8 bytes.
  assert(m_addr_size >= 1 && m_addr_size <= 8);
}
175 
176 // Assignment operator
177 const DataExtractor &DataExtractor::operator=(const DataExtractor &rhs) {
178   if (this != &rhs) {
179     m_start = rhs.m_start;
180     m_end = rhs.m_end;
181     m_byte_order = rhs.m_byte_order;
182     m_addr_size = rhs.m_addr_size;
183     m_data_sp = rhs.m_data_sp;
184   }
185   return *this;
186 }
187 
// Destructor: compiler-generated; members clean up after themselves.
DataExtractor::~DataExtractor() = default;
189 
190 // Clears the object contents back to a default invalid state, and release any
191 // references to shared data that this object may contain.
192 void DataExtractor::Clear() {
193   m_start = nullptr;
194   m_end = nullptr;
195   m_byte_order = endian::InlHostByteOrder();
196   m_addr_size = sizeof(void *);
197   m_data_sp.reset();
198 }
199 
200 // If this object contains shared data, this function returns the offset into
201 // that shared data. Else zero is returned.
202 size_t DataExtractor::GetSharedDataOffset() const {
203   if (m_start != nullptr) {
204     const DataBuffer *data = m_data_sp.get();
205     if (data != nullptr) {
206       const uint8_t *data_bytes = data->GetBytes();
207       if (data_bytes != nullptr) {
208         assert(m_start >= data_bytes);
209         return m_start - data_bytes;
210       }
211     }
212   }
213   return 0;
214 }
215 
216 // Set the data with which this object will extract from to data starting at
217 // BYTES and set the length of the data to LENGTH bytes long. The data is
218 // externally owned must be around at least as long as this object points to
219 // the data. No copy of the data is made, this object just refers to this data
220 // and can extract from it. If this object refers to any shared data upon
221 // entry, the reference to that data will be released. Is SWAP is set to true,
222 // any data extracted will be endian swapped.
223 lldb::offset_t DataExtractor::SetData(const void *bytes, offset_t length,
224                                       ByteOrder endian) {
225   m_byte_order = endian;
226   m_data_sp.reset();
227   if (bytes == nullptr || length == 0) {
228     m_start = nullptr;
229     m_end = nullptr;
230   } else {
231     m_start = const_cast<uint8_t *>(static_cast<const uint8_t *>(bytes));
232     m_end = m_start + length;
233   }
234   return GetByteSize();
235 }
236 
237 // Assign the data for this object to be a subrange in "data" starting
238 // "data_offset" bytes into "data" and ending "data_length" bytes later. If
239 // "data_offset" is not a valid offset into "data", then this object will
240 // contain no bytes. If "data_offset" is within "data" yet "data_length" is too
241 // large, the length will be capped at the number of bytes remaining in "data".
242 // If "data" contains a shared pointer to other data, then a ref counted
243 // pointer to that data will be made in this object. If "data" doesn't contain
244 // a shared pointer to data, then the bytes referred to in "data" will need to
245 // exist at least as long as this object refers to those bytes. The address
246 // size and endian swap settings are copied from the current values in "data".
247 lldb::offset_t DataExtractor::SetData(const DataExtractor &data,
248                                       offset_t data_offset,
249                                       offset_t data_length) {
250   m_addr_size = data.m_addr_size;
251   assert(m_addr_size >= 1 && m_addr_size <= 8);
252   // If "data" contains shared pointer to data, then we can use that
253   if (data.m_data_sp) {
254     m_byte_order = data.m_byte_order;
255     return SetData(data.m_data_sp, data.GetSharedDataOffset() + data_offset,
256                    data_length);
257   }
258 
259   // We have a DataExtractor object that just has a pointer to bytes
260   if (data.ValidOffset(data_offset)) {
261     if (data_length > data.GetByteSize() - data_offset)
262       data_length = data.GetByteSize() - data_offset;
263     return SetData(data.GetDataStart() + data_offset, data_length,
264                    data.GetByteOrder());
265   }
266   return 0;
267 }
268 
269 // Assign the data for this object to be a subrange of the shared data in
270 // "data_sp" starting "data_offset" bytes into "data_sp" and ending
271 // "data_length" bytes later. If "data_offset" is not a valid offset into
272 // "data_sp", then this object will contain no bytes. If "data_offset" is
273 // within "data_sp" yet "data_length" is too large, the length will be capped
274 // at the number of bytes remaining in "data_sp". A ref counted pointer to the
275 // data in "data_sp" will be made in this object IF the number of bytes this
276 // object refers to in greater than zero (if at least one byte was available
277 // starting at "data_offset") to ensure the data stays around as long as it is
278 // needed. The address size and endian swap settings will remain unchanged from
279 // their current settings.
280 lldb::offset_t DataExtractor::SetData(const DataBufferSP &data_sp,
281                                       offset_t data_offset,
282                                       offset_t data_length) {
283   m_start = m_end = nullptr;
284 
285   if (data_length > 0) {
286     m_data_sp = data_sp;
287     if (data_sp) {
288       const size_t data_size = data_sp->GetByteSize();
289       if (data_offset < data_size) {
290         m_start = data_sp->GetBytes() + data_offset;
291         const size_t bytes_left = data_size - data_offset;
292         // Cap the length of we asked for too many
293         if (data_length <= bytes_left)
294           m_end = m_start + data_length; // We got all the bytes we wanted
295         else
296           m_end = m_start + bytes_left; // Not all the bytes requested were
297                                         // available in the shared data
298       }
299     }
300   }
301 
302   size_t new_size = GetByteSize();
303 
304   // Don't hold a shared pointer to the data buffer if we don't share any valid
305   // bytes in the shared buffer.
306   if (new_size == 0)
307     m_data_sp.reset();
308 
309   return new_size;
310 }
311 
312 // Extract a single unsigned char from the binary data and update the offset
313 // pointed to by "offset_ptr".
314 //
315 // RETURNS the byte that was extracted, or zero on failure.
316 uint8_t DataExtractor::GetU8(offset_t *offset_ptr) const {
317   const uint8_t *data = static_cast<const uint8_t *>(GetData(offset_ptr, 1));
318   if (data)
319     return *data;
320   return 0;
321 }
322 
323 // Extract "count" unsigned chars from the binary data and update the offset
324 // pointed to by "offset_ptr". The extracted data is copied into "dst".
325 //
326 // RETURNS the non-nullptr buffer pointer upon successful extraction of
327 // all the requested bytes, or nullptr when the data is not available in the
328 // buffer due to being out of bounds, or insufficient data.
329 void *DataExtractor::GetU8(offset_t *offset_ptr, void *dst,
330                            uint32_t count) const {
331   const uint8_t *data =
332       static_cast<const uint8_t *>(GetData(offset_ptr, count));
333   if (data) {
334     // Copy the data into the buffer
335     memcpy(dst, data, count);
336     // Return a non-nullptr pointer to the converted data as an indicator of
337     // success
338     return dst;
339   }
340   return nullptr;
341 }
342 
343 // Extract a single uint16_t from the data and update the offset pointed to by
344 // "offset_ptr".
345 //
346 // RETURNS the uint16_t that was extracted, or zero on failure.
347 uint16_t DataExtractor::GetU16(offset_t *offset_ptr) const {
348   uint16_t val = 0;
349   const uint8_t *data =
350       static_cast<const uint8_t *>(GetData(offset_ptr, sizeof(val)));
351   if (data) {
352     if (m_byte_order != endian::InlHostByteOrder())
353       val = ReadSwapInt16(data);
354     else
355       val = ReadInt16(data);
356   }
357   return val;
358 }
359 
360 uint16_t DataExtractor::GetU16_unchecked(offset_t *offset_ptr) const {
361   uint16_t val;
362   if (m_byte_order == endian::InlHostByteOrder())
363     val = ReadInt16(m_start, *offset_ptr);
364   else
365     val = ReadSwapInt16(m_start, *offset_ptr);
366   *offset_ptr += sizeof(val);
367   return val;
368 }
369 
370 uint32_t DataExtractor::GetU32_unchecked(offset_t *offset_ptr) const {
371   uint32_t val;
372   if (m_byte_order == endian::InlHostByteOrder())
373     val = ReadInt32(m_start, *offset_ptr);
374   else
375     val = ReadSwapInt32(m_start, *offset_ptr);
376   *offset_ptr += sizeof(val);
377   return val;
378 }
379 
380 uint64_t DataExtractor::GetU64_unchecked(offset_t *offset_ptr) const {
381   uint64_t val;
382   if (m_byte_order == endian::InlHostByteOrder())
383     val = ReadInt64(m_start, *offset_ptr);
384   else
385     val = ReadSwapInt64(m_start, *offset_ptr);
386   *offset_ptr += sizeof(val);
387   return val;
388 }
389 
390 // Extract "count" uint16_t values from the binary data and update the offset
391 // pointed to by "offset_ptr". The extracted data is copied into "dst".
392 //
393 // RETURNS the non-nullptr buffer pointer upon successful extraction of
394 // all the requested bytes, or nullptr when the data is not available in the
395 // buffer due to being out of bounds, or insufficient data.
396 void *DataExtractor::GetU16(offset_t *offset_ptr, void *void_dst,
397                             uint32_t count) const {
398   const size_t src_size = sizeof(uint16_t) * count;
399   const uint16_t *src =
400       static_cast<const uint16_t *>(GetData(offset_ptr, src_size));
401   if (src) {
402     if (m_byte_order != endian::InlHostByteOrder()) {
403       uint16_t *dst_pos = static_cast<uint16_t *>(void_dst);
404       uint16_t *dst_end = dst_pos + count;
405       const uint16_t *src_pos = src;
406       while (dst_pos < dst_end) {
407         *dst_pos = ReadSwapInt16(src_pos);
408         ++dst_pos;
409         ++src_pos;
410       }
411     } else {
412       memcpy(void_dst, src, src_size);
413     }
414     // Return a non-nullptr pointer to the converted data as an indicator of
415     // success
416     return void_dst;
417   }
418   return nullptr;
419 }
420 
421 // Extract a single uint32_t from the data and update the offset pointed to by
422 // "offset_ptr".
423 //
424 // RETURNS the uint32_t that was extracted, or zero on failure.
425 uint32_t DataExtractor::GetU32(offset_t *offset_ptr) const {
426   uint32_t val = 0;
427   const uint8_t *data =
428       static_cast<const uint8_t *>(GetData(offset_ptr, sizeof(val)));
429   if (data) {
430     if (m_byte_order != endian::InlHostByteOrder()) {
431       val = ReadSwapInt32(data);
432     } else {
433       memcpy(&val, data, 4);
434     }
435   }
436   return val;
437 }
438 
439 // Extract "count" uint32_t values from the binary data and update the offset
440 // pointed to by "offset_ptr". The extracted data is copied into "dst".
441 //
442 // RETURNS the non-nullptr buffer pointer upon successful extraction of
443 // all the requested bytes, or nullptr when the data is not available in the
444 // buffer due to being out of bounds, or insufficient data.
445 void *DataExtractor::GetU32(offset_t *offset_ptr, void *void_dst,
446                             uint32_t count) const {
447   const size_t src_size = sizeof(uint32_t) * count;
448   const uint32_t *src =
449       static_cast<const uint32_t *>(GetData(offset_ptr, src_size));
450   if (src) {
451     if (m_byte_order != endian::InlHostByteOrder()) {
452       uint32_t *dst_pos = static_cast<uint32_t *>(void_dst);
453       uint32_t *dst_end = dst_pos + count;
454       const uint32_t *src_pos = src;
455       while (dst_pos < dst_end) {
456         *dst_pos = ReadSwapInt32(src_pos);
457         ++dst_pos;
458         ++src_pos;
459       }
460     } else {
461       memcpy(void_dst, src, src_size);
462     }
463     // Return a non-nullptr pointer to the converted data as an indicator of
464     // success
465     return void_dst;
466   }
467   return nullptr;
468 }
469 
470 // Extract a single uint64_t from the data and update the offset pointed to by
471 // "offset_ptr".
472 //
473 // RETURNS the uint64_t that was extracted, or zero on failure.
474 uint64_t DataExtractor::GetU64(offset_t *offset_ptr) const {
475   uint64_t val = 0;
476   const uint8_t *data =
477       static_cast<const uint8_t *>(GetData(offset_ptr, sizeof(val)));
478   if (data) {
479     if (m_byte_order != endian::InlHostByteOrder()) {
480       val = ReadSwapInt64(data);
481     } else {
482       memcpy(&val, data, 8);
483     }
484   }
485   return val;
486 }
487 
488 // GetU64
489 //
490 // Get multiple consecutive 64 bit values. Return true if the entire read
491 // succeeds and increment the offset pointed to by offset_ptr, else return
492 // false and leave the offset pointed to by offset_ptr unchanged.
493 void *DataExtractor::GetU64(offset_t *offset_ptr, void *void_dst,
494                             uint32_t count) const {
495   const size_t src_size = sizeof(uint64_t) * count;
496   const uint64_t *src =
497       static_cast<const uint64_t *>(GetData(offset_ptr, src_size));
498   if (src) {
499     if (m_byte_order != endian::InlHostByteOrder()) {
500       uint64_t *dst_pos = static_cast<uint64_t *>(void_dst);
501       uint64_t *dst_end = dst_pos + count;
502       const uint64_t *src_pos = src;
503       while (dst_pos < dst_end) {
504         *dst_pos = ReadSwapInt64(src_pos);
505         ++dst_pos;
506         ++src_pos;
507       }
508     } else {
509       memcpy(void_dst, src, src_size);
510     }
511     // Return a non-nullptr pointer to the converted data as an indicator of
512     // success
513     return void_dst;
514   }
515   return nullptr;
516 }
517 
518 uint32_t DataExtractor::GetMaxU32(offset_t *offset_ptr,
519                                   size_t byte_size) const {
520   lldbassert(byte_size > 0 && byte_size <= 4 && "GetMaxU32 invalid byte_size!");
521   return GetMaxU64(offset_ptr, byte_size);
522 }
523 
524 uint64_t DataExtractor::GetMaxU64(offset_t *offset_ptr,
525                                   size_t byte_size) const {
526   lldbassert(byte_size > 0 && byte_size <= 8 && "GetMaxU64 invalid byte_size!");
527   switch (byte_size) {
528   case 1:
529     return GetU8(offset_ptr);
530   case 2:
531     return GetU16(offset_ptr);
532   case 4:
533     return GetU32(offset_ptr);
534   case 8:
535     return GetU64(offset_ptr);
536   default: {
537     // General case.
538     const uint8_t *data =
539         static_cast<const uint8_t *>(GetData(offset_ptr, byte_size));
540     if (data == nullptr)
541       return 0;
542     return ReadMaxInt64(data, byte_size, m_byte_order);
543   }
544   }
545   return 0;
546 }
547 
548 uint64_t DataExtractor::GetMaxU64_unchecked(offset_t *offset_ptr,
549                                             size_t byte_size) const {
550   switch (byte_size) {
551   case 1:
552     return GetU8_unchecked(offset_ptr);
553   case 2:
554     return GetU16_unchecked(offset_ptr);
555   case 4:
556     return GetU32_unchecked(offset_ptr);
557   case 8:
558     return GetU64_unchecked(offset_ptr);
559   default: {
560     uint64_t res = ReadMaxInt64(&m_start[*offset_ptr], byte_size, m_byte_order);
561     *offset_ptr += byte_size;
562     return res;
563   }
564   }
565   return 0;
566 }
567 
568 int64_t DataExtractor::GetMaxS64(offset_t *offset_ptr, size_t byte_size) const {
569   uint64_t u64 = GetMaxU64(offset_ptr, byte_size);
570   return llvm::SignExtend64(u64, 8 * byte_size);
571 }
572 
573 uint64_t DataExtractor::GetMaxU64Bitfield(offset_t *offset_ptr, size_t size,
574                                           uint32_t bitfield_bit_size,
575                                           uint32_t bitfield_bit_offset) const {
576   assert(bitfield_bit_size <= 64);
577   uint64_t uval64 = GetMaxU64(offset_ptr, size);
578 
579   if (bitfield_bit_size == 0)
580     return uval64;
581 
582   int32_t lsbcount = bitfield_bit_offset;
583   if (m_byte_order == eByteOrderBig)
584     lsbcount = size * 8 - bitfield_bit_offset - bitfield_bit_size;
585 
586   if (lsbcount > 0)
587     uval64 >>= lsbcount;
588 
589   uint64_t bitfield_mask =
590       (bitfield_bit_size == 64
591            ? std::numeric_limits<uint64_t>::max()
592            : ((static_cast<uint64_t>(1) << bitfield_bit_size) - 1));
593   if (!bitfield_mask && bitfield_bit_offset == 0 && bitfield_bit_size == 64)
594     return uval64;
595 
596   uval64 &= bitfield_mask;
597 
598   return uval64;
599 }
600 
601 int64_t DataExtractor::GetMaxS64Bitfield(offset_t *offset_ptr, size_t size,
602                                          uint32_t bitfield_bit_size,
603                                          uint32_t bitfield_bit_offset) const {
604   assert(size >= 1 && "GetMaxS64Bitfield size must be >= 1");
605   assert(size <= 8 && "GetMaxS64Bitfield size must be <= 8");
606   int64_t sval64 = GetMaxS64(offset_ptr, size);
607   if (bitfield_bit_size == 0)
608     return sval64;
609   int32_t lsbcount = bitfield_bit_offset;
610   if (m_byte_order == eByteOrderBig)
611     lsbcount = size * 8 - bitfield_bit_offset - bitfield_bit_size;
612   if (lsbcount > 0)
613     sval64 >>= lsbcount;
614   uint64_t bitfield_mask = llvm::maskTrailingOnes<uint64_t>(bitfield_bit_size);
615   sval64 &= bitfield_mask;
616   // sign extend if needed
617   if (sval64 & ((static_cast<uint64_t>(1)) << (bitfield_bit_size - 1)))
618     sval64 |= ~bitfield_mask;
619   return sval64;
620 }
621 
622 float DataExtractor::GetFloat(offset_t *offset_ptr) const {
623   return Get<float>(offset_ptr, 0.0f);
624 }
625 
626 double DataExtractor::GetDouble(offset_t *offset_ptr) const {
627   return Get<double>(offset_ptr, 0.0);
628 }
629 
630 long double DataExtractor::GetLongDouble(offset_t *offset_ptr) const {
631   long double val = 0.0;
632 #if defined(__i386__) || defined(__amd64__) || defined(__x86_64__) ||          \
633     defined(_M_IX86) || defined(_M_IA64) || defined(_M_X64)
634   *offset_ptr += CopyByteOrderedData(*offset_ptr, 10, &val, sizeof(val),
635                                      endian::InlHostByteOrder());
636 #else
637   *offset_ptr += CopyByteOrderedData(*offset_ptr, sizeof(val), &val,
638                                      sizeof(val), endian::InlHostByteOrder());
639 #endif
640   return val;
641 }
642 
643 // Extract a single address from the data and update the offset pointed to by
644 // "offset_ptr". The size of the extracted address comes from the
645 // "this->m_addr_size" member variable and should be set correctly prior to
646 // extracting any address values.
647 //
648 // RETURNS the address that was extracted, or zero on failure.
649 uint64_t DataExtractor::GetAddress(offset_t *offset_ptr) const {
650   assert(m_addr_size >= 1 && m_addr_size <= 8);
651   return GetMaxU64(offset_ptr, m_addr_size);
652 }
653 
654 uint64_t DataExtractor::GetAddress_unchecked(offset_t *offset_ptr) const {
655   assert(m_addr_size >= 1 && m_addr_size <= 8);
656   return GetMaxU64_unchecked(offset_ptr, m_addr_size);
657 }
658 
659 size_t DataExtractor::ExtractBytes(offset_t offset, offset_t length,
660                                    ByteOrder dst_byte_order, void *dst) const {
661   const uint8_t *src = PeekData(offset, length);
662   if (src) {
663     if (dst_byte_order != GetByteOrder()) {
664       // Validate that only a word- or register-sized dst is byte swapped
665       assert(length == 1 || length == 2 || length == 4 || length == 8 ||
666              length == 10 || length == 16 || length == 32);
667 
668       for (uint32_t i = 0; i < length; ++i)
669         (static_cast<uint8_t *>(dst))[i] = src[length - i - 1];
670     } else
671       ::memcpy(dst, src, length);
672     return length;
673   }
674   return 0;
675 }
676 
677 // Extract data as it exists in target memory
678 lldb::offset_t DataExtractor::CopyData(offset_t offset, offset_t length,
679                                        void *dst) const {
680   const uint8_t *src = PeekData(offset, length);
681   if (src) {
682     ::memcpy(dst, src, length);
683     return length;
684   }
685   return 0;
686 }
687 
688 // Extract data and swap if needed when doing the copy
689 lldb::offset_t
690 DataExtractor::CopyByteOrderedData(offset_t src_offset, offset_t src_len,
691                                    void *dst_void_ptr, offset_t dst_len,
692                                    ByteOrder dst_byte_order) const {
693   // Validate the source info
694   if (!ValidOffsetForDataOfSize(src_offset, src_len))
695     assert(ValidOffsetForDataOfSize(src_offset, src_len));
696   assert(src_len > 0);
697   assert(m_byte_order == eByteOrderBig || m_byte_order == eByteOrderLittle);
698 
699   // Validate the destination info
700   assert(dst_void_ptr != nullptr);
701   assert(dst_len > 0);
702   assert(dst_byte_order == eByteOrderBig || dst_byte_order == eByteOrderLittle);
703 
704   // Validate that only a word- or register-sized dst is byte swapped
705   assert(dst_byte_order == m_byte_order || dst_len == 1 || dst_len == 2 ||
706          dst_len == 4 || dst_len == 8 || dst_len == 10 || dst_len == 16 ||
707          dst_len == 32);
708 
709   // Must have valid byte orders set in this object and for destination
710   if (!(dst_byte_order == eByteOrderBig ||
711         dst_byte_order == eByteOrderLittle) ||
712       !(m_byte_order == eByteOrderBig || m_byte_order == eByteOrderLittle))
713     return 0;
714 
715   uint8_t *dst = static_cast<uint8_t *>(dst_void_ptr);
716   const uint8_t *src = PeekData(src_offset, src_len);
717   if (src) {
718     if (dst_len >= src_len) {
719       // We are copying the entire value from src into dst. Calculate how many,
720       // if any, zeroes we need for the most significant bytes if "dst_len" is
721       // greater than "src_len"...
722       const size_t num_zeroes = dst_len - src_len;
723       if (dst_byte_order == eByteOrderBig) {
724         // Big endian, so we lead with zeroes...
725         if (num_zeroes > 0)
726           ::memset(dst, 0, num_zeroes);
727         // Then either copy or swap the rest
728         if (m_byte_order == eByteOrderBig) {
729           ::memcpy(dst + num_zeroes, src, src_len);
730         } else {
731           for (uint32_t i = 0; i < src_len; ++i)
732             dst[i + num_zeroes] = src[src_len - 1 - i];
733         }
734       } else {
735         // Little endian destination, so we lead the value bytes
736         if (m_byte_order == eByteOrderBig) {
737           for (uint32_t i = 0; i < src_len; ++i)
738             dst[i] = src[src_len - 1 - i];
739         } else {
740           ::memcpy(dst, src, src_len);
741         }
742         // And zero the rest...
743         if (num_zeroes > 0)
744           ::memset(dst + src_len, 0, num_zeroes);
745       }
746       return src_len;
747     } else {
748       // We are only copying some of the value from src into dst..
749 
750       if (dst_byte_order == eByteOrderBig) {
751         // Big endian dst
752         if (m_byte_order == eByteOrderBig) {
753           // Big endian dst, with big endian src
754           ::memcpy(dst, src + (src_len - dst_len), dst_len);
755         } else {
756           // Big endian dst, with little endian src
757           for (uint32_t i = 0; i < dst_len; ++i)
758             dst[i] = src[dst_len - 1 - i];
759         }
760       } else {
761         // Little endian dst
762         if (m_byte_order == eByteOrderBig) {
763           // Little endian dst, with big endian src
764           for (uint32_t i = 0; i < dst_len; ++i)
765             dst[i] = src[src_len - 1 - i];
766         } else {
767           // Little endian dst, with big endian src
768           ::memcpy(dst, src, dst_len);
769         }
770       }
771       return dst_len;
772     }
773   }
774   return 0;
775 }
776 
777 // Extracts a variable length NULL terminated C string from the data at the
778 // offset pointed to by "offset_ptr".  The "offset_ptr" will be updated with
779 // the offset of the byte that follows the NULL terminator byte.
780 //
781 // If the offset pointed to by "offset_ptr" is out of bounds, or if "length" is
782 // non-zero and there aren't enough available bytes, nullptr will be returned
783 // and "offset_ptr" will not be updated.
784 const char *DataExtractor::GetCStr(offset_t *offset_ptr) const {
785   const char *start = reinterpret_cast<const char *>(PeekData(*offset_ptr, 1));
786   // Already at the end of the data.
787   if (!start)
788     return nullptr;
789 
790   const char *end = reinterpret_cast<const char *>(m_end);
791 
792   // Check all bytes for a null terminator that terminates a C string.
793   const char *terminator_or_end = std::find(start, end, '\0');
794 
795   // We didn't find a null terminator, so return nullptr to indicate that there
796   // is no valid C string at that offset.
797   if (terminator_or_end == end)
798     return nullptr;
799 
800   // Update offset_ptr for the caller to point to the data behind the
801   // terminator (which is 1 byte long).
802   *offset_ptr += (terminator_or_end - start + 1UL);
803   return start;
804 }
805 
806 // Extracts a NULL terminated C string from the fixed length field of length
807 // "len" at the offset pointed to by "offset_ptr". The "offset_ptr" will be
808 // updated with the offset of the byte that follows the fixed length field.
809 //
810 // If the offset pointed to by "offset_ptr" is out of bounds, or if the offset
811 // plus the length of the field is out of bounds, or if the field does not
812 // contain a NULL terminator byte, nullptr will be returned and "offset_ptr"
813 // will not be updated.
814 const char *DataExtractor::GetCStr(offset_t *offset_ptr, offset_t len) const {
815   const char *cstr = reinterpret_cast<const char *>(PeekData(*offset_ptr, len));
816   if (cstr != nullptr) {
817     if (memchr(cstr, '\0', len) == nullptr) {
818       return nullptr;
819     }
820     *offset_ptr += len;
821     return cstr;
822   }
823   return nullptr;
824 }
825 
826 // Peeks at a string in the contained data. No verification is done to make
827 // sure the entire string lies within the bounds of this object's data, only
828 // "offset" is verified to be a valid offset.
829 //
830 // Returns a valid C string pointer if "offset" is a valid offset in this
831 // object's data, else nullptr is returned.
832 const char *DataExtractor::PeekCStr(offset_t offset) const {
833   return reinterpret_cast<const char *>(PeekData(offset, 1));
834 }
835 
836 // Extracts an unsigned LEB128 number from this object's data starting at the
837 // offset pointed to by "offset_ptr". The offset pointed to by "offset_ptr"
838 // will be updated with the offset of the byte following the last extracted
839 // byte.
840 //
841 // Returned the extracted integer value.
842 uint64_t DataExtractor::GetULEB128(offset_t *offset_ptr) const {
843   const uint8_t *src = PeekData(*offset_ptr, 1);
844   if (src == nullptr)
845     return 0;
846 
847   unsigned byte_count = 0;
848   uint64_t result = llvm::decodeULEB128(src, &byte_count, m_end);
849   *offset_ptr += byte_count;
850   return result;
851 }
852 
853 // Extracts an signed LEB128 number from this object's data starting at the
854 // offset pointed to by "offset_ptr". The offset pointed to by "offset_ptr"
855 // will be updated with the offset of the byte following the last extracted
856 // byte.
857 //
858 // Returned the extracted integer value.
859 int64_t DataExtractor::GetSLEB128(offset_t *offset_ptr) const {
860   const uint8_t *src = PeekData(*offset_ptr, 1);
861   if (src == nullptr)
862     return 0;
863 
864   unsigned byte_count = 0;
865   int64_t result = llvm::decodeSLEB128(src, &byte_count, m_end);
866   *offset_ptr += byte_count;
867   return result;
868 }
869 
870 // Skips a ULEB128 number (signed or unsigned) from this object's data starting
871 // at the offset pointed to by "offset_ptr". The offset pointed to by
872 // "offset_ptr" will be updated with the offset of the byte following the last
873 // extracted byte.
874 //
875 // Returns the number of bytes consumed during the extraction.
876 uint32_t DataExtractor::Skip_LEB128(offset_t *offset_ptr) const {
877   uint32_t bytes_consumed = 0;
878   const uint8_t *src = PeekData(*offset_ptr, 1);
879   if (src == nullptr)
880     return 0;
881 
882   const uint8_t *end = m_end;
883 
884   if (src < end) {
885     const uint8_t *src_pos = src;
886     while ((src_pos < end) && (*src_pos++ & 0x80))
887       ++bytes_consumed;
888     *offset_ptr += src_pos - src;
889   }
890   return bytes_consumed;
891 }
892 
893 // Dumps bytes from this object's data to the stream "s" starting
894 // "start_offset" bytes into this data, and ending with the byte before
895 // "end_offset". "base_addr" will be added to the offset into the dumped data
896 // when showing the offset into the data in the output information.
897 // "num_per_line" objects of type "type" will be dumped with the option to
898 // override the format for each object with "type_format". "type_format" is a
899 // printf style formatting string. If "type_format" is nullptr, then an
900 // appropriate format string will be used for the supplied "type". If the
901 // stream "s" is nullptr, then the output will be send to Log().
902 lldb::offset_t DataExtractor::PutToLog(Log *log, offset_t start_offset,
903                                        offset_t length, uint64_t base_addr,
904                                        uint32_t num_per_line,
905                                        DataExtractor::Type type) const {
906   if (log == nullptr)
907     return start_offset;
908 
909   offset_t offset;
910   offset_t end_offset;
911   uint32_t count;
912   StreamString sstr;
913   for (offset = start_offset, end_offset = offset + length, count = 0;
914        ValidOffset(offset) && offset < end_offset; ++count) {
915     if ((count % num_per_line) == 0) {
916       // Print out any previous string
917       if (sstr.GetSize() > 0) {
918         log->PutString(sstr.GetString());
919         sstr.Clear();
920       }
921       // Reset string offset and fill the current line string with address:
922       if (base_addr != LLDB_INVALID_ADDRESS)
923         sstr.Printf("0x%8.8" PRIx64 ":",
924                     static_cast<uint64_t>(base_addr + (offset - start_offset)));
925     }
926 
927     switch (type) {
928     case TypeUInt8:
929       sstr.Printf(" %2.2x", GetU8(&offset));
930       break;
931     case TypeChar: {
932       char ch = GetU8(&offset);
933       sstr.Printf(" %c", llvm::isPrint(ch) ? ch : ' ');
934     } break;
935     case TypeUInt16:
936       sstr.Printf(" %4.4x", GetU16(&offset));
937       break;
938     case TypeUInt32:
939       sstr.Printf(" %8.8x", GetU32(&offset));
940       break;
941     case TypeUInt64:
942       sstr.Printf(" %16.16" PRIx64, GetU64(&offset));
943       break;
944     case TypePointer:
945       sstr.Printf(" 0x%" PRIx64, GetAddress(&offset));
946       break;
947     case TypeULEB128:
948       sstr.Printf(" 0x%" PRIx64, GetULEB128(&offset));
949       break;
950     case TypeSLEB128:
951       sstr.Printf(" %" PRId64, GetSLEB128(&offset));
952       break;
953     }
954   }
955 
956   if (!sstr.Empty())
957     log->PutString(sstr.GetString());
958 
959   return offset; // Return the offset at which we ended up
960 }
961 
962 size_t DataExtractor::Copy(DataExtractor &dest_data) const {
963   if (m_data_sp) {
964     // we can pass along the SP to the data
965     dest_data.SetData(m_data_sp);
966   } else {
967     const uint8_t *base_ptr = m_start;
968     size_t data_size = GetByteSize();
969     dest_data.SetData(DataBufferSP(new DataBufferHeap(base_ptr, data_size)));
970   }
971   return GetByteSize();
972 }
973 
974 bool DataExtractor::Append(DataExtractor &rhs) {
975   if (rhs.GetByteOrder() != GetByteOrder())
976     return false;
977 
978   if (rhs.GetByteSize() == 0)
979     return true;
980 
981   if (GetByteSize() == 0)
982     return (rhs.Copy(*this) > 0);
983 
984   size_t bytes = GetByteSize() + rhs.GetByteSize();
985 
986   DataBufferHeap *buffer_heap_ptr = nullptr;
987   DataBufferSP buffer_sp(buffer_heap_ptr = new DataBufferHeap(bytes, 0));
988 
989   if (!buffer_sp || buffer_heap_ptr == nullptr)
990     return false;
991 
992   uint8_t *bytes_ptr = buffer_heap_ptr->GetBytes();
993 
994   memcpy(bytes_ptr, GetDataStart(), GetByteSize());
995   memcpy(bytes_ptr + GetByteSize(), rhs.GetDataStart(), rhs.GetByteSize());
996 
997   SetData(buffer_sp);
998 
999   return true;
1000 }
1001 
1002 bool DataExtractor::Append(void *buf, offset_t length) {
1003   if (buf == nullptr)
1004     return false;
1005 
1006   if (length == 0)
1007     return true;
1008 
1009   size_t bytes = GetByteSize() + length;
1010 
1011   DataBufferHeap *buffer_heap_ptr = nullptr;
1012   DataBufferSP buffer_sp(buffer_heap_ptr = new DataBufferHeap(bytes, 0));
1013 
1014   if (!buffer_sp || buffer_heap_ptr == nullptr)
1015     return false;
1016 
1017   uint8_t *bytes_ptr = buffer_heap_ptr->GetBytes();
1018 
1019   if (GetByteSize() > 0)
1020     memcpy(bytes_ptr, GetDataStart(), GetByteSize());
1021 
1022   memcpy(bytes_ptr + GetByteSize(), buf, length);
1023 
1024   SetData(buffer_sp);
1025 
1026   return true;
1027 }
1028 
1029 void DataExtractor::Checksum(llvm::SmallVectorImpl<uint8_t> &dest,
1030                              uint64_t max_data) {
1031   if (max_data == 0)
1032     max_data = GetByteSize();
1033   else
1034     max_data = std::min(max_data, GetByteSize());
1035 
1036   llvm::MD5 md5;
1037 
1038   const llvm::ArrayRef<uint8_t> data(GetDataStart(), max_data);
1039   md5.update(data);
1040 
1041   llvm::MD5::MD5Result result;
1042   md5.final(result);
1043 
1044   dest.clear();
1045   dest.append(result.begin(), result.end());
1046 }
1047