1 //! Defining custom `Reader`s quickly.
2 
3 use alloc::borrow::Cow;
4 use alloc::rc::Rc;
5 use alloc::string::String;
6 use alloc::sync::Arc;
7 use core::fmt::Debug;
8 use core::ops::{Deref, Index, Range, RangeFrom, RangeTo};
9 use core::slice;
10 use core::str;
11 use stable_deref_trait::CloneStableDeref;
12 
13 use crate::endianity::Endianity;
14 use crate::read::{Error, Reader, ReaderOffsetId, Result};
15 
/// A reference counted, non-thread-safe slice of bytes and associated
/// endianity.
///
/// This is an alias for `EndianReader` whose backing buffer is an `Rc<[u8]>`.
///
/// # Example
///
/// ```
/// # #[cfg(feature = "std")] {
/// use std::rc::Rc;
///
/// let buf = Rc::from(&[1, 2, 3, 4][..]);
/// let reader = gimli::EndianRcSlice::new(buf, gimli::NativeEndian);
/// # let _ = reader;
/// # }
/// ```
pub type EndianRcSlice<Endian> = EndianReader<Endian, Rc<[u8]>>;
29 
/// An atomically reference counted, thread-safe slice of bytes and associated
/// endianity.
///
/// This is an alias for `EndianReader` whose backing buffer is an `Arc<[u8]>`.
///
/// # Example
///
/// ```
/// # #[cfg(feature = "std")] {
/// use std::sync::Arc;
///
/// let buf = Arc::from(&[1, 2, 3, 4][..]);
/// let reader = gimli::EndianArcSlice::new(buf, gimli::NativeEndian);
/// # let _ = reader;
/// # }
/// ```
pub type EndianArcSlice<Endian> = EndianReader<Endian, Arc<[u8]>>;
43 
44 /// An easy way to define a custom `Reader` implementation with a reference to a
45 /// generic buffer of bytes and an associated endianity.
46 ///
47 /// Note that the whole original buffer is kept alive in memory even if there is
48 /// only one reader that references only a handful of bytes from that original
49 /// buffer. That is, `EndianReader` will not do any copying, moving, or
50 /// compacting in order to free up unused regions of the original buffer. If you
51 /// require this kind of behavior, it is up to you to implement `Reader`
52 /// directly by-hand.
53 ///
54 /// # Example
55 ///
56 /// Say you have an `mmap`ed file that you want to serve as a `gimli::Reader`.
57 /// You can wrap that `mmap`ed file up in a `MmapFile` type and use
58 /// `EndianReader<Rc<MmapFile>>` or `EndianReader<Arc<MmapFile>>` as readers as
59 /// long as `MmapFile` dereferences to the underlying `[u8]` data.
60 ///
61 /// ```
62 /// use std::io;
63 /// use std::ops::Deref;
64 /// use std::path::Path;
65 /// use std::slice;
66 /// use std::sync::Arc;
67 ///
68 /// /// A type that represents an `mmap`ed file.
69 /// #[derive(Debug)]
70 /// pub struct MmapFile {
71 ///     ptr: *const u8,
72 ///     len: usize,
73 /// }
74 ///
75 /// impl MmapFile {
76 ///     pub fn new(path: &Path) -> io::Result<MmapFile> {
77 ///         // Call `mmap` and check for errors and all that...
78 /// #       unimplemented!()
79 ///     }
80 /// }
81 ///
82 /// impl Drop for MmapFile {
83 ///     fn drop(&mut self) {
84 ///         // Call `munmap` to clean up after ourselves...
85 /// #       unimplemented!()
86 ///     }
87 /// }
88 ///
89 /// // And `MmapFile` can deref to a slice of the `mmap`ed region of memory.
90 /// impl Deref for MmapFile {
91 ///     type Target = [u8];
92 ///     fn deref(&self) -> &[u8] {
93 ///         unsafe {
94 ///             slice::from_raw_parts(self.ptr, self.len)
95 ///         }
96 ///     }
97 /// }
98 ///
99 /// /// A type that represents a shared `mmap`ed file.
100 /// #[derive(Debug, Clone)]
101 /// pub struct ArcMmapFile(Arc<MmapFile>);
102 ///
103 /// // And `ArcMmapFile` can deref to a slice of the `mmap`ed region of memory.
104 /// impl Deref for ArcMmapFile {
105 ///     type Target = [u8];
106 ///     fn deref(&self) -> &[u8] {
107 ///         &self.0
108 ///     }
109 /// }
110 ///
111 /// // These are both valid for any `Rc` or `Arc`.
112 /// unsafe impl gimli::StableDeref for ArcMmapFile {}
113 /// unsafe impl gimli::CloneStableDeref for ArcMmapFile {}
114 ///
115 /// /// A `gimli::Reader` that is backed by an `mmap`ed file!
116 /// pub type MmapFileReader<Endian> = gimli::EndianReader<Endian, ArcMmapFile>;
117 /// # fn test(_: &MmapFileReader<gimli::NativeEndian>) { }
118 /// ```
119 #[derive(Debug, Clone, Copy, Hash)]
120 pub struct EndianReader<Endian, T>
121 where
122     Endian: Endianity,
123     T: CloneStableDeref<Target = [u8]> + Debug,
124 {
125     range: SubRange<T>,
126     endian: Endian,
127 }
128 
129 impl<Endian, T1, T2> PartialEq<EndianReader<Endian, T2>> for EndianReader<Endian, T1>
130 where
131     Endian: Endianity,
132     T1: CloneStableDeref<Target = [u8]> + Debug,
133     T2: CloneStableDeref<Target = [u8]> + Debug,
134 {
eq(&self, rhs: &EndianReader<Endian, T2>) -> bool135     fn eq(&self, rhs: &EndianReader<Endian, T2>) -> bool {
136         self.bytes() == rhs.bytes()
137     }
138 }
139 
// `Eq` has no methods of its own. This marker impl declares that the `==`
// supplied by the `PartialEq` impl, which compares the readers' visible bytes,
// is a full equivalence relation when both sides share the same `T`.
impl<Endian, T> Eq for EndianReader<Endian, T>
where
    Endian: Endianity,
    T: CloneStableDeref<Target = [u8]> + Debug,
{
}
146 
// This is separated out from `EndianReader` so that we can avoid running afoul
// of borrowck. We need to `read_slice(&mut self, ...) -> &[u8]` and then call
// `self.endian.read_whatever` on the result. The problem is that the returned
// slice keeps the `&mut self` borrow active, so we wouldn't be able to access
// `self.endian`. Splitting the sub-range out from the endian lets us work
// around this, making it so that only the `self.range` borrow is held active,
// not all of `self`.
//
// This also serves to encapsulate the unsafe code concerning `CloneStableDeref`.
// The `bytes` member is held so that the bytes live long enough, and the
// `CloneStableDeref` ensures these bytes never move.  The `ptr` and `len`
// members point inside `bytes`, and are updated during read operations.
//
// Invariant relied upon by every `unsafe` block operating on this type:
// `ptr..ptr + len` always lies within the slice that `bytes` dereferences to.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct SubRange<T>
where
    T: CloneStableDeref<Target = [u8]> + Debug,
{
    // Owner of the underlying buffer; kept only so the memory stays alive.
    bytes: T,
    // Start of the currently visible window inside `bytes`.
    ptr: *const u8,
    // Number of bytes in the currently visible window.
    len: usize,
}
168 
// SAFETY (applies to both impls below): the raw `ptr` field is the only thing
// preventing these auto traits from being inferred. `ptr`/`len` only ever
// describe memory inside the buffer owned by `bytes`, and this type only hands
// that memory out as shared `&[u8]` slices, so a `SubRange<T>` can be sent or
// shared across threads whenever `T` itself can.
unsafe impl<T> Send for SubRange<T> where T: CloneStableDeref<Target = [u8]> + Debug + Send {}

unsafe impl<T> Sync for SubRange<T> where T: CloneStableDeref<Target = [u8]> + Debug + Sync {}
172 
173 impl<T> SubRange<T>
174 where
175     T: CloneStableDeref<Target = [u8]> + Debug,
176 {
177     #[inline]
new(bytes: T) -> Self178     fn new(bytes: T) -> Self {
179         let ptr = bytes.as_ptr();
180         let len = bytes.len();
181         SubRange { bytes, ptr, len }
182     }
183 
184     #[inline]
bytes(&self) -> &[u8]185     fn bytes(&self) -> &[u8] {
186         // Safe because `T` implements `CloneStableDeref`, `bytes` can't be modified,
187         // and all operations that modify `ptr` and `len` ensure they stay in range.
188         unsafe { slice::from_raw_parts(self.ptr, self.len) }
189     }
190 
191     #[inline]
len(&self) -> usize192     fn len(&self) -> usize {
193         self.len
194     }
195 
196     #[inline]
truncate(&mut self, len: usize)197     fn truncate(&mut self, len: usize) {
198         assert!(len <= self.len);
199         self.len = len;
200     }
201 
202     #[inline]
skip(&mut self, len: usize)203     fn skip(&mut self, len: usize) {
204         assert!(len <= self.len);
205         self.ptr = unsafe { self.ptr.add(len) };
206         self.len -= len;
207     }
208 
209     #[inline]
read_slice(&mut self, len: usize) -> Option<&[u8]>210     fn read_slice(&mut self, len: usize) -> Option<&[u8]> {
211         if self.len() < len {
212             None
213         } else {
214             // Same as for `bytes()`.
215             let bytes = unsafe { slice::from_raw_parts(self.ptr, len) };
216             self.skip(len);
217             Some(bytes)
218         }
219     }
220 }
221 
222 impl<Endian, T> EndianReader<Endian, T>
223 where
224     Endian: Endianity,
225     T: CloneStableDeref<Target = [u8]> + Debug,
226 {
227     /// Construct a new `EndianReader` with the given bytes.
228     #[inline]
new(bytes: T, endian: Endian) -> EndianReader<Endian, T>229     pub fn new(bytes: T, endian: Endian) -> EndianReader<Endian, T> {
230         EndianReader {
231             range: SubRange::new(bytes),
232             endian,
233         }
234     }
235 
236     /// Return a reference to the raw bytes underlying this reader.
237     #[inline]
bytes(&self) -> &[u8]238     pub fn bytes(&self) -> &[u8] {
239         self.range.bytes()
240     }
241 }
242 
243 /// # Range Methods
244 ///
245 /// Unfortunately, `std::ops::Index` *must* return a reference, so we can't
246 /// implement `Index<Range<usize>>` to return a new `EndianReader` the way we
247 /// would like to. Instead, we abandon fancy indexing operators and have these
248 /// plain old methods.
249 impl<Endian, T> EndianReader<Endian, T>
250 where
251     Endian: Endianity,
252     T: CloneStableDeref<Target = [u8]> + Debug,
253 {
254     /// Take the given `start..end` range of the underlying buffer and return a
255     /// new `EndianReader`.
256     ///
257     /// ```
258     /// # #[cfg(feature = "std")] {
259     /// use gimli::{EndianReader, LittleEndian};
260     /// use std::sync::Arc;
261     ///
262     /// let buf = Arc::<[u8]>::from(&[0x01, 0x02, 0x03, 0x04][..]);
263     /// let reader = EndianReader::new(buf.clone(), LittleEndian);
264     /// assert_eq!(reader.range(1..3),
265     ///            EndianReader::new(&buf[1..3], LittleEndian));
266     /// # }
267     /// ```
268     ///
269     /// # Panics
270     ///
271     /// Panics if the range is out of bounds.
range(&self, idx: Range<usize>) -> EndianReader<Endian, T>272     pub fn range(&self, idx: Range<usize>) -> EndianReader<Endian, T> {
273         let mut r = self.clone();
274         r.range.skip(idx.start);
275         r.range.truncate(idx.len());
276         r
277     }
278 
279     /// Take the given `start..` range of the underlying buffer and return a new
280     /// `EndianReader`.
281     ///
282     /// ```
283     /// # #[cfg(feature = "std")] {
284     /// use gimli::{EndianReader, LittleEndian};
285     /// use std::sync::Arc;
286     ///
287     /// let buf = Arc::<[u8]>::from(&[0x01, 0x02, 0x03, 0x04][..]);
288     /// let reader = EndianReader::new(buf.clone(), LittleEndian);
289     /// assert_eq!(reader.range_from(2..),
290     ///            EndianReader::new(&buf[2..], LittleEndian));
291     /// # }
292     /// ```
293     ///
294     /// # Panics
295     ///
296     /// Panics if the range is out of bounds.
range_from(&self, idx: RangeFrom<usize>) -> EndianReader<Endian, T>297     pub fn range_from(&self, idx: RangeFrom<usize>) -> EndianReader<Endian, T> {
298         let mut r = self.clone();
299         r.range.skip(idx.start);
300         r
301     }
302 
303     /// Take the given `..end` range of the underlying buffer and return a new
304     /// `EndianReader`.
305     ///
306     /// ```
307     /// # #[cfg(feature = "std")] {
308     /// use gimli::{EndianReader, LittleEndian};
309     /// use std::sync::Arc;
310     ///
311     /// let buf = Arc::<[u8]>::from(&[0x01, 0x02, 0x03, 0x04][..]);
312     /// let reader = EndianReader::new(buf.clone(), LittleEndian);
313     /// assert_eq!(reader.range_to(..3),
314     ///            EndianReader::new(&buf[..3], LittleEndian));
315     /// # }
316     /// ```
317     ///
318     /// # Panics
319     ///
320     /// Panics if the range is out of bounds.
range_to(&self, idx: RangeTo<usize>) -> EndianReader<Endian, T>321     pub fn range_to(&self, idx: RangeTo<usize>) -> EndianReader<Endian, T> {
322         let mut r = self.clone();
323         r.range.truncate(idx.end);
324         r
325     }
326 }
327 
328 impl<Endian, T> Index<usize> for EndianReader<Endian, T>
329 where
330     Endian: Endianity,
331     T: CloneStableDeref<Target = [u8]> + Debug,
332 {
333     type Output = u8;
index(&self, idx: usize) -> &Self::Output334     fn index(&self, idx: usize) -> &Self::Output {
335         &self.bytes()[idx]
336     }
337 }
338 
339 impl<Endian, T> Index<RangeFrom<usize>> for EndianReader<Endian, T>
340 where
341     Endian: Endianity,
342     T: CloneStableDeref<Target = [u8]> + Debug,
343 {
344     type Output = [u8];
index(&self, idx: RangeFrom<usize>) -> &Self::Output345     fn index(&self, idx: RangeFrom<usize>) -> &Self::Output {
346         &self.bytes()[idx]
347     }
348 }
349 
350 impl<Endian, T> Deref for EndianReader<Endian, T>
351 where
352     Endian: Endianity,
353     T: CloneStableDeref<Target = [u8]> + Debug,
354 {
355     type Target = [u8];
deref(&self) -> &Self::Target356     fn deref(&self) -> &Self::Target {
357         self.bytes()
358     }
359 }
360 
361 impl<Endian, T> Reader for EndianReader<Endian, T>
362 where
363     Endian: Endianity,
364     T: CloneStableDeref<Target = [u8]> + Debug,
365 {
366     type Endian = Endian;
367     type Offset = usize;
368 
369     #[inline]
endian(&self) -> Endian370     fn endian(&self) -> Endian {
371         self.endian
372     }
373 
374     #[inline]
len(&self) -> usize375     fn len(&self) -> usize {
376         self.range.len()
377     }
378 
379     #[inline]
empty(&mut self)380     fn empty(&mut self) {
381         self.range.truncate(0);
382     }
383 
384     #[inline]
truncate(&mut self, len: usize) -> Result<()>385     fn truncate(&mut self, len: usize) -> Result<()> {
386         if self.len() < len {
387             Err(Error::UnexpectedEof(self.offset_id()))
388         } else {
389             self.range.truncate(len);
390             Ok(())
391         }
392     }
393 
394     #[inline]
offset_from(&self, base: &EndianReader<Endian, T>) -> usize395     fn offset_from(&self, base: &EndianReader<Endian, T>) -> usize {
396         let base_ptr = base.bytes().as_ptr() as *const u8 as usize;
397         let ptr = self.bytes().as_ptr() as *const u8 as usize;
398         debug_assert!(base_ptr <= ptr);
399         debug_assert!(ptr + self.bytes().len() <= base_ptr + base.bytes().len());
400         ptr - base_ptr
401     }
402 
403     #[inline]
offset_id(&self) -> ReaderOffsetId404     fn offset_id(&self) -> ReaderOffsetId {
405         ReaderOffsetId(self.bytes().as_ptr() as u64)
406     }
407 
408     #[inline]
lookup_offset_id(&self, id: ReaderOffsetId) -> Option<Self::Offset>409     fn lookup_offset_id(&self, id: ReaderOffsetId) -> Option<Self::Offset> {
410         let id = id.0;
411         let self_id = self.bytes().as_ptr() as u64;
412         let self_len = self.bytes().len() as u64;
413         if id >= self_id && id <= self_id + self_len {
414             Some((id - self_id) as usize)
415         } else {
416             None
417         }
418     }
419 
420     #[inline]
find(&self, byte: u8) -> Result<usize>421     fn find(&self, byte: u8) -> Result<usize> {
422         self.bytes()
423             .iter()
424             .position(|x| *x == byte)
425             .ok_or_else(|| Error::UnexpectedEof(self.offset_id()))
426     }
427 
428     #[inline]
skip(&mut self, len: usize) -> Result<()>429     fn skip(&mut self, len: usize) -> Result<()> {
430         if self.len() < len {
431             Err(Error::UnexpectedEof(self.offset_id()))
432         } else {
433             self.range.skip(len);
434             Ok(())
435         }
436     }
437 
438     #[inline]
split(&mut self, len: usize) -> Result<Self>439     fn split(&mut self, len: usize) -> Result<Self> {
440         if self.len() < len {
441             Err(Error::UnexpectedEof(self.offset_id()))
442         } else {
443             let mut r = self.clone();
444             r.range.truncate(len);
445             self.range.skip(len);
446             Ok(r)
447         }
448     }
449 
450     #[inline]
to_slice(&self) -> Result<Cow<[u8]>>451     fn to_slice(&self) -> Result<Cow<[u8]>> {
452         Ok(self.bytes().into())
453     }
454 
455     #[inline]
to_string(&self) -> Result<Cow<str>>456     fn to_string(&self) -> Result<Cow<str>> {
457         match str::from_utf8(self.bytes()) {
458             Ok(s) => Ok(s.into()),
459             _ => Err(Error::BadUtf8),
460         }
461     }
462 
463     #[inline]
to_string_lossy(&self) -> Result<Cow<str>>464     fn to_string_lossy(&self) -> Result<Cow<str>> {
465         Ok(String::from_utf8_lossy(self.bytes()))
466     }
467 
468     #[inline]
read_slice(&mut self, buf: &mut [u8]) -> Result<()>469     fn read_slice(&mut self, buf: &mut [u8]) -> Result<()> {
470         match self.range.read_slice(buf.len()) {
471             Some(slice) => {
472                 buf.clone_from_slice(slice);
473                 Ok(())
474             }
475             None => Err(Error::UnexpectedEof(self.offset_id())),
476         }
477     }
478 }
479 
#[cfg(test)]
mod tests {
    use super::*;
    use crate::endianity::NativeEndian;
    use crate::read::Reader;

    /// Wrap `bytes` in an `EndianReader` that uses `NativeEndian`.
    fn native_reader<T>(bytes: T) -> EndianReader<NativeEndian, T>
    where
        T: CloneStableDeref<Target = [u8]> + Debug,
    {
        EndianReader::new(bytes, NativeEndian)
    }

    /// Shared ten-byte fixture used by most tests below.
    const BUF: &[u8] = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 0];

    #[test]
    fn test_reader_split() {
        let mut rest = native_reader(BUF);
        let head = rest.split(3).unwrap();
        assert_eq!(head, native_reader(&BUF[..3]));
        assert_eq!(rest, native_reader(&BUF[3..]));
    }

    #[test]
    fn test_reader_split_out_of_bounds() {
        let mut reader = native_reader(BUF);
        let outcome = reader.split(30);
        assert!(outcome.is_err());
    }

    #[test]
    fn bytes_and_len_and_range_and_eq() {
        /// Check that `actual` exposes exactly `expected` and differs from a
        /// reader over the whole of `BUF`.
        fn check_sub_reader<'a>(
            actual: &EndianReader<NativeEndian, &'a [u8]>,
            expected: &'a [u8],
        ) {
            assert_eq!(actual.len(), expected.len());
            assert_eq!(actual.bytes(), expected);
            assert_ne!(*actual, native_reader(BUF));
            assert_eq!(*actual, native_reader(expected));
        }

        let whole = native_reader(BUF);
        assert_eq!(whole.len(), BUF.len());
        assert_eq!(whole.bytes(), BUF);
        assert_eq!(whole, native_reader(BUF));

        let middle = whole.range(2..8);
        let middle_bytes = &BUF[2..8];
        check_sub_reader(&middle, middle_bytes);

        let tail = middle.range_from(1..);
        let tail_bytes = &middle_bytes[1..];
        check_sub_reader(&tail, tail_bytes);

        let head = tail.range_to(..4);
        let head_bytes = &tail_bytes[..4];
        check_sub_reader(&head, head_bytes);
    }

    #[test]
    fn find() {
        let mut reader = native_reader(BUF);
        reader.skip(2).unwrap();
        let expected = BUF[2..].iter().position(|x| *x == 5).unwrap();
        assert_eq!(reader.find(5), Ok(expected));
    }

    #[test]
    fn indexing() {
        let mut reader = native_reader(BUF);
        reader.skip(2).unwrap();
        // Index 0 refers to the first unread byte, not the start of `BUF`.
        assert_eq!(reader[0], BUF[2]);
    }

    #[test]
    #[should_panic]
    fn indexing_out_of_bounds() {
        let mut reader = native_reader(BUF);
        reader.skip(2).unwrap();
        let _ = reader[900];
    }

    #[test]
    fn endian() {
        let reader = native_reader(BUF);
        assert_eq!(reader.endian(), NativeEndian);
    }

    #[test]
    fn empty() {
        let mut reader = native_reader(BUF);
        assert!(!reader.is_empty());
        reader.empty();
        assert!(reader.is_empty());
        assert!(reader.bytes().is_empty());
    }

    #[test]
    fn truncate() {
        let mut window = native_reader(BUF).range(2..8);
        window.truncate(2).unwrap();
        assert_eq!(window.bytes(), &BUF[2..4]);
    }

    #[test]
    fn offset_from() {
        let base = native_reader(BUF);
        let inner = base.range(2..8);
        assert_eq!(inner.offset_from(&base), 2);
    }

    #[test]
    fn skip() {
        let mut reader = native_reader(BUF);
        reader.skip(2).unwrap();
        assert_eq!(reader.bytes(), &BUF[2..]);
    }

    #[test]
    fn to_slice() {
        let window = native_reader(BUF).range(2..5);
        assert_eq!(window.to_slice(), Ok(Cow::Borrowed(&BUF[2..5])));
    }

    #[test]
    fn to_string_ok() {
        let text = b"hello, world!";
        let tail = native_reader(&text[..]).range_from(7..);
        assert_eq!(tail.to_string(), Ok(Cow::Borrowed("world!")));
    }

    // The bytes of the rocket emoji (🚀, U+1F680 = [0xf0, 0x9f, 0x9a, 0x80])
    // rotated left by one position, which makes them invalid UTF-8.
    const BAD_UTF8: &[u8] = &[0x9f, 0x9a, 0x80, 0xf0];

    #[test]
    fn to_string_err() {
        let reader = native_reader(BAD_UTF8);
        let outcome = reader.to_string();
        assert!(outcome.is_err());
    }

    #[test]
    fn to_string_lossy() {
        let reader = native_reader(BAD_UTF8);
        // Each of the four invalid bytes becomes one U+FFFD replacement
        // character.
        assert_eq!(
            reader.to_string_lossy(),
            Ok(Cow::Borrowed("\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}"))
        );
    }

    #[test]
    fn read_u8_array() {
        let mut reader = native_reader(BAD_UTF8);
        reader.skip(1).unwrap();
        let pair: [u8; 2] = reader.read_u8_array().unwrap();
        assert_eq!(pair, &BAD_UTF8[1..3]);
        assert_eq!(reader.bytes(), &BAD_UTF8[3..]);
    }
}
640