1 //! Defining custom `Reader`s quickly. 2 3 use alloc::borrow::Cow; 4 use alloc::rc::Rc; 5 use alloc::string::String; 6 use alloc::sync::Arc; 7 use core::fmt::Debug; 8 use core::ops::{Deref, Index, Range, RangeFrom, RangeTo}; 9 use core::slice; 10 use core::str; 11 use stable_deref_trait::CloneStableDeref; 12 13 use crate::endianity::Endianity; 14 use crate::read::{Error, Reader, ReaderOffsetId, Result}; 15 16 /// A reference counted, non-thread-safe slice of bytes and associated 17 /// endianity. 18 /// 19 /// ``` 20 /// # #[cfg(feature = "std")] { 21 /// use std::rc::Rc; 22 /// 23 /// let buf = Rc::from(&[1, 2, 3, 4][..]); 24 /// let reader = gimli::EndianRcSlice::new(buf, gimli::NativeEndian); 25 /// # let _ = reader; 26 /// # } 27 /// ``` 28 pub type EndianRcSlice<Endian> = EndianReader<Endian, Rc<[u8]>>; 29 30 /// An atomically reference counted, thread-safe slice of bytes and associated 31 /// endianity. 32 /// 33 /// ``` 34 /// # #[cfg(feature = "std")] { 35 /// use std::sync::Arc; 36 /// 37 /// let buf = Arc::from(&[1, 2, 3, 4][..]); 38 /// let reader = gimli::EndianArcSlice::new(buf, gimli::NativeEndian); 39 /// # let _ = reader; 40 /// # } 41 /// ``` 42 pub type EndianArcSlice<Endian> = EndianReader<Endian, Arc<[u8]>>; 43 44 /// An easy way to define a custom `Reader` implementation with a reference to a 45 /// generic buffer of bytes and an associated endianity. 46 /// 47 /// Note that the whole original buffer is kept alive in memory even if there is 48 /// only one reader that references only a handful of bytes from that original 49 /// buffer. That is, `EndianReader` will not do any copying, moving, or 50 /// compacting in order to free up unused regions of the original buffer. If you 51 /// require this kind of behavior, it is up to you to implement `Reader` 52 /// directly by-hand. 53 /// 54 /// # Example 55 /// 56 /// Say you have an `mmap`ed file that you want to serve as a `gimli::Reader`. 57 /// You can wrap that `mmap`ed file up in a `MmapFile` type and use 58 /// `EndianReader<Rc<MmapFile>>` or `EndianReader<Arc<MmapFile>>` as readers as 59 /// long as `MmapFile` dereferences to the underlying `[u8]` data. 60 /// 61 /// ``` 62 /// use std::io; 63 /// use std::ops::Deref; 64 /// use std::path::Path; 65 /// use std::slice; 66 /// use std::sync::Arc; 67 /// 68 /// /// A type that represents an `mmap`ed file. 69 /// #[derive(Debug)] 70 /// pub struct MmapFile { 71 /// ptr: *const u8, 72 /// len: usize, 73 /// } 74 /// 75 /// impl MmapFile { 76 /// pub fn new(path: &Path) -> io::Result<MmapFile> { 77 /// // Call `mmap` and check for errors and all that... 78 /// # unimplemented!() 79 /// } 80 /// } 81 /// 82 /// impl Drop for MmapFile { 83 /// fn drop(&mut self) { 84 /// // Call `munmap` to clean up after ourselves... 85 /// # unimplemented!() 86 /// } 87 /// } 88 /// 89 /// // And `MmapFile` can deref to a slice of the `mmap`ed region of memory. 90 /// impl Deref for MmapFile { 91 /// type Target = [u8]; 92 /// fn deref(&self) -> &[u8] { 93 /// unsafe { 94 /// slice::from_raw_parts(self.ptr, self.len) 95 /// } 96 /// } 97 /// } 98 /// 99 /// /// A type that represents a shared `mmap`ed file. 100 /// #[derive(Debug, Clone)] 101 /// pub struct ArcMmapFile(Arc<MmapFile>); 102 /// 103 /// // And `ArcMmapFile` can deref to a slice of the `mmap`ed region of memory. 104 /// impl Deref for ArcMmapFile { 105 /// type Target = [u8]; 106 /// fn deref(&self) -> &[u8] { 107 /// &self.0 108 /// } 109 /// } 110 /// 111 /// // These are both valid for any `Rc` or `Arc`. 112 /// unsafe impl gimli::StableDeref for ArcMmapFile {} 113 /// unsafe impl gimli::CloneStableDeref for ArcMmapFile {} 114 /// 115 /// /// A `gimli::Reader` that is backed by an `mmap`ed file! 116 /// pub type MmapFileReader<Endian> = gimli::EndianReader<Endian, ArcMmapFile>; 117 /// # fn test(_: &MmapFileReader<gimli::NativeEndian>) { } 118 /// ``` 119 #[derive(Debug, Clone, Copy, Hash)] 120 pub struct EndianReader<Endian, T> 121 where 122 Endian: Endianity, 123 T: CloneStableDeref<Target = [u8]> + Debug, 124 { 125 range: SubRange<T>, 126 endian: Endian, 127 } 128 129 impl<Endian, T1, T2> PartialEq<EndianReader<Endian, T2>> for EndianReader<Endian, T1> 130 where 131 Endian: Endianity, 132 T1: CloneStableDeref<Target = [u8]> + Debug, 133 T2: CloneStableDeref<Target = [u8]> + Debug, 134 { eq(&self, rhs: &EndianReader<Endian, T2>) -> bool135 fn eq(&self, rhs: &EndianReader<Endian, T2>) -> bool { 136 self.bytes() == rhs.bytes() 137 } 138 } 139 140 impl<Endian, T> Eq for EndianReader<Endian, T> 141 where 142 Endian: Endianity, 143 T: CloneStableDeref<Target = [u8]> + Debug, 144 { 145 } 146 147 // This is separated out from `EndianReader` so that we can avoid running afoul 148 // of borrowck. We need to `read_slice(&mut self, ...) -> &[u8]` and then call 149 // `self.endian.read_whatever` on the result. The problem is that the returned 150 // slice keeps the `&mut self` borrow active, so we wouldn't be able to access 151 // `self.endian`. Splitting the sub-range out from the endian lets us work 152 // around this, making it so that only the `self.range` borrow is held active, 153 // not all of `self`. 154 // 155 // This also serves to encapsulate the unsafe code concerning `CloneStableDeref`. 156 // The `bytes` member is held so that the bytes live long enough, and the 157 // `CloneStableDeref` ensures these bytes never move. The `ptr` and `len` 158 // members point inside `bytes`, and are updated during read operations. 159 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 160 struct SubRange<T> 161 where 162 T: CloneStableDeref<Target = [u8]> + Debug, 163 { 164 bytes: T, 165 ptr: *const u8, 166 len: usize, 167 } 168 169 unsafe impl<T> Send for SubRange<T> where T: CloneStableDeref<Target = [u8]> + Debug + Send {} 170 171 unsafe impl<T> Sync for SubRange<T> where T: CloneStableDeref<Target = [u8]> + Debug + Sync {} 172 173 impl<T> SubRange<T> 174 where 175 T: CloneStableDeref<Target = [u8]> + Debug, 176 { 177 #[inline] new(bytes: T) -> Self178 fn new(bytes: T) -> Self { 179 let ptr = bytes.as_ptr(); 180 let len = bytes.len(); 181 SubRange { bytes, ptr, len } 182 } 183 184 #[inline] bytes(&self) -> &[u8]185 fn bytes(&self) -> &[u8] { 186 // Safe because `T` implements `CloneStableDeref`, `bytes` can't be modified, 187 // and all operations that modify `ptr` and `len` ensure they stay in range. 188 unsafe { slice::from_raw_parts(self.ptr, self.len) } 189 } 190 191 #[inline] len(&self) -> usize192 fn len(&self) -> usize { 193 self.len 194 } 195 196 #[inline] truncate(&mut self, len: usize)197 fn truncate(&mut self, len: usize) { 198 assert!(len <= self.len); 199 self.len = len; 200 } 201 202 #[inline] skip(&mut self, len: usize)203 fn skip(&mut self, len: usize) { 204 assert!(len <= self.len); 205 self.ptr = unsafe { self.ptr.add(len) }; 206 self.len -= len; 207 } 208 209 #[inline] read_slice(&mut self, len: usize) -> Option<&[u8]>210 fn read_slice(&mut self, len: usize) -> Option<&[u8]> { 211 if self.len() < len { 212 None 213 } else { 214 // Same as for `bytes()`. 215 let bytes = unsafe { slice::from_raw_parts(self.ptr, len) }; 216 self.skip(len); 217 Some(bytes) 218 } 219 } 220 } 221 222 impl<Endian, T> EndianReader<Endian, T> 223 where 224 Endian: Endianity, 225 T: CloneStableDeref<Target = [u8]> + Debug, 226 { 227 /// Construct a new `EndianReader` with the given bytes. 228 #[inline] new(bytes: T, endian: Endian) -> EndianReader<Endian, T>229 pub fn new(bytes: T, endian: Endian) -> EndianReader<Endian, T> { 230 EndianReader { 231 range: SubRange::new(bytes), 232 endian, 233 } 234 } 235 236 /// Return a reference to the raw bytes underlying this reader. 237 #[inline] bytes(&self) -> &[u8]238 pub fn bytes(&self) -> &[u8] { 239 self.range.bytes() 240 } 241 } 242 243 /// # Range Methods 244 /// 245 /// Unfortunately, `std::ops::Index` *must* return a reference, so we can't 246 /// implement `Index<Range<usize>>` to return a new `EndianReader` the way we 247 /// would like to. Instead, we abandon fancy indexing operators and have these 248 /// plain old methods. 249 impl<Endian, T> EndianReader<Endian, T> 250 where 251 Endian: Endianity, 252 T: CloneStableDeref<Target = [u8]> + Debug, 253 { 254 /// Take the given `start..end` range of the underlying buffer and return a 255 /// new `EndianReader`. 256 /// 257 /// ``` 258 /// # #[cfg(feature = "std")] { 259 /// use gimli::{EndianReader, LittleEndian}; 260 /// use std::sync::Arc; 261 /// 262 /// let buf = Arc::<[u8]>::from(&[0x01, 0x02, 0x03, 0x04][..]); 263 /// let reader = EndianReader::new(buf.clone(), LittleEndian); 264 /// assert_eq!(reader.range(1..3), 265 /// EndianReader::new(&buf[1..3], LittleEndian)); 266 /// # } 267 /// ``` 268 /// 269 /// # Panics 270 /// 271 /// Panics if the range is out of bounds. range(&self, idx: Range<usize>) -> EndianReader<Endian, T>272 pub fn range(&self, idx: Range<usize>) -> EndianReader<Endian, T> { 273 let mut r = self.clone(); 274 r.range.skip(idx.start); 275 r.range.truncate(idx.len()); 276 r 277 } 278 279 /// Take the given `start..` range of the underlying buffer and return a new 280 /// `EndianReader`. 281 /// 282 /// ``` 283 /// # #[cfg(feature = "std")] { 284 /// use gimli::{EndianReader, LittleEndian}; 285 /// use std::sync::Arc; 286 /// 287 /// let buf = Arc::<[u8]>::from(&[0x01, 0x02, 0x03, 0x04][..]); 288 /// let reader = EndianReader::new(buf.clone(), LittleEndian); 289 /// assert_eq!(reader.range_from(2..), 290 /// EndianReader::new(&buf[2..], LittleEndian)); 291 /// # } 292 /// ``` 293 /// 294 /// # Panics 295 /// 296 /// Panics if the range is out of bounds. range_from(&self, idx: RangeFrom<usize>) -> EndianReader<Endian, T>297 pub fn range_from(&self, idx: RangeFrom<usize>) -> EndianReader<Endian, T> { 298 let mut r = self.clone(); 299 r.range.skip(idx.start); 300 r 301 } 302 303 /// Take the given `..end` range of the underlying buffer and return a new 304 /// `EndianReader`. 305 /// 306 /// ``` 307 /// # #[cfg(feature = "std")] { 308 /// use gimli::{EndianReader, LittleEndian}; 309 /// use std::sync::Arc; 310 /// 311 /// let buf = Arc::<[u8]>::from(&[0x01, 0x02, 0x03, 0x04][..]); 312 /// let reader = EndianReader::new(buf.clone(), LittleEndian); 313 /// assert_eq!(reader.range_to(..3), 314 /// EndianReader::new(&buf[..3], LittleEndian)); 315 /// # } 316 /// ``` 317 /// 318 /// # Panics 319 /// 320 /// Panics if the range is out of bounds. range_to(&self, idx: RangeTo<usize>) -> EndianReader<Endian, T>321 pub fn range_to(&self, idx: RangeTo<usize>) -> EndianReader<Endian, T> { 322 let mut r = self.clone(); 323 r.range.truncate(idx.end); 324 r 325 } 326 } 327 328 impl<Endian, T> Index<usize> for EndianReader<Endian, T> 329 where 330 Endian: Endianity, 331 T: CloneStableDeref<Target = [u8]> + Debug, 332 { 333 type Output = u8; index(&self, idx: usize) -> &Self::Output334 fn index(&self, idx: usize) -> &Self::Output { 335 &self.bytes()[idx] 336 } 337 } 338 339 impl<Endian, T> Index<RangeFrom<usize>> for EndianReader<Endian, T> 340 where 341 Endian: Endianity, 342 T: CloneStableDeref<Target = [u8]> + Debug, 343 { 344 type Output = [u8]; index(&self, idx: RangeFrom<usize>) -> &Self::Output345 fn index(&self, idx: RangeFrom<usize>) -> &Self::Output { 346 &self.bytes()[idx] 347 } 348 } 349 350 impl<Endian, T> Deref for EndianReader<Endian, T> 351 where 352 Endian: Endianity, 353 T: CloneStableDeref<Target = [u8]> + Debug, 354 { 355 type Target = [u8]; deref(&self) -> &Self::Target356 fn deref(&self) -> &Self::Target { 357 self.bytes() 358 } 359 } 360 361 impl<Endian, T> Reader for EndianReader<Endian, T> 362 where 363 Endian: Endianity, 364 T: CloneStableDeref<Target = [u8]> + Debug, 365 { 366 type Endian = Endian; 367 type Offset = usize; 368 369 #[inline] endian(&self) -> Endian370 fn endian(&self) -> Endian { 371 self.endian 372 } 373 374 #[inline] len(&self) -> usize375 fn len(&self) -> usize { 376 self.range.len() 377 } 378 379 #[inline] empty(&mut self)380 fn empty(&mut self) { 381 self.range.truncate(0); 382 } 383 384 #[inline] truncate(&mut self, len: usize) -> Result<()>385 fn truncate(&mut self, len: usize) -> Result<()> { 386 if self.len() < len { 387 Err(Error::UnexpectedEof(self.offset_id())) 388 } else { 389 self.range.truncate(len); 390 Ok(()) 391 } 392 } 393 394 #[inline] offset_from(&self, base: &EndianReader<Endian, T>) -> usize395 fn offset_from(&self, base: &EndianReader<Endian, T>) -> usize { 396 let base_ptr = base.bytes().as_ptr() as *const u8 as usize; 397 let ptr = self.bytes().as_ptr() as *const u8 as usize; 398 debug_assert!(base_ptr <= ptr); 399 debug_assert!(ptr + self.bytes().len() <= base_ptr + base.bytes().len()); 400 ptr - base_ptr 401 } 402 403 #[inline] offset_id(&self) -> ReaderOffsetId404 fn offset_id(&self) -> ReaderOffsetId { 405 ReaderOffsetId(self.bytes().as_ptr() as u64) 406 } 407 408 #[inline] lookup_offset_id(&self, id: ReaderOffsetId) -> Option<Self::Offset>409 fn lookup_offset_id(&self, id: ReaderOffsetId) -> Option<Self::Offset> { 410 let id = id.0; 411 let self_id = self.bytes().as_ptr() as u64; 412 let self_len = self.bytes().len() as u64; 413 if id >= self_id && id <= self_id + self_len { 414 Some((id - self_id) as usize) 415 } else { 416 None 417 } 418 } 419 420 #[inline] find(&self, byte: u8) -> Result<usize>421 fn find(&self, byte: u8) -> Result<usize> { 422 self.bytes() 423 .iter() 424 .position(|x| *x == byte) 425 .ok_or_else(|| Error::UnexpectedEof(self.offset_id())) 426 } 427 428 #[inline] skip(&mut self, len: usize) -> Result<()>429 fn skip(&mut self, len: usize) -> Result<()> { 430 if self.len() < len { 431 Err(Error::UnexpectedEof(self.offset_id())) 432 } else { 433 self.range.skip(len); 434 Ok(()) 435 } 436 } 437 438 #[inline] split(&mut self, len: usize) -> Result<Self>439 fn split(&mut self, len: usize) -> Result<Self> { 440 if self.len() < len { 441 Err(Error::UnexpectedEof(self.offset_id())) 442 } else { 443 let mut r = self.clone(); 444 r.range.truncate(len); 445 self.range.skip(len); 446 Ok(r) 447 } 448 } 449 450 #[inline] to_slice(&self) -> Result<Cow<[u8]>>451 fn to_slice(&self) -> Result<Cow<[u8]>> { 452 Ok(self.bytes().into()) 453 } 454 455 #[inline] to_string(&self) -> Result<Cow<str>>456 fn to_string(&self) -> Result<Cow<str>> { 457 match str::from_utf8(self.bytes()) { 458 Ok(s) => Ok(s.into()), 459 _ => Err(Error::BadUtf8), 460 } 461 } 462 463 #[inline] to_string_lossy(&self) -> Result<Cow<str>>464 fn to_string_lossy(&self) -> Result<Cow<str>> { 465 Ok(String::from_utf8_lossy(self.bytes())) 466 } 467 468 #[inline] read_slice(&mut self, buf: &mut [u8]) -> Result<()>469 fn read_slice(&mut self, buf: &mut [u8]) -> Result<()> { 470 match self.range.read_slice(buf.len()) { 471 Some(slice) => { 472 buf.copy_from_slice(slice); 473 Ok(()) 474 } 475 None => Err(Error::UnexpectedEof(self.offset_id())), 476 } 477 } 478 } 479 480 #[cfg(test)] 481 mod tests { 482 use super::*; 483 use crate::endianity::NativeEndian; 484 use crate::read::Reader; 485 native_reader<T: CloneStableDeref<Target = [u8]> + Debug>( bytes: T, ) -> EndianReader<NativeEndian, T>486 fn native_reader<T: CloneStableDeref<Target = [u8]> + Debug>( 487 bytes: T, 488 ) -> EndianReader<NativeEndian, T> { 489 EndianReader::new(bytes, NativeEndian) 490 } 491 492 const BUF: &[u8] = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]; 493 494 #[test] test_reader_split()495 fn test_reader_split() { 496 let mut reader = native_reader(BUF); 497 let left = reader.split(3).unwrap(); 498 assert_eq!(left, native_reader(&BUF[..3])); 499 assert_eq!(reader, native_reader(&BUF[3..])); 500 } 501 502 #[test] test_reader_split_out_of_bounds()503 fn test_reader_split_out_of_bounds() { 504 let mut reader = native_reader(BUF); 505 assert!(reader.split(30).is_err()); 506 } 507 508 #[test] bytes_and_len_and_range_and_eq()509 fn bytes_and_len_and_range_and_eq() { 510 let reader = native_reader(BUF); 511 assert_eq!(reader.len(), BUF.len()); 512 assert_eq!(reader.bytes(), BUF); 513 assert_eq!(reader, native_reader(BUF)); 514 515 let range = reader.range(2..8); 516 let buf_range = &BUF[2..8]; 517 assert_eq!(range.len(), buf_range.len()); 518 assert_eq!(range.bytes(), buf_range); 519 assert_ne!(range, native_reader(BUF)); 520 assert_eq!(range, native_reader(buf_range)); 521 522 let range_from = range.range_from(1..); 523 let buf_range_from = &buf_range[1..]; 524 assert_eq!(range_from.len(), buf_range_from.len()); 525 assert_eq!(range_from.bytes(), buf_range_from); 526 assert_ne!(range_from, native_reader(BUF)); 527 assert_eq!(range_from, native_reader(buf_range_from)); 528 529 let range_to = range_from.range_to(..4); 530 let buf_range_to = &buf_range_from[..4]; 531 assert_eq!(range_to.len(), buf_range_to.len()); 532 assert_eq!(range_to.bytes(), buf_range_to); 533 assert_ne!(range_to, native_reader(BUF)); 534 assert_eq!(range_to, native_reader(buf_range_to)); 535 } 536 537 #[test] find()538 fn find() { 539 let mut reader = native_reader(BUF); 540 reader.skip(2).unwrap(); 541 assert_eq!( 542 reader.find(5), 543 Ok(BUF[2..].iter().position(|x| *x == 5).unwrap()) 544 ); 545 } 546 547 #[test] indexing()548 fn indexing() { 549 let mut reader = native_reader(BUF); 550 reader.skip(2).unwrap(); 551 assert_eq!(reader[0], BUF[2]); 552 } 553 554 #[test] 555 #[should_panic] indexing_out_of_bounds()556 fn indexing_out_of_bounds() { 557 let mut reader = native_reader(BUF); 558 reader.skip(2).unwrap(); 559 let _ = reader[900]; 560 } 561 562 #[test] endian()563 fn endian() { 564 let reader = native_reader(BUF); 565 assert_eq!(reader.endian(), NativeEndian); 566 } 567 568 #[test] empty()569 fn empty() { 570 let mut reader = native_reader(BUF); 571 assert!(!reader.is_empty()); 572 reader.empty(); 573 assert!(reader.is_empty()); 574 assert!(reader.bytes().is_empty()); 575 } 576 577 #[test] truncate()578 fn truncate() { 579 let reader = native_reader(BUF); 580 let mut reader = reader.range(2..8); 581 reader.truncate(2).unwrap(); 582 assert_eq!(reader.bytes(), &BUF[2..4]); 583 } 584 585 #[test] offset_from()586 fn offset_from() { 587 let reader = native_reader(BUF); 588 let sub = reader.range(2..8); 589 assert_eq!(sub.offset_from(&reader), 2); 590 } 591 592 #[test] skip()593 fn skip() { 594 let mut reader = native_reader(BUF); 595 reader.skip(2).unwrap(); 596 assert_eq!(reader.bytes(), &BUF[2..]); 597 } 598 599 #[test] to_slice()600 fn to_slice() { 601 assert_eq!( 602 native_reader(BUF).range(2..5).to_slice(), 603 Ok(Cow::from(&BUF[2..5])) 604 ); 605 } 606 607 #[test] to_string_ok()608 fn to_string_ok() { 609 let buf = b"hello, world!"; 610 let reader = native_reader(&buf[..]); 611 let reader = reader.range_from(7..); 612 assert_eq!(reader.to_string(), Ok(Cow::from("world!"))); 613 } 614 615 // The rocket emoji ( = [0xf0, 0x9f, 0x9a, 0x80]) but rotated left by one 616 // to make it invalid UTF-8. 617 const BAD_UTF8: &[u8] = &[0x9f, 0x9a, 0x80, 0xf0]; 618 619 #[test] to_string_err()620 fn to_string_err() { 621 let reader = native_reader(BAD_UTF8); 622 assert!(reader.to_string().is_err()); 623 } 624 625 #[test] to_string_lossy()626 fn to_string_lossy() { 627 let reader = native_reader(BAD_UTF8); 628 assert_eq!(reader.to_string_lossy(), Ok(Cow::from("����"))); 629 } 630 631 #[test] read_u8_array()632 fn read_u8_array() { 633 let mut reader = native_reader(BAD_UTF8); 634 reader.skip(1).unwrap(); 635 let arr: [u8; 2] = reader.read_u8_array().unwrap(); 636 assert_eq!(arr, &BAD_UTF8[1..3]); 637 assert_eq!(reader.bytes(), &BAD_UTF8[3..]); 638 } 639 } 640