1 use std::{ 2 io::{self, Read, Seek, SeekFrom}, 3 mem::size_of, 4 }; 5 6 use crate::{ 7 date::{Date, InfiniteOrNanDate}, 8 error::{Error, ErrorKind}, 9 stream::{Event, OwnedEvent}, 10 u64_to_usize, Uid, 11 }; 12 13 struct StackItem { 14 object_ref: u64, 15 child_object_refs: Vec<u64>, 16 ty: StackType, 17 } 18 19 enum StackType { 20 Array, 21 Dict, 22 } 23 24 // https://opensource.apple.com/source/CF/CF-550/CFBinaryPList.c 25 // https://hg.python.org/cpython/file/3.4/Lib/plistlib.py 26 pub struct BinaryReader<R> { 27 stack: Vec<StackItem>, 28 object_offsets: Vec<u64>, 29 object_on_stack: Vec<bool>, 30 reader: PosReader<R>, 31 ref_size: u8, 32 root_object: u64, 33 trailer_start_offset: u64, 34 } 35 36 struct PosReader<R> { 37 reader: R, 38 pos: u64, 39 } 40 41 impl<R: Read + Seek> PosReader<R> { read_all(&mut self, buf: &mut [u8]) -> Result<(), Error>42 fn read_all(&mut self, buf: &mut [u8]) -> Result<(), Error> { 43 self.read_exact(buf) 44 .map_err(|err| ErrorKind::Io(err).with_byte_offset(self.pos))?; 45 Ok(()) 46 } 47 seek(&mut self, pos: SeekFrom) -> Result<u64, Error>48 fn seek(&mut self, pos: SeekFrom) -> Result<u64, Error> { 49 self.pos = self 50 .reader 51 .seek(pos) 52 .map_err(|err| ErrorKind::Io(err).with_byte_offset(self.pos))?; 53 Ok(self.pos) 54 } 55 } 56 57 impl<R: Read> Read for PosReader<R> { read(&mut self, buf: &mut [u8]) -> io::Result<usize>58 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { 59 let count = self.reader.read(buf)?; 60 self.pos 61 .checked_add(count as u64) 62 .expect("file cannot be larger than `u64::max_value()` bytes"); 63 Ok(count) 64 } 65 } 66 67 impl<R: Read + Seek> BinaryReader<R> { new(reader: R) -> BinaryReader<R>68 pub fn new(reader: R) -> BinaryReader<R> { 69 BinaryReader { 70 stack: Vec::new(), 71 object_offsets: Vec::new(), 72 object_on_stack: Vec::new(), 73 reader: PosReader { reader, pos: 0 }, 74 ref_size: 0, 75 root_object: 0, 76 trailer_start_offset: 0, 77 } 78 } 79 allocate_vec<T>(&self, len: u64, size: usize) -> Result<Vec<T>, Error>80 fn allocate_vec<T>(&self, len: u64, size: usize) -> Result<Vec<T>, Error> { 81 // Check we are not reading past the start of the plist trailer 82 let inner = |len: u64, size: usize| { 83 let byte_len = len.checked_mul(size as u64)?; 84 let end_offset = self.reader.pos.checked_add(byte_len)?; 85 if end_offset <= self.trailer_start_offset { 86 Some(()) 87 } else { 88 None 89 } 90 }; 91 inner(len, size).ok_or_else(|| self.with_pos(ErrorKind::ObjectOffsetTooLarge))?; 92 93 Ok(Vec::with_capacity(len as usize)) 94 } 95 read_trailer(&mut self) -> Result<(), Error>96 fn read_trailer(&mut self) -> Result<(), Error> { 97 self.reader.seek(SeekFrom::Start(0))?; 98 let mut magic = [0; 8]; 99 self.reader.read_all(&mut magic)?; 100 if &magic != b"bplist00" { 101 return Err(self.with_pos(ErrorKind::InvalidMagic)); 102 } 103 104 self.trailer_start_offset = self.reader.seek(SeekFrom::End(-32))?; 105 106 // Trailer starts with 6 bytes of padding 107 let mut zeros = [0; 6]; 108 self.reader.read_all(&mut zeros)?; 109 110 let offset_size = self.read_u8()?; 111 match offset_size { 112 1 | 2 | 4 | 8 => (), 113 _ => return Err(self.with_pos(ErrorKind::InvalidTrailerObjectOffsetSize)), 114 } 115 116 self.ref_size = self.read_u8()?; 117 match self.ref_size { 118 1 | 2 | 4 | 8 => (), 119 _ => return Err(self.with_pos(ErrorKind::InvalidTrailerObjectReferenceSize)), 120 } 121 122 let num_objects = self.read_be_u64()?; 123 self.root_object = self.read_be_u64()?; 124 let offset_table_offset = self.read_be_u64()?; 125 126 // Read offset table 127 self.reader.seek(SeekFrom::Start(offset_table_offset))?; 128 self.object_offsets = self.read_ints(num_objects, offset_size)?; 129 self.object_on_stack = vec![false; self.object_offsets.len()]; 130 131 Ok(()) 132 } 133 134 /// Reads a list of `len` big-endian integers of `size` bytes from the reader. read_ints(&mut self, len: u64, size: u8) -> Result<Vec<u64>, Error>135 fn read_ints(&mut self, len: u64, size: u8) -> Result<Vec<u64>, Error> { 136 let mut ints = self.allocate_vec(len, size as usize)?; 137 for _ in 0..len { 138 match size { 139 1 => ints.push(self.read_u8()?.into()), 140 2 => ints.push(self.read_be_u16()?.into()), 141 4 => ints.push(self.read_be_u32()?.into()), 142 8 => ints.push(self.read_be_u64()?), 143 _ => unreachable!("size is either self.ref_size or offset_size both of which are already validated") 144 } 145 } 146 Ok(ints) 147 } 148 149 /// Reads a list of `len` offsets into the object table from the reader. read_refs(&mut self, len: u64) -> Result<Vec<u64>, Error>150 fn read_refs(&mut self, len: u64) -> Result<Vec<u64>, Error> { 151 let ref_size = self.ref_size; 152 self.read_ints(len, ref_size) 153 } 154 155 /// Reads a compressed value length from the reader. `len` must contain the low 4 bits of the 156 /// object token. read_object_len(&mut self, len: u8) -> Result<u64, Error>157 fn read_object_len(&mut self, len: u8) -> Result<u64, Error> { 158 if (len & 0x0f) == 0x0f { 159 let len_power_of_two = self.read_u8()? & 0x03; 160 Ok(match len_power_of_two { 161 0 => self.read_u8()?.into(), 162 1 => self.read_be_u16()?.into(), 163 2 => self.read_be_u32()?.into(), 164 3 => self.read_be_u64()?, 165 _ => return Err(self.with_pos(ErrorKind::InvalidObjectLength)), 166 }) 167 } else { 168 Ok(len.into()) 169 } 170 } 171 172 /// Reads `len` bytes from the reader. read_data(&mut self, len: u64) -> Result<Vec<u8>, Error>173 fn read_data(&mut self, len: u64) -> Result<Vec<u8>, Error> { 174 let mut data = self.allocate_vec(len, size_of::<u8>())?; 175 data.resize(len as usize, 0); 176 self.reader.read_all(&mut data)?; 177 Ok(data) 178 } 179 seek_to_object(&mut self, object_ref: u64) -> Result<u64, Error>180 fn seek_to_object(&mut self, object_ref: u64) -> Result<u64, Error> { 181 let object_ref = u64_to_usize(object_ref) 182 .ok_or_else(|| self.with_pos(ErrorKind::ObjectReferenceTooLarge))?; 183 let offset = *self 184 .object_offsets 185 .get(object_ref) 186 .ok_or_else(|| self.with_pos(ErrorKind::ObjectReferenceTooLarge))?; 187 if offset >= self.trailer_start_offset { 188 return Err(self.with_pos(ErrorKind::ObjectOffsetTooLarge)); 189 } 190 Ok(self.reader.seek(SeekFrom::Start(offset))?) 191 } 192 push_stack_item_and_check_for_recursion(&mut self, item: StackItem) -> Result<(), Error>193 fn push_stack_item_and_check_for_recursion(&mut self, item: StackItem) -> Result<(), Error> { 194 let object_ref = u64_to_usize(item.object_ref).expect("internal consistency error"); 195 let is_on_stack = &mut self.object_on_stack[object_ref]; 196 if *is_on_stack { 197 return Err(self.with_pos(ErrorKind::RecursiveObject)); 198 } 199 *is_on_stack = true; 200 self.stack.push(item); 201 Ok(()) 202 } 203 pop_stack_item(&mut self) -> StackItem204 fn pop_stack_item(&mut self) -> StackItem { 205 let item = self.stack.pop().expect("internal consistency error"); 206 let object_ref = u64_to_usize(item.object_ref).expect("internal consistency error"); 207 self.object_on_stack[object_ref] = false; 208 item 209 } 210 read_next(&mut self) -> Result<Option<OwnedEvent>, Error>211 fn read_next(&mut self) -> Result<Option<OwnedEvent>, Error> { 212 let object_ref = if self.ref_size == 0 { 213 // Initialise here rather than in new 214 self.read_trailer()?; 215 self.root_object 216 } else { 217 let maybe_object_ref = if let Some(stack_item) = self.stack.last_mut() { 218 stack_item.child_object_refs.pop() 219 } else { 220 // Finished reading the plist 221 return Ok(None); 222 }; 223 224 if let Some(object_ref) = maybe_object_ref { 225 object_ref 226 } else { 227 // We're at the end of an array or dict. Pop the top stack item and return. 228 let stack_item = self.pop_stack_item(); 229 match stack_item.ty { 230 StackType::Array | StackType::Dict => return Ok(Some(Event::EndCollection)), 231 } 232 } 233 }; 234 235 self.seek_to_object(object_ref)?; 236 237 let token = self.read_u8()?; 238 let ty = (token & 0xf0) >> 4; 239 let size = token & 0x0f; 240 241 let result = match (ty, size) { 242 (0x0, 0x00) => return Err(self.with_pos(ErrorKind::NullObjectUnimplemented)), 243 (0x0, 0x08) => Some(Event::Boolean(false)), 244 (0x0, 0x09) => Some(Event::Boolean(true)), 245 (0x0, 0x0f) => return Err(self.with_pos(ErrorKind::FillObjectUnimplemented)), 246 (0x1, 0) => Some(Event::Integer(self.read_u8()?.into())), 247 (0x1, 1) => Some(Event::Integer(self.read_be_u16()?.into())), 248 (0x1, 2) => Some(Event::Integer(self.read_be_u32()?.into())), 249 (0x1, 3) => Some(Event::Integer(self.read_be_i64()?.into())), 250 (0x1, 4) => { 251 let value = self.read_be_i128()?; 252 if value < 0 || value > u64::max_value().into() { 253 return Err(self.with_pos(ErrorKind::IntegerOutOfRange)); 254 } 255 Some(Event::Integer((value as u64).into())) 256 } 257 (0x1, _) => return Err(self.with_pos(ErrorKind::UnknownObjectType(token))), // variable length int 258 (0x2, 2) => Some(Event::Real(f32::from_bits(self.read_be_u32()?).into())), 259 (0x2, 3) => Some(Event::Real(f64::from_bits(self.read_be_u64()?))), 260 (0x2, _) => return Err(self.with_pos(ErrorKind::UnknownObjectType(token))), // odd length float 261 (0x3, 3) => { 262 // Date. Seconds since 1/1/2001 00:00:00. 263 let secs = f64::from_bits(self.read_be_u64()?); 264 let date = Date::from_seconds_since_plist_epoch(secs) 265 .map_err(|InfiniteOrNanDate| self.with_pos(ErrorKind::InfiniteOrNanDate))?; 266 Some(Event::Date(date)) 267 } 268 (0x4, n) => { 269 // Data 270 let len = self.read_object_len(n)?; 271 Some(Event::Data(self.read_data(len)?.into())) 272 } 273 (0x5, n) => { 274 // ASCII string 275 let len = self.read_object_len(n)?; 276 let raw = self.read_data(len)?; 277 let string = String::from_utf8(raw) 278 .map_err(|_| self.with_pos(ErrorKind::InvalidUtf8String))?; 279 Some(Event::String(string.into())) 280 } 281 (0x6, n) => { 282 // UTF-16 string 283 let len_utf16_codepoints = self.read_object_len(n)?; 284 let mut raw_utf16 = self.allocate_vec(len_utf16_codepoints, size_of::<u16>())?; 285 286 for _ in 0..len_utf16_codepoints { 287 raw_utf16.push(self.read_be_u16()?); 288 } 289 290 let string = String::from_utf16(&raw_utf16) 291 .map_err(|_| self.with_pos(ErrorKind::InvalidUtf16String))?; 292 Some(Event::String(string.into())) 293 } 294 (0x8, n) if n < 8 => { 295 // Uid 296 let mut buf = [0; 8]; 297 // `len_bytes` is at most 8. 298 let len_bytes = n as usize + 1; 299 // Values are stored in big-endian so we must put the least significant bytes at 300 // the end of the buffer. 301 self.reader.read_all(&mut buf[8 - len_bytes..])?; 302 let value = u64::from_be_bytes(buf); 303 304 Some(Event::Uid(Uid::new(value))) 305 } 306 (0xa, n) => { 307 // Array 308 let len = self.read_object_len(n)?; 309 let mut child_object_refs = self.read_refs(len)?; 310 // Reverse so we can pop off the end of the stack in order 311 child_object_refs.reverse(); 312 313 self.push_stack_item_and_check_for_recursion(StackItem { 314 object_ref, 315 ty: StackType::Array, 316 child_object_refs, 317 })?; 318 319 Some(Event::StartArray(Some(len))) 320 } 321 (0xd, n) => { 322 // Dict 323 let len = self.read_object_len(n)?; 324 let key_refs = self.read_refs(len)?; 325 let value_refs = self.read_refs(len)?; 326 327 let keys_and_values_len = len 328 .checked_mul(2) 329 .ok_or_else(|| self.with_pos(ErrorKind::ObjectTooLarge))?; 330 let mut child_object_refs = 331 self.allocate_vec(keys_and_values_len, self.ref_size as usize)?; 332 let len = key_refs.len(); 333 for i in 1..=len { 334 // Reverse so we can pop off the end of the stack in order 335 child_object_refs.push(value_refs[len - i]); 336 child_object_refs.push(key_refs[len - i]); 337 } 338 339 self.push_stack_item_and_check_for_recursion(StackItem { 340 object_ref, 341 ty: StackType::Dict, 342 child_object_refs, 343 })?; 344 345 Some(Event::StartDictionary(Some(len as u64))) 346 } 347 (_, _) => return Err(self.with_pos(ErrorKind::UnknownObjectType(token))), 348 }; 349 350 Ok(result) 351 } 352 read_u8(&mut self) -> Result<u8, Error>353 fn read_u8(&mut self) -> Result<u8, Error> { 354 let mut buf = [0; 1]; 355 self.reader.read_all(&mut buf)?; 356 Ok(buf[0]) 357 } 358 read_be_u16(&mut self) -> Result<u16, Error>359 fn read_be_u16(&mut self) -> Result<u16, Error> { 360 let mut buf = [0; 2]; 361 self.reader.read_all(&mut buf)?; 362 Ok(u16::from_be_bytes(buf)) 363 } 364 read_be_u32(&mut self) -> Result<u32, Error>365 fn read_be_u32(&mut self) -> Result<u32, Error> { 366 let mut buf = [0; 4]; 367 self.reader.read_all(&mut buf)?; 368 Ok(u32::from_be_bytes(buf)) 369 } 370 read_be_u64(&mut self) -> Result<u64, Error>371 fn read_be_u64(&mut self) -> Result<u64, Error> { 372 let mut buf = [0; 8]; 373 self.reader.read_all(&mut buf)?; 374 Ok(u64::from_be_bytes(buf)) 375 } 376 read_be_i64(&mut self) -> Result<i64, Error>377 fn read_be_i64(&mut self) -> Result<i64, Error> { 378 let mut buf = [0; 8]; 379 self.reader.read_all(&mut buf)?; 380 Ok(i64::from_be_bytes(buf)) 381 } 382 read_be_i128(&mut self) -> Result<i128, Error>383 fn read_be_i128(&mut self) -> Result<i128, Error> { 384 let mut buf = [0; 16]; 385 self.reader.read_all(&mut buf)?; 386 Ok(i128::from_be_bytes(buf)) 387 } 388 with_pos(&self, kind: ErrorKind) -> Error389 fn with_pos(&self, kind: ErrorKind) -> Error { 390 kind.with_byte_offset(self.reader.pos) 391 } 392 } 393 394 impl<R: Read + Seek> Iterator for BinaryReader<R> { 395 type Item = Result<OwnedEvent, Error>; 396 next(&mut self) -> Option<Result<OwnedEvent, Error>>397 fn next(&mut self) -> Option<Result<OwnedEvent, Error>> { 398 match self.read_next() { 399 Ok(Some(event)) => Some(Ok(event)), 400 Err(err) => { 401 // Mark the plist as finished 402 self.stack.clear(); 403 Some(Err(err)) 404 } 405 Ok(None) => None, 406 } 407 } 408 } 409 410 #[cfg(test)] 411 mod tests { 412 use std::{fs::File, path::Path}; 413 414 use super::*; 415 use crate::{stream::Event, Uid}; 416 417 #[test] streaming_parser()418 fn streaming_parser() { 419 use crate::stream::Event::*; 420 421 let reader = File::open(&Path::new("./tests/data/binary.plist")).unwrap(); 422 let streaming_parser = BinaryReader::new(reader); 423 let events: Vec<Event> = streaming_parser.map(|e| e.unwrap()).collect(); 424 425 let comparison = &[ 426 StartDictionary(Some(13)), 427 String("Author".into()), 428 String("William Shakespeare".into()), 429 String("Birthdate".into()), 430 Date(super::Date::from_rfc3339("1981-05-16T11:32:06Z").unwrap()), 431 String("EmptyArray".into()), 432 StartArray(Some(0)), 433 EndCollection, 434 String("IsNotFalse".into()), 435 Boolean(false), 436 String("SmallestNumber".into()), 437 Integer((-9223372036854775808i64).into()), 438 String("EmptyDictionary".into()), 439 StartDictionary(Some(0)), 440 EndCollection, 441 String("Height".into()), 442 Real(1.6), 443 String("Lines".into()), 444 StartArray(Some(2)), 445 String("It is a tale told by an idiot,".into()), 446 String("Full of sound and fury, signifying nothing.".into()), 447 EndCollection, 448 String("Death".into()), 449 Integer(1564.into()), 450 String("Blank".into()), 451 String("".into()), 452 String("BiggestNumber".into()), 453 Integer(18446744073709551615u64.into()), 454 String("IsTrue".into()), 455 Boolean(true), 456 String("Data".into()), 457 Data(vec![0, 0, 0, 190, 0, 0, 0, 3, 0, 0, 0, 30, 0, 0, 0].into()), 458 EndCollection, 459 ]; 460 461 assert_eq!(events, &comparison[..]); 462 } 463 464 #[test] utf16_plist()465 fn utf16_plist() { 466 let reader = File::open(&Path::new("./tests/data/utf16_bplist.plist")).unwrap(); 467 let streaming_parser = BinaryReader::new(reader); 468 let mut events: Vec<Event> = streaming_parser.map(|e| e.unwrap()).collect(); 469 470 assert_eq!(events[2], Event::String("\u{2605} or better".into())); 471 472 let poem = if let Event::String(ref mut poem) = events[4] { 473 poem 474 } else { 475 panic!("not a string") 476 }; 477 assert_eq!(poem.len(), 643); 478 assert_eq!(poem.to_mut().pop().unwrap(), '\u{2605}'); 479 } 480 481 #[test] nskeyedarchiver_plist()482 fn nskeyedarchiver_plist() { 483 let reader = File::open(&Path::new("./tests/data/binary_NSKeyedArchiver.plist")).unwrap(); 484 let streaming_parser = BinaryReader::new(reader); 485 let events: Vec<Event> = streaming_parser.map(|e| e.unwrap()).collect(); 486 487 assert_eq!(events[10], Event::Uid(Uid::new(4))); 488 assert_eq!(events[12], Event::Uid(Uid::new(2))); 489 assert_eq!(events[18], Event::Uid(Uid::new(3))); 490 assert_eq!(events[46], Event::Uid(Uid::new(1))); 491 } 492 } 493