1 use std::{
2     io::{self, Read, Seek, SeekFrom},
3     mem::size_of,
4 };
5 
6 use crate::{
7     date::{Date, InfiniteOrNanDate},
8     error::{Error, ErrorKind},
9     stream::Event,
10     u64_to_usize, Uid,
11 };
12 
/// A collection (array or dictionary) whose children are still being read.
struct StackItem {
    /// Object reference of the collection itself; used as the index into
    /// `BinaryReader::object_on_stack` when the item is pushed and popped.
    object_ref: u64,
    /// Object references of the children that have not been visited yet. They are stored in
    /// reverse order so that the next child can be taken with `Vec::pop`.
    child_object_refs: Vec<u64>,
    /// Whether this collection is an array or a dictionary.
    ty: StackType,
}
18 
/// The kind of collection a `StackItem` represents.
enum StackType {
    /// Pushed when an array object (type nibble `0xa`) is read.
    Array,
    /// Pushed when a dictionary object (type nibble `0xd`) is read.
    Dict,
}
23 
// https://opensource.apple.com/source/CF/CF-550/CFBinaryPList.c
// https://hg.python.org/cpython/file/3.4/Lib/plistlib.py
/// Reads a binary property list (`bplist00`) from a seekable reader and yields its contents as
/// a sequence of `Event`s through the `Iterator` implementation.
pub struct BinaryReader<R> {
    /// Collections that have been started but not yet ended, innermost last.
    stack: Vec<StackItem>,
    /// Offset table read from the file: the byte offset of each object, indexed by object
    /// reference.
    object_offsets: Vec<u64>,
    /// One flag per object, set while that object is on `stack`; used to detect collections
    /// that (directly or indirectly) contain themselves.
    object_on_stack: Vec<bool>,
    /// The underlying reader plus the byte position used for error reporting.
    reader: PosReader<R>,
    /// Size in bytes of an object reference, as read from the trailer. It is `0` until the
    /// trailer has been read, which is how `read_next` detects the first call.
    ref_size: u8,
    /// Object reference of the root object, as read from the trailer.
    root_object: u64,
    /// Byte offset at which the 32 byte trailer starts; no object may extend past it.
    trailer_start_offset: u64,
}
35 
/// Pairs a reader with a byte position.
struct PosReader<R> {
    /// The wrapped reader.
    reader: R,
    /// Byte offset attached to errors via `with_byte_offset` and used as the starting point
    /// for the bounds check in `BinaryReader::allocate_vec`.
    pos: u64,
}
40 
41 impl<R: Read + Seek> PosReader<R> {
read_all(&mut self, buf: &mut [u8]) -> Result<(), Error>42     fn read_all(&mut self, buf: &mut [u8]) -> Result<(), Error> {
43         self.read_exact(buf)
44             .map_err(|err| ErrorKind::Io(err).with_byte_offset(self.pos))?;
45         Ok(())
46     }
47 
seek(&mut self, pos: SeekFrom) -> Result<u64, Error>48     fn seek(&mut self, pos: SeekFrom) -> Result<u64, Error> {
49         self.pos = self
50             .reader
51             .seek(pos)
52             .map_err(|err| ErrorKind::Io(err).with_byte_offset(self.pos))?;
53         Ok(self.pos)
54     }
55 }
56 
57 impl<R: Read> Read for PosReader<R> {
read(&mut self, buf: &mut [u8]) -> io::Result<usize>58     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
59         let count = self.reader.read(buf)?;
60         self.pos
61             .checked_add(count as u64)
62             .expect("file cannot be larger than `u64::max_value()` bytes");
63         Ok(count)
64     }
65 }
66 
67 impl<R: Read + Seek> BinaryReader<R> {
new(reader: R) -> BinaryReader<R>68     pub fn new(reader: R) -> BinaryReader<R> {
69         BinaryReader {
70             stack: Vec::new(),
71             object_offsets: Vec::new(),
72             object_on_stack: Vec::new(),
73             reader: PosReader { reader, pos: 0 },
74             ref_size: 0,
75             root_object: 0,
76             trailer_start_offset: 0,
77         }
78     }
79 
allocate_vec<T>(&self, len: u64, size: usize) -> Result<Vec<T>, Error>80     fn allocate_vec<T>(&self, len: u64, size: usize) -> Result<Vec<T>, Error> {
81         // Check we are not reading past the start of the plist trailer
82         let inner = |len: u64, size: usize| {
83             let byte_len = len.checked_mul(size as u64)?;
84             let end_offset = self.reader.pos.checked_add(byte_len)?;
85             if end_offset <= self.trailer_start_offset {
86                 Some(())
87             } else {
88                 None
89             }
90         };
91         inner(len, size).ok_or_else(|| self.with_pos(ErrorKind::ObjectOffsetTooLarge))?;
92 
93         Ok(Vec::with_capacity(len as usize))
94     }
95 
read_trailer(&mut self) -> Result<(), Error>96     fn read_trailer(&mut self) -> Result<(), Error> {
97         self.reader.seek(SeekFrom::Start(0))?;
98         let mut magic = [0; 8];
99         self.reader.read_all(&mut magic)?;
100         if &magic != b"bplist00" {
101             return Err(self.with_pos(ErrorKind::InvalidMagic));
102         }
103 
104         self.trailer_start_offset = self.reader.seek(SeekFrom::End(-32))?;
105 
106         // Trailer starts with 6 bytes of padding
107         let mut zeros = [0; 6];
108         self.reader.read_all(&mut zeros)?;
109 
110         let offset_size = self.read_u8()?;
111         match offset_size {
112             1 | 2 | 4 | 8 => (),
113             _ => return Err(self.with_pos(ErrorKind::InvalidTrailerObjectOffsetSize)),
114         }
115 
116         self.ref_size = self.read_u8()?;
117         match self.ref_size {
118             1 | 2 | 4 | 8 => (),
119             _ => return Err(self.with_pos(ErrorKind::InvalidTrailerObjectReferenceSize)),
120         }
121 
122         let num_objects = self.read_be_u64()?;
123         self.root_object = self.read_be_u64()?;
124         let offset_table_offset = self.read_be_u64()?;
125 
126         // Read offset table
127         self.reader.seek(SeekFrom::Start(offset_table_offset))?;
128         self.object_offsets = self.read_ints(num_objects, offset_size)?;
129         self.object_on_stack = vec![false; self.object_offsets.len()];
130 
131         Ok(())
132     }
133 
134     /// Reads a list of `len` big-endian integers of `size` bytes from the reader.
read_ints(&mut self, len: u64, size: u8) -> Result<Vec<u64>, Error>135     fn read_ints(&mut self, len: u64, size: u8) -> Result<Vec<u64>, Error> {
136         let mut ints = self.allocate_vec(len, size as usize)?;
137         for _ in 0..len {
138             match size {
139                 1 => ints.push(self.read_u8()?.into()),
140                 2 => ints.push(self.read_be_u16()?.into()),
141                 4 => ints.push(self.read_be_u32()?.into()),
142                 8 => ints.push(self.read_be_u64()?),
143                 _ => unreachable!("size is either self.ref_size or offset_size both of which are already validated")
144             }
145         }
146         Ok(ints)
147     }
148 
149     /// Reads a list of `len` offsets into the object table from the reader.
read_refs(&mut self, len: u64) -> Result<Vec<u64>, Error>150     fn read_refs(&mut self, len: u64) -> Result<Vec<u64>, Error> {
151         let ref_size = self.ref_size;
152         self.read_ints(len, ref_size)
153     }
154 
155     /// Reads a compressed value length from the reader. `len` must contain the low 4 bits of the
156     /// object token.
read_object_len(&mut self, len: u8) -> Result<u64, Error>157     fn read_object_len(&mut self, len: u8) -> Result<u64, Error> {
158         if (len & 0x0f) == 0x0f {
159             let len_power_of_two = self.read_u8()? & 0x03;
160             Ok(match len_power_of_two {
161                 0 => self.read_u8()?.into(),
162                 1 => self.read_be_u16()?.into(),
163                 2 => self.read_be_u32()?.into(),
164                 3 => self.read_be_u64()?,
165                 _ => return Err(self.with_pos(ErrorKind::InvalidObjectLength)),
166             })
167         } else {
168             Ok(len.into())
169         }
170     }
171 
172     /// Reads `len` bytes from the reader.
read_data(&mut self, len: u64) -> Result<Vec<u8>, Error>173     fn read_data(&mut self, len: u64) -> Result<Vec<u8>, Error> {
174         let mut data = self.allocate_vec(len, size_of::<u8>())?;
175         data.resize(len as usize, 0);
176         self.reader.read_all(&mut data)?;
177         Ok(data)
178     }
179 
seek_to_object(&mut self, object_ref: u64) -> Result<u64, Error>180     fn seek_to_object(&mut self, object_ref: u64) -> Result<u64, Error> {
181         let object_ref = u64_to_usize(object_ref)
182             .ok_or_else(|| self.with_pos(ErrorKind::ObjectReferenceTooLarge))?;
183         let offset = *self
184             .object_offsets
185             .get(object_ref)
186             .ok_or_else(|| self.with_pos(ErrorKind::ObjectReferenceTooLarge))?;
187         if offset >= self.trailer_start_offset {
188             return Err(self.with_pos(ErrorKind::ObjectOffsetTooLarge));
189         }
190         Ok(self.reader.seek(SeekFrom::Start(offset))?)
191     }
192 
push_stack_item_and_check_for_recursion(&mut self, item: StackItem) -> Result<(), Error>193     fn push_stack_item_and_check_for_recursion(&mut self, item: StackItem) -> Result<(), Error> {
194         let object_ref = u64_to_usize(item.object_ref).expect("internal consistency error");
195         let is_on_stack = &mut self.object_on_stack[object_ref];
196         if *is_on_stack {
197             return Err(self.with_pos(ErrorKind::RecursiveObject));
198         }
199         *is_on_stack = true;
200         self.stack.push(item);
201         Ok(())
202     }
203 
pop_stack_item(&mut self) -> StackItem204     fn pop_stack_item(&mut self) -> StackItem {
205         let item = self.stack.pop().expect("internal consistency error");
206         let object_ref = u64_to_usize(item.object_ref).expect("internal consistency error");
207         self.object_on_stack[object_ref] = false;
208         item
209     }
210 
read_next(&mut self) -> Result<Option<Event>, Error>211     fn read_next(&mut self) -> Result<Option<Event>, Error> {
212         let object_ref = if self.ref_size == 0 {
213             // Initialise here rather than in new
214             self.read_trailer()?;
215             self.root_object
216         } else {
217             let maybe_object_ref = if let Some(stack_item) = self.stack.last_mut() {
218                 stack_item.child_object_refs.pop()
219             } else {
220                 // Finished reading the plist
221                 return Ok(None);
222             };
223 
224             if let Some(object_ref) = maybe_object_ref {
225                 object_ref
226             } else {
227                 // We're at the end of an array or dict. Pop the top stack item and return.
228                 let stack_item = self.pop_stack_item();
229                 match stack_item.ty {
230                     StackType::Array | StackType::Dict => return Ok(Some(Event::EndCollection)),
231                 }
232             }
233         };
234 
235         self.seek_to_object(object_ref)?;
236 
237         let token = self.read_u8()?;
238         let ty = (token & 0xf0) >> 4;
239         let size = token & 0x0f;
240 
241         let result = match (ty, size) {
242             (0x0, 0x00) => return Err(self.with_pos(ErrorKind::NullObjectUnimplemented)),
243             (0x0, 0x08) => Some(Event::Boolean(false)),
244             (0x0, 0x09) => Some(Event::Boolean(true)),
245             (0x0, 0x0f) => return Err(self.with_pos(ErrorKind::FillObjectUnimplemented)),
246             (0x1, 0) => Some(Event::Integer(self.read_u8()?.into())),
247             (0x1, 1) => Some(Event::Integer(self.read_be_u16()?.into())),
248             (0x1, 2) => Some(Event::Integer(self.read_be_u32()?.into())),
249             (0x1, 3) => Some(Event::Integer(self.read_be_i64()?.into())),
250             (0x1, 4) => {
251                 let value = self.read_be_i128()?;
252                 if value < 0 || value > u64::max_value().into() {
253                     return Err(self.with_pos(ErrorKind::IntegerOutOfRange));
254                 }
255                 Some(Event::Integer((value as u64).into()))
256             }
257             (0x1, _) => return Err(self.with_pos(ErrorKind::UnknownObjectType(token))), // variable length int
258             (0x2, 2) => Some(Event::Real(f32::from_bits(self.read_be_u32()?).into())),
259             (0x2, 3) => Some(Event::Real(f64::from_bits(self.read_be_u64()?))),
260             (0x2, _) => return Err(self.with_pos(ErrorKind::UnknownObjectType(token))), // odd length float
261             (0x3, 3) => {
262                 // Date. Seconds since 1/1/2001 00:00:00.
263                 let secs = f64::from_bits(self.read_be_u64()?);
264                 let date = Date::from_seconds_since_plist_epoch(secs)
265                     .map_err(|InfiniteOrNanDate| self.with_pos(ErrorKind::InfiniteOrNanDate))?;
266                 Some(Event::Date(date))
267             }
268             (0x4, n) => {
269                 // Data
270                 let len = self.read_object_len(n)?;
271                 Some(Event::Data(self.read_data(len)?))
272             }
273             (0x5, n) => {
274                 // ASCII string
275                 let len = self.read_object_len(n)?;
276                 let raw = self.read_data(len)?;
277                 let string = String::from_utf8(raw)
278                     .map_err(|_| self.with_pos(ErrorKind::InvalidUtf8String))?;
279                 Some(Event::String(string))
280             }
281             (0x6, n) => {
282                 // UTF-16 string
283                 let len_utf16_codepoints = self.read_object_len(n)?;
284                 let mut raw_utf16 = self.allocate_vec(len_utf16_codepoints, size_of::<u16>())?;
285 
286                 for _ in 0..len_utf16_codepoints {
287                     raw_utf16.push(self.read_be_u16()?);
288                 }
289 
290                 let string = String::from_utf16(&raw_utf16)
291                     .map_err(|_| self.with_pos(ErrorKind::InvalidUtf16String))?;
292                 Some(Event::String(string))
293             }
294             (0x8, n) if n < 8 => {
295                 // Uid
296                 let mut buf = [0; 8];
297                 // `len_bytes` is at most 8.
298                 let len_bytes = n as usize + 1;
299                 // Values are stored in big-endian so we must put the least significant bytes at
300                 // the end of the buffer.
301                 self.reader.read_all(&mut buf[8 - len_bytes..])?;
302                 let value = u64::from_be_bytes(buf);
303 
304                 Some(Event::Uid(Uid::new(value)))
305             }
306             (0xa, n) => {
307                 // Array
308                 let len = self.read_object_len(n)?;
309                 let mut child_object_refs = self.read_refs(len)?;
310                 // Reverse so we can pop off the end of the stack in order
311                 child_object_refs.reverse();
312 
313                 self.push_stack_item_and_check_for_recursion(StackItem {
314                     object_ref,
315                     ty: StackType::Array,
316                     child_object_refs,
317                 })?;
318 
319                 Some(Event::StartArray(Some(len)))
320             }
321             (0xd, n) => {
322                 // Dict
323                 let len = self.read_object_len(n)?;
324                 let key_refs = self.read_refs(len)?;
325                 let value_refs = self.read_refs(len)?;
326 
327                 let keys_and_values_len = len
328                     .checked_mul(2)
329                     .ok_or_else(|| self.with_pos(ErrorKind::ObjectTooLarge))?;
330                 let mut child_object_refs =
331                     self.allocate_vec(keys_and_values_len, self.ref_size as usize)?;
332                 let len = key_refs.len();
333                 for i in 1..=len {
334                     // Reverse so we can pop off the end of the stack in order
335                     child_object_refs.push(value_refs[len - i]);
336                     child_object_refs.push(key_refs[len - i]);
337                 }
338 
339                 self.push_stack_item_and_check_for_recursion(StackItem {
340                     object_ref,
341                     ty: StackType::Dict,
342                     child_object_refs,
343                 })?;
344 
345                 Some(Event::StartDictionary(Some(len as u64)))
346             }
347             (_, _) => return Err(self.with_pos(ErrorKind::UnknownObjectType(token))),
348         };
349 
350         Ok(result)
351     }
352 
read_u8(&mut self) -> Result<u8, Error>353     fn read_u8(&mut self) -> Result<u8, Error> {
354         let mut buf = [0; 1];
355         self.reader.read_all(&mut buf)?;
356         Ok(buf[0])
357     }
358 
read_be_u16(&mut self) -> Result<u16, Error>359     fn read_be_u16(&mut self) -> Result<u16, Error> {
360         let mut buf = [0; 2];
361         self.reader.read_all(&mut buf)?;
362         Ok(u16::from_be_bytes(buf))
363     }
364 
read_be_u32(&mut self) -> Result<u32, Error>365     fn read_be_u32(&mut self) -> Result<u32, Error> {
366         let mut buf = [0; 4];
367         self.reader.read_all(&mut buf)?;
368         Ok(u32::from_be_bytes(buf))
369     }
370 
read_be_u64(&mut self) -> Result<u64, Error>371     fn read_be_u64(&mut self) -> Result<u64, Error> {
372         let mut buf = [0; 8];
373         self.reader.read_all(&mut buf)?;
374         Ok(u64::from_be_bytes(buf))
375     }
376 
read_be_i64(&mut self) -> Result<i64, Error>377     fn read_be_i64(&mut self) -> Result<i64, Error> {
378         let mut buf = [0; 8];
379         self.reader.read_all(&mut buf)?;
380         Ok(i64::from_be_bytes(buf))
381     }
382 
read_be_i128(&mut self) -> Result<i128, Error>383     fn read_be_i128(&mut self) -> Result<i128, Error> {
384         let mut buf = [0; 16];
385         self.reader.read_all(&mut buf)?;
386         Ok(i128::from_be_bytes(buf))
387     }
388 
with_pos(&self, kind: ErrorKind) -> Error389     fn with_pos(&self, kind: ErrorKind) -> Error {
390         kind.with_byte_offset(self.reader.pos)
391     }
392 }
393 
394 impl<R: Read + Seek> Iterator for BinaryReader<R> {
395     type Item = Result<Event, Error>;
396 
next(&mut self) -> Option<Result<Event, Error>>397     fn next(&mut self) -> Option<Result<Event, Error>> {
398         match self.read_next() {
399             Ok(Some(event)) => Some(Ok(event)),
400             Err(err) => {
401                 // Mark the plist as finished
402                 self.stack.clear();
403                 Some(Err(err))
404             }
405             Ok(None) => None,
406         }
407     }
408 }
409 
#[cfg(test)]
mod tests {
    use std::{fs::File, path::Path};

    use super::*;
    use crate::{stream::Event, Uid};

    /// Parses the plist at `path` and collects all of its events, panicking on any failure.
    fn read_events(path: &str) -> Vec<Event> {
        let file = File::open(Path::new(path)).unwrap();
        BinaryReader::new(file)
            .map(|event| event.unwrap())
            .collect()
    }

    /// The full event stream of the reference plist matches the expected sequence.
    #[test]
    fn streaming_parser() {
        use crate::stream::Event::*;

        let events = read_events("./tests/data/binary.plist");

        let expected = [
            StartDictionary(Some(13)),
            String("Author".into()),
            String("William Shakespeare".into()),
            String("Birthdate".into()),
            Date(super::Date::from_rfc3339("1981-05-16T11:32:06Z").unwrap()),
            String("EmptyArray".into()),
            StartArray(Some(0)),
            EndCollection,
            String("IsNotFalse".into()),
            Boolean(false),
            String("SmallestNumber".into()),
            Integer((-9223372036854775808i64).into()),
            String("EmptyDictionary".into()),
            StartDictionary(Some(0)),
            EndCollection,
            String("Height".into()),
            Real(1.6),
            String("Lines".into()),
            StartArray(Some(2)),
            String("It is a tale told by an idiot,".into()),
            String("Full of sound and fury, signifying nothing.".into()),
            EndCollection,
            String("Death".into()),
            Integer(1564.into()),
            String("Blank".into()),
            String("".into()),
            String("BiggestNumber".into()),
            Integer(18446744073709551615u64.into()),
            String("IsTrue".into()),
            Boolean(true),
            String("Data".into()),
            Data(vec![0, 0, 0, 190, 0, 0, 0, 3, 0, 0, 0, 30, 0, 0, 0]),
            EndCollection,
        ];

        assert_eq!(events, &expected[..]);
    }

    /// UTF-16 encoded strings are decoded correctly.
    #[test]
    fn utf16_plist() {
        let mut events = read_events("./tests/data/utf16_bplist.plist");

        assert_eq!(events[2], Event::String("\u{2605} or better".to_owned()));

        let poem = match events[4] {
            Event::String(ref mut poem) => poem,
            _ => panic!("not a string"),
        };
        assert_eq!(poem.len(), 643);
        assert_eq!(poem.pop().unwrap(), '\u{2605}');
    }

    /// Uid objects in an NSKeyedArchiver plist are surfaced as `Event::Uid`.
    #[test]
    fn nskeyedarchiver_plist() {
        let events = read_events("./tests/data/binary_NSKeyedArchiver.plist");

        let expected_uids: [(usize, u64); 4] = [(10, 4), (12, 2), (18, 3), (46, 1)];
        for &(index, uid) in expected_uids.iter() {
            assert_eq!(events[index], Event::Uid(Uid::new(uid)));
        }
    }
}
493