1 use base64;
2 use std::{
3     io::{self, Read},
4     str::FromStr,
5 };
6 use xml_rs::{
7     common::{is_whitespace_str, Position},
8     reader::{
9         Error as XmlReaderError, ErrorKind as XmlReaderErrorKind, EventReader, ParserConfig,
10         XmlEvent,
11     },
12 };
13 
14 use crate::{
15     error::{Error, ErrorKind, FilePosition},
16     stream::{Event, OwnedEvent},
17     Date, Integer,
18 };
19 
20 pub struct XmlReader<R: Read> {
21     xml_reader: EventReader<R>,
22     queued_event: Option<XmlEvent>,
23     element_stack: Vec<String>,
24     finished: bool,
25 }
26 
27 impl<R: Read> XmlReader<R> {
new(reader: R) -> XmlReader<R>28     pub fn new(reader: R) -> XmlReader<R> {
29         let config = ParserConfig::new()
30             .trim_whitespace(false)
31             .whitespace_to_characters(true)
32             .cdata_to_characters(true)
33             .ignore_comments(true)
34             .coalesce_characters(true);
35 
36         XmlReader {
37             xml_reader: EventReader::new_with_config(reader, config),
38             queued_event: None,
39             element_stack: Vec::new(),
40             finished: false,
41         }
42     }
43 
read_content(&mut self) -> Result<String, Error>44     fn read_content(&mut self) -> Result<String, Error> {
45         loop {
46             match self.xml_reader.next() {
47                 Ok(XmlEvent::Characters(s)) => return Ok(s),
48                 Ok(event @ XmlEvent::EndElement { .. }) => {
49                     self.queued_event = Some(event);
50                     return Ok("".to_owned());
51                 }
52                 Ok(XmlEvent::EndDocument) => {
53                     return Err(self.with_pos(ErrorKind::UnclosedXmlElement))
54                 }
55                 Ok(XmlEvent::StartElement { .. }) => {
56                     return Err(self.with_pos(ErrorKind::UnexpectedXmlOpeningTag));
57                 }
58                 Ok(XmlEvent::ProcessingInstruction { .. }) => (),
59                 Ok(XmlEvent::StartDocument { .. })
60                 | Ok(XmlEvent::CData(_))
61                 | Ok(XmlEvent::Comment(_))
62                 | Ok(XmlEvent::Whitespace(_)) => {
63                     unreachable!("parser does not output CData, Comment or Whitespace events");
64                 }
65                 Err(err) => return Err(from_xml_error(err)),
66             }
67         }
68     }
69 
next_event(&mut self) -> Result<XmlEvent, XmlReaderError>70     fn next_event(&mut self) -> Result<XmlEvent, XmlReaderError> {
71         if let Some(event) = self.queued_event.take() {
72             Ok(event)
73         } else {
74             self.xml_reader.next()
75         }
76     }
77 
read_next(&mut self) -> Result<Option<OwnedEvent>, Error>78     fn read_next(&mut self) -> Result<Option<OwnedEvent>, Error> {
79         loop {
80             match self.next_event() {
81                 Ok(XmlEvent::StartDocument { .. }) => {}
82                 Ok(XmlEvent::StartElement { name, .. }) => {
83                     // Add the current element to the element stack
84                     self.element_stack.push(name.local_name.clone());
85 
86                     match &name.local_name[..] {
87                         "plist" => (),
88                         "array" => return Ok(Some(Event::StartArray(None))),
89                         "dict" => return Ok(Some(Event::StartDictionary(None))),
90                         "key" => return Ok(Some(Event::String(self.read_content()?.into()))),
91                         "true" => return Ok(Some(Event::Boolean(true))),
92                         "false" => return Ok(Some(Event::Boolean(false))),
93                         "data" => {
94                             let mut s = self.read_content()?;
95                             // Strip whitespace and line endings from input string
96                             s.retain(|c| !c.is_ascii_whitespace());
97                             let data = base64::decode(&s)
98                                 .map_err(|_| self.with_pos(ErrorKind::InvalidDataString))?;
99                             return Ok(Some(Event::Data(data.into())));
100                         }
101                         "date" => {
102                             let s = self.read_content()?;
103                             let date = Date::from_rfc3339(&s)
104                                 .map_err(|()| self.with_pos(ErrorKind::InvalidDateString))?;
105                             return Ok(Some(Event::Date(date)));
106                         }
107                         "integer" => {
108                             let s = self.read_content()?;
109                             match Integer::from_str(&s) {
110                                 Ok(i) => return Ok(Some(Event::Integer(i))),
111                                 Err(_) => {
112                                     return Err(self.with_pos(ErrorKind::InvalidIntegerString))
113                                 }
114                             }
115                         }
116                         "real" => {
117                             let s = self.read_content()?;
118                             match f64::from_str(&s) {
119                                 Ok(f) => return Ok(Some(Event::Real(f))),
120                                 Err(_) => return Err(self.with_pos(ErrorKind::InvalidRealString)),
121                             }
122                         }
123                         "string" => return Ok(Some(Event::String(self.read_content()?.into()))),
124                         _ => return Err(self.with_pos(ErrorKind::UnknownXmlElement)),
125                     }
126                 }
127                 Ok(XmlEvent::EndElement { name, .. }) => {
128                     // Check the corrent element is being closed
129                     match self.element_stack.pop() {
130                         Some(ref open_name) if &name.local_name == open_name => (),
131                         Some(ref _open_name) => {
132                             return Err(self.with_pos(ErrorKind::UnclosedXmlElement))
133                         }
134                         None => return Err(self.with_pos(ErrorKind::UnpairedXmlClosingTag)),
135                     }
136 
137                     match &name.local_name[..] {
138                         "array" | "dict" => return Ok(Some(Event::EndCollection)),
139                         "plist" | _ => (),
140                     }
141                 }
142                 Ok(XmlEvent::EndDocument) => {
143                     if self.element_stack.is_empty() {
144                         return Ok(None);
145                     } else {
146                         return Err(self.with_pos(ErrorKind::UnclosedXmlElement));
147                     }
148                 }
149 
150                 Ok(XmlEvent::Characters(c)) => {
151                     if !is_whitespace_str(&c) {
152                         return Err(
153                             self.with_pos(ErrorKind::UnexpectedXmlCharactersExpectedElement)
154                         );
155                     }
156                 }
157                 Ok(XmlEvent::CData(_)) | Ok(XmlEvent::Comment(_)) | Ok(XmlEvent::Whitespace(_)) => {
158                     unreachable!("parser does not output CData, Comment or Whitespace events")
159                 }
160                 Ok(XmlEvent::ProcessingInstruction { .. }) => (),
161                 Err(err) => return Err(from_xml_error(err)),
162             }
163         }
164     }
165 
with_pos(&self, kind: ErrorKind) -> Error166     fn with_pos(&self, kind: ErrorKind) -> Error {
167         kind.with_position(convert_xml_pos(self.xml_reader.position()))
168     }
169 }
170 
171 impl<R: Read> Iterator for XmlReader<R> {
172     type Item = Result<OwnedEvent, Error>;
173 
next(&mut self) -> Option<Result<OwnedEvent, Error>>174     fn next(&mut self) -> Option<Result<OwnedEvent, Error>> {
175         if self.finished {
176             None
177         } else {
178             match self.read_next() {
179                 Ok(Some(event)) => Some(Ok(event)),
180                 Ok(None) => {
181                     self.finished = true;
182                     None
183                 }
184                 Err(err) => {
185                     self.finished = true;
186                     Some(Err(err))
187                 }
188             }
189         }
190     }
191 }
192 
convert_xml_pos(pos: xml_rs::common::TextPosition) -> FilePosition193 fn convert_xml_pos(pos: xml_rs::common::TextPosition) -> FilePosition {
194     // TODO: pos.row and pos.column counts from 0. what do we want to do?
195     FilePosition::LineColumn(pos.row, pos.column)
196 }
197 
from_xml_error(err: XmlReaderError) -> Error198 fn from_xml_error(err: XmlReaderError) -> Error {
199     let kind = match err.kind() {
200         XmlReaderErrorKind::Io(err) if err.kind() == io::ErrorKind::UnexpectedEof => {
201             ErrorKind::UnexpectedEof
202         }
203         XmlReaderErrorKind::Io(err) => {
204             let err = if let Some(code) = err.raw_os_error() {
205                 io::Error::from_raw_os_error(code)
206             } else {
207                 io::Error::new(err.kind(), err.to_string())
208             };
209             ErrorKind::Io(err)
210         }
211         XmlReaderErrorKind::Syntax(_) => ErrorKind::InvalidXmlSyntax,
212         XmlReaderErrorKind::UnexpectedEof => ErrorKind::UnexpectedEof,
213         XmlReaderErrorKind::Utf8(_) => ErrorKind::InvalidXmlUtf8,
214     };
215 
216     kind.with_position(convert_xml_pos(err.position()))
217 }
218 
#[cfg(test)]
mod tests {
    use std::{fs::File, path::Path};

    use super::*;
    use crate::stream::Event::{self, *};

    /// Opens the fixture at `path` and wraps it in an `XmlReader`.
    ///
    /// Panics if the file cannot be opened.
    fn open_parser(path: &str) -> XmlReader<File> {
        let file = File::open(Path::new(path)).unwrap();
        XmlReader::new(file)
    }

    /// The reference document decodes to exactly the expected event sequence.
    #[test]
    fn streaming_parser() {
        let decoded: Vec<Event> = open_parser("./tests/data/xml.plist")
            .map(|item| item.unwrap())
            .collect();

        let expected = vec![
            StartDictionary(None),
            String("Author".into()),
            String("William Shakespeare".into()),
            String("Lines".into()),
            StartArray(None),
            String("It is a tale told by an idiot,".into()),
            String("Full of sound and fury, signifying nothing.".into()),
            EndCollection,
            String("Death".into()),
            Integer(1564.into()),
            String("Height".into()),
            Real(1.60),
            String("Data".into()),
            Data(vec![0, 0, 0, 190, 0, 0, 0, 3, 0, 0, 0, 30, 0, 0, 0].into()),
            String("Birthdate".into()),
            Date(super::Date::from_rfc3339("1981-05-16T11:32:06Z").unwrap()),
            String("Blank".into()),
            String("".into()),
            String("BiggestNumber".into()),
            Integer(18446744073709551615u64.into()),
            String("SmallestNumber".into()),
            Integer((-9223372036854775808i64).into()),
            String("HexademicalNumber".into()),
            Integer(0xdead_beef_u64.into()),
            String("IsTrue".into()),
            Boolean(true),
            String("IsNotFalse".into()),
            Boolean(false),
            EndCollection,
        ];

        assert_eq!(decoded, expected);
    }

    /// A malformed document ends the event stream with an error.
    #[test]
    fn bad_data() {
        // Drain the reader and keep only the final item it produced.
        let final_item = open_parser("./tests/data/xml_error.plist").last();

        assert!(final_item.unwrap().is_err());
    }
}
276