1 use base64;
2 use std::{
3 io::{self, Read},
4 str::FromStr,
5 };
6 use xml_rs::{
7 common::{is_whitespace_str, Position},
8 reader::{
9 Error as XmlReaderError, ErrorKind as XmlReaderErrorKind, EventReader, ParserConfig,
10 XmlEvent,
11 },
12 };
13
14 use crate::{
15 error::{Error, ErrorKind, FilePosition},
16 stream::{Event, OwnedEvent},
17 Date, Integer,
18 };
19
20 pub struct XmlReader<R: Read> {
21 xml_reader: EventReader<R>,
22 queued_event: Option<XmlEvent>,
23 element_stack: Vec<String>,
24 finished: bool,
25 }
26
27 impl<R: Read> XmlReader<R> {
new(reader: R) -> XmlReader<R>28 pub fn new(reader: R) -> XmlReader<R> {
29 let config = ParserConfig::new()
30 .trim_whitespace(false)
31 .whitespace_to_characters(true)
32 .cdata_to_characters(true)
33 .ignore_comments(true)
34 .coalesce_characters(true);
35
36 XmlReader {
37 xml_reader: EventReader::new_with_config(reader, config),
38 queued_event: None,
39 element_stack: Vec::new(),
40 finished: false,
41 }
42 }
43
read_content(&mut self) -> Result<String, Error>44 fn read_content(&mut self) -> Result<String, Error> {
45 loop {
46 match self.xml_reader.next() {
47 Ok(XmlEvent::Characters(s)) => return Ok(s),
48 Ok(event @ XmlEvent::EndElement { .. }) => {
49 self.queued_event = Some(event);
50 return Ok("".to_owned());
51 }
52 Ok(XmlEvent::EndDocument) => {
53 return Err(self.with_pos(ErrorKind::UnclosedXmlElement))
54 }
55 Ok(XmlEvent::StartElement { .. }) => {
56 return Err(self.with_pos(ErrorKind::UnexpectedXmlOpeningTag));
57 }
58 Ok(XmlEvent::ProcessingInstruction { .. }) => (),
59 Ok(XmlEvent::StartDocument { .. })
60 | Ok(XmlEvent::CData(_))
61 | Ok(XmlEvent::Comment(_))
62 | Ok(XmlEvent::Whitespace(_)) => {
63 unreachable!("parser does not output CData, Comment or Whitespace events");
64 }
65 Err(err) => return Err(from_xml_error(err)),
66 }
67 }
68 }
69
next_event(&mut self) -> Result<XmlEvent, XmlReaderError>70 fn next_event(&mut self) -> Result<XmlEvent, XmlReaderError> {
71 if let Some(event) = self.queued_event.take() {
72 Ok(event)
73 } else {
74 self.xml_reader.next()
75 }
76 }
77
read_next(&mut self) -> Result<Option<OwnedEvent>, Error>78 fn read_next(&mut self) -> Result<Option<OwnedEvent>, Error> {
79 loop {
80 match self.next_event() {
81 Ok(XmlEvent::StartDocument { .. }) => {}
82 Ok(XmlEvent::StartElement { name, .. }) => {
83 // Add the current element to the element stack
84 self.element_stack.push(name.local_name.clone());
85
86 match &name.local_name[..] {
87 "plist" => (),
88 "array" => return Ok(Some(Event::StartArray(None))),
89 "dict" => return Ok(Some(Event::StartDictionary(None))),
90 "key" => return Ok(Some(Event::String(self.read_content()?.into()))),
91 "true" => return Ok(Some(Event::Boolean(true))),
92 "false" => return Ok(Some(Event::Boolean(false))),
93 "data" => {
94 let mut s = self.read_content()?;
95 // Strip whitespace and line endings from input string
96 s.retain(|c| !c.is_ascii_whitespace());
97 let data = base64::decode(&s)
98 .map_err(|_| self.with_pos(ErrorKind::InvalidDataString))?;
99 return Ok(Some(Event::Data(data.into())));
100 }
101 "date" => {
102 let s = self.read_content()?;
103 let date = Date::from_rfc3339(&s)
104 .map_err(|()| self.with_pos(ErrorKind::InvalidDateString))?;
105 return Ok(Some(Event::Date(date)));
106 }
107 "integer" => {
108 let s = self.read_content()?;
109 match Integer::from_str(&s) {
110 Ok(i) => return Ok(Some(Event::Integer(i))),
111 Err(_) => {
112 return Err(self.with_pos(ErrorKind::InvalidIntegerString))
113 }
114 }
115 }
116 "real" => {
117 let s = self.read_content()?;
118 match f64::from_str(&s) {
119 Ok(f) => return Ok(Some(Event::Real(f))),
120 Err(_) => return Err(self.with_pos(ErrorKind::InvalidRealString)),
121 }
122 }
123 "string" => return Ok(Some(Event::String(self.read_content()?.into()))),
124 _ => return Err(self.with_pos(ErrorKind::UnknownXmlElement)),
125 }
126 }
127 Ok(XmlEvent::EndElement { name, .. }) => {
128 // Check the corrent element is being closed
129 match self.element_stack.pop() {
130 Some(ref open_name) if &name.local_name == open_name => (),
131 Some(ref _open_name) => {
132 return Err(self.with_pos(ErrorKind::UnclosedXmlElement))
133 }
134 None => return Err(self.with_pos(ErrorKind::UnpairedXmlClosingTag)),
135 }
136
137 match &name.local_name[..] {
138 "array" | "dict" => return Ok(Some(Event::EndCollection)),
139 "plist" | _ => (),
140 }
141 }
142 Ok(XmlEvent::EndDocument) => {
143 if self.element_stack.is_empty() {
144 return Ok(None);
145 } else {
146 return Err(self.with_pos(ErrorKind::UnclosedXmlElement));
147 }
148 }
149
150 Ok(XmlEvent::Characters(c)) => {
151 if !is_whitespace_str(&c) {
152 return Err(
153 self.with_pos(ErrorKind::UnexpectedXmlCharactersExpectedElement)
154 );
155 }
156 }
157 Ok(XmlEvent::CData(_)) | Ok(XmlEvent::Comment(_)) | Ok(XmlEvent::Whitespace(_)) => {
158 unreachable!("parser does not output CData, Comment or Whitespace events")
159 }
160 Ok(XmlEvent::ProcessingInstruction { .. }) => (),
161 Err(err) => return Err(from_xml_error(err)),
162 }
163 }
164 }
165
with_pos(&self, kind: ErrorKind) -> Error166 fn with_pos(&self, kind: ErrorKind) -> Error {
167 kind.with_position(convert_xml_pos(self.xml_reader.position()))
168 }
169 }
170
171 impl<R: Read> Iterator for XmlReader<R> {
172 type Item = Result<OwnedEvent, Error>;
173
next(&mut self) -> Option<Result<OwnedEvent, Error>>174 fn next(&mut self) -> Option<Result<OwnedEvent, Error>> {
175 if self.finished {
176 None
177 } else {
178 match self.read_next() {
179 Ok(Some(event)) => Some(Ok(event)),
180 Ok(None) => {
181 self.finished = true;
182 None
183 }
184 Err(err) => {
185 self.finished = true;
186 Some(Err(err))
187 }
188 }
189 }
190 }
191 }
192
convert_xml_pos(pos: xml_rs::common::TextPosition) -> FilePosition193 fn convert_xml_pos(pos: xml_rs::common::TextPosition) -> FilePosition {
194 // TODO: pos.row and pos.column counts from 0. what do we want to do?
195 FilePosition::LineColumn(pos.row, pos.column)
196 }
197
from_xml_error(err: XmlReaderError) -> Error198 fn from_xml_error(err: XmlReaderError) -> Error {
199 let kind = match err.kind() {
200 XmlReaderErrorKind::Io(err) if err.kind() == io::ErrorKind::UnexpectedEof => {
201 ErrorKind::UnexpectedEof
202 }
203 XmlReaderErrorKind::Io(err) => {
204 let err = if let Some(code) = err.raw_os_error() {
205 io::Error::from_raw_os_error(code)
206 } else {
207 io::Error::new(err.kind(), err.to_string())
208 };
209 ErrorKind::Io(err)
210 }
211 XmlReaderErrorKind::Syntax(_) => ErrorKind::InvalidXmlSyntax,
212 XmlReaderErrorKind::UnexpectedEof => ErrorKind::UnexpectedEof,
213 XmlReaderErrorKind::Utf8(_) => ErrorKind::InvalidXmlUtf8,
214 };
215
216 kind.with_position(convert_xml_pos(err.position()))
217 }
218
#[cfg(test)]
mod tests {
    use std::{fs::File, path::Path};

    use super::*;
    use crate::stream::Event;

    /// Opens a fixture file and wraps it in an `XmlReader`.
    fn open_reader(path: &str) -> XmlReader<File> {
        XmlReader::new(File::open(Path::new(path)).unwrap())
    }

    /// Parses the reference plist and checks the exact event sequence.
    #[test]
    fn streaming_parser() {
        let mut events: Vec<Event> = Vec::new();
        for item in open_reader("./tests/data/xml.plist") {
            events.push(item.unwrap());
        }

        let comparison = vec![
            Event::StartDictionary(None),
            Event::String("Author".into()),
            Event::String("William Shakespeare".into()),
            Event::String("Lines".into()),
            Event::StartArray(None),
            Event::String("It is a tale told by an idiot,".into()),
            Event::String("Full of sound and fury, signifying nothing.".into()),
            Event::EndCollection,
            Event::String("Death".into()),
            Event::Integer(1564.into()),
            Event::String("Height".into()),
            Event::Real(1.60),
            Event::String("Data".into()),
            Event::Data(vec![0, 0, 0, 190, 0, 0, 0, 3, 0, 0, 0, 30, 0, 0, 0].into()),
            Event::String("Birthdate".into()),
            Event::Date(Date::from_rfc3339("1981-05-16T11:32:06Z").unwrap()),
            Event::String("Blank".into()),
            Event::String("".into()),
            Event::String("BiggestNumber".into()),
            Event::Integer(18446744073709551615u64.into()),
            Event::String("SmallestNumber".into()),
            Event::Integer((-9223372036854775808i64).into()),
            Event::String("HexademicalNumber".into()),
            Event::Integer(0xdead_beef_u64.into()),
            Event::String("IsTrue".into()),
            Event::Boolean(true),
            Event::String("IsNotFalse".into()),
            Event::Boolean(false),
            Event::EndCollection,
        ];

        assert_eq!(events, comparison);
    }

    /// A malformed plist must end the stream with an error as its last item.
    #[test]
    fn bad_data() {
        let final_item = open_reader("./tests/data/xml_error.plist").last().unwrap();

        assert!(final_item.is_err());
    }
}
276