1 #![forbid(unsafe_code)]
2 
3 extern crate xml;
4 #[macro_use]
5 extern crate lazy_static;
6 
7 use std::env;
8 use std::fmt;
9 use std::fs::File;
10 use std::io::{BufRead, BufReader, Write, stderr};
11 use std::path::Path;
12 
13 use xml::name::OwnedName;
14 use xml::common::Position;
15 use xml::reader::{Result, XmlEvent, ParserConfig, EventReader};
16 
17 /// Dummy function that opens a file, parses it, and returns a `Result`.
18 /// There can be IO errors (from `File::open`) and XML errors (from the parser).
19 /// Having `impl From<std::io::Error> for xml::reader::Error` allows the user to
20 /// do this without defining their own error type.
21 #[allow(dead_code)]
count_event_in_file(name: &Path) -> Result<usize>22 fn count_event_in_file(name: &Path) -> Result<usize> {
23     let mut event_count = 0;
24     for event in EventReader::new(BufReader::new(try!(File::open(name)))) {
25         try!(event);
26         event_count += 1;
27     }
28     Ok(event_count)
29 }
30 
/// Runs `documents/sample_1.xml` through a reader with every option below set
/// to `true` and compares the events with `documents/sample_1_short.txt`.
/// Positions are left out of the comparison (`test_position = false`).
#[test]
fn sample_1_short() {
    let document = include_bytes!("documents/sample_1.xml");
    let expected = include_bytes!("documents/sample_1_short.txt");
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);

    test(document, expected, config, false);
}
45 
/// Runs `documents/sample_1.xml` through a reader with every option below set
/// to `false` and compares the events with `documents/sample_1_full.txt`.
/// Positions are left out of the comparison (`test_position = false`).
#[test]
fn sample_1_full() {
    let document = include_bytes!("documents/sample_1.xml");
    let expected = include_bytes!("documents/sample_1_full.txt");
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);

    test(document, expected, config, false);
}
60 
/// Runs `documents/sample_2.xml` through a reader with every option below set
/// to `true` and compares the events with `documents/sample_2_short.txt`.
/// Positions are left out of the comparison (`test_position = false`).
#[test]
fn sample_2_short() {
    let document = include_bytes!("documents/sample_2.xml");
    let expected = include_bytes!("documents/sample_2_short.txt");
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);

    test(document, expected, config, false);
}
75 
/// Runs `documents/sample_2.xml` through a reader with every option below set
/// to `false` and compares the events with `documents/sample_2_full.txt`.
/// Positions are left out of the comparison (`test_position = false`).
#[test]
fn sample_2_full() {
    let document = include_bytes!("documents/sample_2.xml");
    let expected = include_bytes!("documents/sample_2_full.txt");
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);

    test(document, expected, config, false);
}
90 
/// Runs `documents/sample_3.xml` through a reader with every option below set
/// to `true` and compares the events with `documents/sample_3_short.txt`.
/// Each compared line is prefixed with the reader position (`test_position = true`).
#[test]
fn sample_3_short() {
    let document = include_bytes!("documents/sample_3.xml");
    let expected = include_bytes!("documents/sample_3_short.txt");
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);

    test(document, expected, config, true);
}
105 
/// Runs `documents/sample_3.xml` through a reader with every option below set
/// to `false` and compares the events with `documents/sample_3_full.txt`.
/// Each compared line is prefixed with the reader position (`test_position = true`).
#[test]
fn sample_3_full() {
    let document = include_bytes!("documents/sample_3.xml");
    let expected = include_bytes!("documents/sample_3_full.txt");
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);

    test(document, expected, config, true);
}
120 
/// Runs `documents/sample_4.xml` through a reader with every option below set
/// to `true` and compares the events with `documents/sample_4_short.txt`.
/// Positions are left out of the comparison (`test_position = false`).
#[test]
fn sample_4_short() {
    let document = include_bytes!("documents/sample_4.xml");
    let expected = include_bytes!("documents/sample_4_short.txt");
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);

    test(document, expected, config, false);
}
135 
/// Runs `documents/sample_4.xml` through a reader with every option below set
/// to `false` and compares the events with `documents/sample_4_full.txt`.
/// Positions are left out of the comparison (`test_position = false`).
#[test]
fn sample_4_full() {
    let document = include_bytes!("documents/sample_4.xml");
    let expected = include_bytes!("documents/sample_4_full.txt");
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);

    test(document, expected, config, false);
}
151 
/// Runs `documents/sample_5.xml` through a reader with every boolean option
/// below set to `true` and three extra entities (`nbsp`, `copy`,
/// `NotEqualTilde`) registered, then compares the events with
/// `documents/sample_5_short.txt`. Positions are left out of the comparison.
#[test]
fn sample_5_short() {
    let document = include_bytes!("documents/sample_5.xml");
    let expected = include_bytes!("documents/sample_5_short.txt");
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true)
        .add_entity("nbsp", " ")
        .add_entity("copy", "©")
        .add_entity("NotEqualTilde", "≂̸");

    test(document, expected, config, false);
}
169 
/// Runs `documents/sample_6.xml` through a reader with every option below set
/// to `false` (including `ignore_root_level_whitespace`) and compares the
/// events with `documents/sample_6_full.txt`. Positions are left out of the
/// comparison (`test_position = false`).
#[test]
fn sample_6_full() {
    let document = include_bytes!("documents/sample_6.xml");
    let expected = include_bytes!("documents/sample_6_full.txt");
    let config = ParserConfig::new()
        .ignore_root_level_whitespace(false)
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);

    test(document, expected, config, false);
}
185 
/// Input that stops right after `<?xml` must yield exactly the expected
/// "Unexpected end of stream" error line under the default configuration.
#[test]
fn eof_1() {
    let truncated = br#"<?xml"#;
    let expected = br#"1:6 Unexpected end of stream: no root element found"#;

    test(truncated, expected, ParserConfig::new(), false);
}
195 
/// Input `<?xml&.,` must yield exactly the expected "Unexpected token" error
/// line under the default configuration.
#[test]
fn bad_1() {
    let malformed = br#"<?xml&.,"#;
    let expected = br#"1:6 Unexpected token: <?xml&"#;

    test(malformed, expected, ParserConfig::new(), false);
}
205 
/// A `--` inside a comment body must be reported as an error, both when it is
/// followed by a space and when it is followed by another dash.
#[test]
fn dashes_in_comments() {
    let followed_by_space: &[u8] = br#"<!-- comment -- --><hello/>"#;
    let space_error: &[u8] = br#"
            |1:14 Unexpected token '--' before ' '
        "#;

    let followed_by_dash: &[u8] = br#"<!-- comment ---><hello/>"#;
    let dash_error: &[u8] = br#"
            |1:14 Unexpected token '--' before '-'
        "#;

    // Same order as before: the space variant runs first.
    let cases = [
        (followed_by_space, space_error),
        (followed_by_dash, dash_error),
    ];
    for &(input, expected) in cases.iter() {
        test(input, expected, ParserConfig::new(), false);
    }
}
226 
/// Checks the reported positions for a document containing tab characters,
/// with `trim_whitespace` enabled and positions included in the comparison.
#[test]
fn tabs_1() {
    let document = b"\t<a>\t<b/></a>";
    let expected = br#"
            |1:2 StartDocument(1.0, UTF-8)
            |1:2 StartElement(a)
            |1:6 StartElement(b)
            |1:6 EndElement(b)
            |1:10 EndElement(a)
            |1:14 EndDocument
        "#;
    let config = ParserConfig::new().trim_whitespace(true);

    test(document, expected, config, true);
}
244 
/// Regression test for issue 83: an attribute given twice on the same element
/// must produce the "is redefined" error line after the first two events.
#[test]
fn issue_83_duplicate_attributes() {
    let document = br#"<hello><some-tag a='10' a="20"></hello>"#;
    let expected = br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |1:30 Attribute 'a' is redefined
        "#;

    test(document, expected, ParserConfig::new(), false);
}
258 
/// Regression test for issue 93: an entity reference whose name is a single
/// character outside the Basic Multilingual Plane must be reported as an
/// unexpected entity.
#[test]
fn issue_93_large_characters_in_entity_references() {
    // The character is written as a `\u{...}` escape so the source stays
    // ASCII-only and cannot be damaged by a lossy re-encoding.
    // NOTE(review): both literals previously held two U+FFFD replacement
    // characters, which looks like encoding damage to a single 4-byte
    // character. U+24DBC is restored here; confirm it against the upstream
    // fixture.
    test(
        "<hello>&\u{24DBC};</hello>".as_bytes(),
        "
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |1:10 Unexpected entity: \u{24DBC}
        ".as_bytes(),  // FIXME: it shouldn't be 10, looks like indices are off slightly
        ParserConfig::new(),
        false
    )
}
272 
/// Regression test for issue 98: CDATA content that itself ends in `]` must
/// come through intact as a single `CData` event.
#[test]
fn issue_98_cdata_ending_with_right_bracket() {
    let document = br#"<hello><![CDATA[Foo [Bar]]]></hello>"#;
    let expected = br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |CData("Foo [Bar]")
            |EndElement(hello)
            |EndDocument
        "#;

    test(document, expected, ParserConfig::new(), false)
}
288 
/// Regression test for issue 105: `--` (and `-->`) appearing in element text
/// or inside CDATA is ordinary content and must not produce an error.
#[test]
fn issue_105_unexpected_double_dash() {
    let dashes_then_space: &[u8] = br#"<hello>-- </hello>"#;
    let dashes_then_space_events: &[u8] = br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("-- ")
            |EndElement(hello)
            |EndDocument
        "#;

    let dashes_only: &[u8] = br#"<hello>--</hello>"#;
    let dashes_only_events: &[u8] = br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("--")
            |EndElement(hello)
            |EndDocument
        "#;

    let comment_terminator: &[u8] = br#"<hello>--></hello>"#;
    let comment_terminator_events: &[u8] = br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("-->")
            |EndElement(hello)
            |EndDocument
        "#;

    let dashes_in_cdata: &[u8] = br#"<hello><![CDATA[--]]></hello>"#;
    let dashes_in_cdata_events: &[u8] = br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |CData("--")
            |EndElement(hello)
            |EndDocument
        "#;

    // Executed in the same order as the original sequence of calls.
    let cases = [
        (dashes_then_space, dashes_then_space_events),
        (dashes_only, dashes_only_events),
        (comment_terminator, comment_terminator_events),
        (dashes_in_cdata, dashes_in_cdata_events),
    ];
    for &(input, expected) in cases.iter() {
        test(input, expected, ParserConfig::new(), false);
    }
}
343 
/// With a default namespace declared on the element, the element name is
/// rendered with that namespace while the unprefixed attribute `x` is
/// rendered without one.
#[test]
fn issue_attribues_have_no_default_namespace() {
    let document = br#"<hello xmlns="urn:foo" x="y"/>"#;
    let expected = br#"
            |StartDocument(1.0, UTF-8)
            |StartElement({urn:foo}hello [x="y"])
            |EndElement({urn:foo}hello)
            |EndDocument
        "#;

    test(document, expected, ParserConfig::new(), false);
}
358 
/// Numeric character references in the surrogate range (decimal and
/// hexadecimal spellings) are rejected under the default configuration, and
/// each becomes U+FFFD when `replace_unknown_entity_references(true)` is set.
#[test]
fn issue_replacement_character_entity_reference() {
    let decimal_refs: &[u8] = br#"<doc>&#55357;&#56628;</doc>"#;
    let hex_refs: &[u8] = br#"<doc>&#xd83d;&#xdd34;</doc>"#;

    // Default configuration: the first reference is already an error.
    let decimal_error: &[u8] = br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(doc)
            |1:13 Invalid decimal character number in an entity: #55357
        "#;
    test(decimal_refs, decimal_error, ParserConfig::new(), false);

    let hex_error: &[u8] = br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(doc)
            |1:13 Invalid hexadecimal character number in an entity: #xd83d
        "#;
    test(hex_refs, hex_error, ParserConfig::new(), false);

    // With replacement enabled both spellings produce the same event stream,
    // so the expected text is built once and reused.
    let replaced_events = format!(
            r#"
                |StartDocument(1.0, UTF-8)
                |StartElement(doc)
                |Characters("{replacement_character}{replacement_character}")
                |EndElement(doc)
                |EndDocument
            "#,
        replacement_character = "\u{fffd}"
    );

    test(
        decimal_refs,
        replaced_events.as_bytes(),
        ParserConfig::new().replace_unknown_entity_references(true),
        false,
    );
    test(
        hex_refs,
        replaced_events.as_bytes(),
        ParserConfig::new().replace_unknown_entity_references(true),
        false,
    );
}
419 
420 lazy_static! {
421     // If PRINT_SPEC env variable is set, print the lines
422     // to stderr instead of comparing with the output
423     // it can be used like this:
424     // PRINT_SPEC=1 cargo test --test event_reader sample_1_full 2> sample_1_full.txt
425     static ref PRINT: bool = {
426         for (key, value) in env::vars() {
427             if key == "PRINT_SPEC" && value == "1" {
428                 return true;
429             }
430         }
431         false
432     };
433 }
434 
// Strips surrounding whitespace and the leading `|` marker from a spec line.
// A line whose trimmed form does not start with `|` is handed back untouched
// (not even trimmed). Allocates a new string for the stripped case.
fn trim_until_bar(s: String) -> String {
    let after_bar = {
        let trimmed = s.trim();
        if trimmed.starts_with('|') {
            // `|` is one byte long, so slicing from index 1 is always valid.
            Some(trimmed[1..].to_owned())
        } else {
            None
        }
    };
    after_bar.unwrap_or(s)
}
443 
test(input: &[u8], output: &[u8], config: ParserConfig, test_position: bool)444 fn test(input: &[u8], output: &[u8], config: ParserConfig, test_position: bool) {
445     let mut reader = config.create_reader(input);
446     let mut spec_lines = BufReader::new(output).lines()
447         .map(|line| line.unwrap())
448         .enumerate()
449         .map(|(i, line)| (i, trim_until_bar(line)))
450         .filter(|&(_, ref line)| !line.trim().is_empty());
451 
452     loop {
453         let e = reader.next();
454         let line =
455             if test_position {
456                 format!("{} {}", reader.position(), Event(&e))
457             } else {
458                 format!("{}", Event(&e))
459             };
460 
461         if *PRINT {
462             writeln!(&mut stderr(), "{}", line).unwrap();
463         } else {
464             if let Some((n, spec)) = spec_lines.next() {
465                 if line != spec {
466                     const SPLITTER: &'static str = "-------------------";
467                     panic!("\n{}\nUnexpected event at line {}:\nExpected: {}\nFound:    {}\n{}\n",
468                            SPLITTER, n + 1, spec, line, std::str::from_utf8(output).unwrap());
469                 }
470             } else {
471                 panic!("Unexpected event: {}", line);
472             }
473         }
474 
475         match e {
476             Ok(XmlEvent::EndDocument) | Err(_) => break,
477             _ => {},
478         }
479     }
480 }
481 
482 // Here we define our own string representation of events so we don't depend
483 // on the specifics of Display implementation for XmlEvent and OwnedName.
484 
485 struct Name<'a>(&'a OwnedName);
486 
487 impl <'a> fmt::Display for Name<'a> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result488     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
489         if let Some(ref namespace) = self.0.namespace {
490             try! { write!(f, "{{{}}}", namespace) }
491         }
492 
493         if let Some(ref prefix) = self.0.prefix {
494             try! { write!(f, "{}:", prefix) }
495         }
496 
497         write!(f, "{}", self.0.local_name)
498     }
499 }
500 
501 struct Event<'a>(&'a Result<XmlEvent>);
502 
503 impl<'a> fmt::Display for Event<'a> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result504     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
505         let empty = String::new();
506         match *self.0 {
507             Ok(ref e) => match *e {
508                 XmlEvent::StartDocument { ref version, ref encoding, .. } =>
509                     write!(f, "StartDocument({}, {})", version, encoding),
510                 XmlEvent::EndDocument =>
511                     write!(f, "EndDocument"),
512                 XmlEvent::ProcessingInstruction { ref name, ref data } =>
513                     write!(f, "ProcessingInstruction({}={:?})", name,
514                         data.as_ref().unwrap_or(&empty)),
515                 XmlEvent::StartElement { ref name, ref attributes, .. } => {
516                     if attributes.is_empty() {
517                         write!(f, "StartElement({})", Name(name))
518                     }
519                     else {
520                         let attrs: Vec<_> = attributes.iter()
521                             .map(|a| format!("{}={:?}", Name(&a.name), a.value)) .collect();
522                         write!(f, "StartElement({} [{}])", Name(name), attrs.join(", "))
523                     }
524                 },
525                 XmlEvent::EndElement { ref name } =>
526                     write!(f, "EndElement({})", Name(name)),
527                 XmlEvent::Comment(ref data) =>
528                     write!(f, r#"Comment("{}")"#, data.escape_debug()),
529                 XmlEvent::CData(ref data) =>
530                     write!(f, r#"CData("{}")"#, data.escape_debug()),
531                 XmlEvent::Characters(ref data) =>
532                     write!(f, r#"Characters("{}")"#, data.escape_debug()),
533                 XmlEvent::Whitespace(ref data) =>
534                     write!(f, r#"Whitespace("{}")"#, data.escape_debug()),
535             },
536             Err(ref e) => e.fmt(f),
537         }
538     }
539 }
540