1 #![forbid(unsafe_code)]
2 
3 extern crate xml;
4 #[macro_use]
5 extern crate lazy_static;
6 
7 use std::env;
8 use std::fmt;
9 use std::fs::File;
10 use std::io::{BufRead, BufReader, Write, stderr};
11 use std::path::Path;
12 
13 use xml::name::OwnedName;
14 use xml::common::Position;
15 use xml::reader::{Result, XmlEvent, ParserConfig, EventReader};
16 
17 /// Dummy function that opens a file, parses it, and returns a `Result`.
18 /// There can be IO errors (from `File::open`) and XML errors (from the parser).
19 /// Having `impl From<std::io::Error> for xml::reader::Error` allows the user to
20 /// do this without defining their own error type.
21 #[allow(dead_code)]
count_event_in_file(name: &Path) -> Result<usize>22 fn count_event_in_file(name: &Path) -> Result<usize> {
23     let mut event_count = 0;
24     for event in EventReader::new(BufReader::new(try!(File::open(name)))) {
25         try!(event);
26         event_count += 1;
27     }
28     Ok(event_count)
29 }
30 
/// Parses `documents/sample_1.xml` with all five listed parser options set to
/// `true` and compares the events with `documents/sample_1_short.txt`.
/// Positions are not checked.
#[test]
fn sample_1_short() {
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);
    let input = include_bytes!("documents/sample_1.xml");
    let expected = include_bytes!("documents/sample_1_short.txt");

    test(input, expected, config, false);
}
45 
/// Parses `documents/sample_1.xml` with all five listed parser options set to
/// `false` and compares the events with `documents/sample_1_full.txt`.
/// Positions are not checked.
#[test]
fn sample_1_full() {
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);
    let input = include_bytes!("documents/sample_1.xml");
    let expected = include_bytes!("documents/sample_1_full.txt");

    test(input, expected, config, false);
}
60 
/// Parses `documents/sample_2.xml` with all five listed parser options set to
/// `true` and compares the events with `documents/sample_2_short.txt`.
/// Positions are not checked.
#[test]
fn sample_2_short() {
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);
    let input = include_bytes!("documents/sample_2.xml");
    let expected = include_bytes!("documents/sample_2_short.txt");

    test(input, expected, config, false);
}
75 
/// Parses `documents/sample_2.xml` with all five listed parser options set to
/// `false` and compares the events with `documents/sample_2_full.txt`.
/// Positions are not checked.
#[test]
fn sample_2_full() {
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);
    let input = include_bytes!("documents/sample_2.xml");
    let expected = include_bytes!("documents/sample_2_full.txt");

    test(input, expected, config, false);
}
90 
/// Parses `documents/sample_3.xml` with all five listed parser options set to
/// `true` and compares the events with `documents/sample_3_short.txt`.
/// Reader positions are part of the comparison.
#[test]
fn sample_3_short() {
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);
    let input = include_bytes!("documents/sample_3.xml");
    let expected = include_bytes!("documents/sample_3_short.txt");

    test(input, expected, config, true);
}
105 
/// Parses `documents/sample_3.xml` with all five listed parser options set to
/// `false` and compares the events with `documents/sample_3_full.txt`.
/// Reader positions are part of the comparison.
#[test]
fn sample_3_full() {
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);
    let input = include_bytes!("documents/sample_3.xml");
    let expected = include_bytes!("documents/sample_3_full.txt");

    test(input, expected, config, true);
}
120 
/// Parses `documents/sample_4.xml` with all five listed parser options set to
/// `true` and compares the events with `documents/sample_4_short.txt`.
/// Positions are not checked.
#[test]
fn sample_4_short() {
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);
    let input = include_bytes!("documents/sample_4.xml");
    let expected = include_bytes!("documents/sample_4_short.txt");

    test(input, expected, config, false);
}
135 
/// Parses `documents/sample_4.xml` with all five listed parser options set to
/// `false` and compares the events with `documents/sample_4_full.txt`.
/// Positions are not checked.
#[test]
fn sample_4_full() {
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);
    let input = include_bytes!("documents/sample_4.xml");
    let expected = include_bytes!("documents/sample_4_full.txt");

    test(input, expected, config, false);
}
151 
/// Parses `documents/sample_5.xml` with all five listed parser options set to
/// `true` plus three extra entities (`nbsp`, `copy`, `NotEqualTilde`), and
/// compares the events with `documents/sample_5_short.txt`.
/// Positions are not checked.
#[test]
fn sample_5_short() {
    // NOTE(review): the `nbsp` replacement is presumably U+00A0 (no-break
    // space), which looks the same as an ASCII space -- confirm the literal.
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true)
        .add_entity("nbsp", " ")
        .add_entity("copy", "©")
        .add_entity("NotEqualTilde", "≂̸");
    let input = include_bytes!("documents/sample_5.xml");
    let expected = include_bytes!("documents/sample_5_short.txt");

    test(input, expected, config, false);
}
169 
/// Parses `documents/sample_6.xml` with `ignore_root_level_whitespace` and the
/// five other listed parser options all set to `false`, and compares the
/// events with `documents/sample_6_full.txt`. Positions are not checked.
#[test]
fn sample_6_full() {
    let config = ParserConfig::new()
        .ignore_root_level_whitespace(false)
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);
    let input = include_bytes!("documents/sample_6.xml");
    let expected = include_bytes!("documents/sample_6_full.txt");

    test(input, expected, config, false);
}
185 
/// Input that ends right after `<?xml` must produce the
/// "Unexpected end of stream" error line, using the default configuration.
#[test]
fn eof_1() {
    let input = br#"<?xml"#;
    let expected = br#"1:6 Unexpected end of stream: no root element found"#;

    test(input, expected, ParserConfig::new(), false);
}
195 
/// Garbage directly after `<?xml` must produce the "Unexpected token" error
/// line, using the default configuration.
#[test]
fn bad_1() {
    let input = br#"<?xml&.,"#;
    let expected = br#"1:6 Unexpected token: <?xml&"#;

    test(input, expected, ParserConfig::new(), false);
}
205 
/// A `--` inside a comment body must be reported as an unexpected token,
/// whether it is followed by a space or by another dash.
#[test]
fn dashes_in_comments() {
    // Both cases use the default configuration and skip the position prefix.
    fn check(input: &[u8], expected: &[u8]) {
        test(input, expected, ParserConfig::new(), false);
    }

    check(
        br#"<!-- comment -- --><hello/>"#,
        br#"
            |1:14 Unexpected token '--' before ' '
        "#,
    );

    check(
        br#"<!-- comment ---><hello/>"#,
        br#"
            |1:14 Unexpected token '--' before '-'
        "#,
    );
}
226 
/// Checks the positions reported for a document containing tab characters,
/// with `trim_whitespace` enabled. Reader positions are part of the comparison.
#[test]
fn tabs_1() {
    let config = ParserConfig::new().trim_whitespace(true);
    let input = b"\t<a>\t<b/></a>";
    let expected = br#"
            |1:2 StartDocument(1.0, UTF-8)
            |1:2 StartElement(a)
            |1:6 StartElement(b)
            |1:6 EndElement(b)
            |1:10 EndElement(a)
            |1:14 EndDocument
        "#;

    test(input, expected, config, true);
}
244 
/// A bare `]]>` in element content is expected to come out as plain
/// characters (default configuration, positions not checked).
#[test]
fn issue_32_unescaped_cdata_end() {
    let input = br#"<hello>]]></hello>"#;
    let expected = br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("]]>")
            |EndElement(hello)
            |EndDocument
        "#;

    test(input, expected, ParserConfig::new(), false);
}
260 
/// A bare `?>` in element content is expected to come out as plain
/// characters (default configuration, positions not checked).
#[test]
fn issue_unescaped_processing_instruction_end() {
    let input = br#"<hello>?></hello>"#;
    let expected = br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("?>")
            |EndElement(hello)
            |EndDocument
        "#;

    test(input, expected, ParserConfig::new(), false);
}
276 
/// A bare `/>` in element content is expected to come out as plain
/// characters (default configuration, positions not checked).
#[test]
fn issue_unescaped_empty_tag_end() {
    let input = br#"<hello>/></hello>"#;
    let expected = br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("/>")
            |EndElement(hello)
            |EndDocument
        "#;

    test(input, expected, ParserConfig::new(), false);
}
292 
/// An element that defines attribute `a` twice is expected to end the event
/// stream with the "is redefined" error (default configuration).
#[test]
fn issue_83_duplicate_attributes() {
    let input = br#"<hello><some-tag a='10' a="20"></hello>"#;
    let expected = br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |1:30 Attribute 'a' is redefined
        "#;

    test(input, expected, ParserConfig::new(), false);
}
306 
/// An entity reference whose name is not a known entity is expected to end
/// the event stream with the "Unexpected entity" error (default configuration).
///
/// NOTE(review): the entity-name literal may show up as replacement characters
/// in some viewers; make sure the intended character survived in the file.
#[test]
fn issue_93_large_characters_in_entity_references() {
    let input = r#"<hello>&��;</hello>"#;
    // FIXME: it shouldn't be 10, looks like indices are off slightly
    let expected = r#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |1:10 Unexpected entity: ��
        "#;

    test(input.as_bytes(), expected.as_bytes(), ParserConfig::new(), false);
}
320 
/// A CDATA section whose content ends with `]` is expected to keep that
/// bracket in the reported data (default configuration, positions not checked).
#[test]
fn issue_98_cdata_ending_with_right_bracket() {
    let input = br#"<hello><![CDATA[Foo [Bar]]]></hello>"#;
    let expected = br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |CData("Foo [Bar]")
            |EndElement(hello)
            |EndDocument
        "#;

    test(input, expected, ParserConfig::new(), false);
}
336 
/// `--` outside of a comment (in character data and inside CDATA) is expected
/// to be passed through as ordinary content.
#[test]
fn issue_105_unexpected_double_dash() {
    // All four cases use the default configuration and skip the position prefix.
    fn check(input: &[u8], expected: &[u8]) {
        test(input, expected, ParserConfig::new(), false);
    }

    // Double dash followed by a space.
    check(
        br#"<hello>-- </hello>"#,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("-- ")
            |EndElement(hello)
            |EndDocument
        "#,
    );

    // Double dash directly before the closing tag.
    check(
        br#"<hello>--</hello>"#,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("--")
            |EndElement(hello)
            |EndDocument
        "#,
    );

    // Something that looks like a comment terminator.
    check(
        br#"<hello>--></hello>"#,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("-->")
            |EndElement(hello)
            |EndDocument
        "#,
    );

    // Double dash inside a CDATA section.
    check(
        br#"<hello><![CDATA[--]]></hello>"#,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |CData("--")
            |EndElement(hello)
            |EndDocument
        "#,
    );
}
391 
/// With a default namespace declared on the element, the element name is
/// expected to carry `{urn:foo}` while the unprefixed attribute `x` carries
/// no namespace.
#[test]
fn issue_attribues_have_no_default_namespace() {
    let input = br#"<hello xmlns="urn:foo" x="y"/>"#;
    let expected = br#"
            |StartDocument(1.0, UTF-8)
            |StartElement({urn:foo}hello [x="y"])
            |EndElement({urn:foo}hello)
            |EndDocument
        "#;

    test(input, expected, ParserConfig::new(), false);
}
406 
/// Numeric character references to the values 55357/0xd83d and 56628/0xdd34
/// are expected to be rejected with the default configuration, and to be
/// turned into U+FFFD when `replace_unknown_entity_references` is enabled.
#[test]
fn issue_replacement_character_entity_reference() {
    let decimal = br#"<doc>&#55357;&#56628;</doc>"#;
    let hexadecimal = br#"<doc>&#xd83d;&#xdd34;</doc>"#;

    // Default configuration: the first reference is reported as an error.
    let decimal_error = br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(doc)
            |1:13 Invalid decimal character number in an entity: #55357
        "#;
    test(decimal, decimal_error, ParserConfig::new(), false);

    let hexadecimal_error = br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(doc)
            |1:13 Invalid hexadecimal character number in an entity: #xd83d
        "#;
    test(hexadecimal, hexadecimal_error, ParserConfig::new(), false);

    // With replacement enabled, both spellings expect the same event lines.
    let replaced = format!(
            r#"
                |StartDocument(1.0, UTF-8)
                |StartElement(doc)
                |Characters("{replacement_character}{replacement_character}")
                |EndElement(doc)
                |EndDocument
            "#,
            replacement_character = "\u{fffd}"
    );

    test(
        decimal,
        replaced.as_bytes(),
        ParserConfig::new().replace_unknown_entity_references(true),
        false,
    );
    test(
        hexadecimal,
        replaced.as_bytes(),
        ParserConfig::new().replace_unknown_entity_references(true),
        false,
    );
}
467 
468 lazy_static! {
469     // If PRINT_SPEC env variable is set, print the lines
470     // to stderr instead of comparing with the output
471     // it can be used like this:
472     // PRINT_SPEC=1 cargo test --test event_reader sample_1_full 2> sample_1_full.txt
473     static ref PRINT: bool = {
474         for (key, value) in env::vars() {
475             if key == "PRINT_SPEC" && value == "1" {
476                 return true;
477             }
478         }
479         false
480     };
481 }
482 
/// Strips the margin from a line of an expected-output spec.
///
/// If the line, after trimming surrounding whitespace, starts with `|`, the
/// trimmed text following that bar is returned. Otherwise the line is
/// returned unchanged (untrimmed).
// Allocates a new string for every matching line, which is fine for tests.
fn trim_until_bar(s: String) -> String {
    let after_bar = {
        let trimmed = s.trim();
        if trimmed.starts_with('|') {
            // `|` is a single byte, so the remainder starts at index 1.
            Some(trimmed[1..].to_owned())
        } else {
            None
        }
    };
    after_bar.unwrap_or(s)
}
491 
test(input: &[u8], output: &[u8], config: ParserConfig, test_position: bool)492 fn test(input: &[u8], output: &[u8], config: ParserConfig, test_position: bool) {
493     let mut reader = config.create_reader(input);
494     let mut spec_lines = BufReader::new(output).lines()
495         .map(|line| line.unwrap())
496         .enumerate()
497         .map(|(i, line)| (i, trim_until_bar(line)))
498         .filter(|&(_, ref line)| !line.trim().is_empty());
499 
500     loop {
501         let e = reader.next();
502         let line =
503             if test_position {
504                 format!("{} {}", reader.position(), Event(&e))
505             } else {
506                 format!("{}", Event(&e))
507             };
508 
509         if *PRINT {
510             writeln!(&mut stderr(), "{}", line).unwrap();
511         } else {
512             if let Some((n, spec)) = spec_lines.next() {
513                 if line != spec {
514                     const SPLITTER: &'static str = "-------------------";
515                     panic!("\n{}\nUnexpected event at line {}:\nExpected: {}\nFound:    {}\n{}\n",
516                            SPLITTER, n + 1, spec, line, std::str::from_utf8(output).unwrap());
517                 }
518             } else {
519                 panic!("Unexpected event: {}", line);
520             }
521         }
522 
523         match e {
524             Ok(XmlEvent::EndDocument) | Err(_) => break,
525             _ => {},
526         }
527     }
528 }
529 
530 // Here we define our own string representation of events so we don't depend
531 // on the specifics of Display implementation for XmlEvent and OwnedName.
532 
533 struct Name<'a>(&'a OwnedName);
534 
535 impl <'a> fmt::Display for Name<'a> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result536     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
537         if let Some(ref namespace) = self.0.namespace {
538             try! { write!(f, "{{{}}}", namespace) }
539         }
540 
541         if let Some(ref prefix) = self.0.prefix {
542             try! { write!(f, "{}:", prefix) }
543         }
544 
545         write!(f, "{}", self.0.local_name)
546     }
547 }
548 
549 struct Event<'a>(&'a Result<XmlEvent>);
550 
551 impl<'a> fmt::Display for Event<'a> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result552     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
553         let empty = String::new();
554         match *self.0 {
555             Ok(ref e) => match *e {
556                 XmlEvent::StartDocument { ref version, ref encoding, .. } =>
557                     write!(f, "StartDocument({}, {})", version, encoding),
558                 XmlEvent::EndDocument =>
559                     write!(f, "EndDocument"),
560                 XmlEvent::ProcessingInstruction { ref name, ref data } =>
561                     write!(f, "ProcessingInstruction({}={:?})", name,
562                         data.as_ref().unwrap_or(&empty)),
563                 XmlEvent::StartElement { ref name, ref attributes, .. } => {
564                     if attributes.is_empty() {
565                         write!(f, "StartElement({})", Name(name))
566                     }
567                     else {
568                         let attrs: Vec<_> = attributes.iter()
569                             .map(|a| format!("{}={:?}", Name(&a.name), a.value)) .collect();
570                         write!(f, "StartElement({} [{}])", Name(name), attrs.join(", "))
571                     }
572                 },
573                 XmlEvent::EndElement { ref name } =>
574                     write!(f, "EndElement({})", Name(name)),
575                 XmlEvent::Comment(ref data) =>
576                     write!(f, r#"Comment("{}")"#, data.escape_debug()),
577                 XmlEvent::CData(ref data) =>
578                     write!(f, r#"CData("{}")"#, data.escape_debug()),
579                 XmlEvent::Characters(ref data) =>
580                     write!(f, r#"Characters("{}")"#, data.escape_debug()),
581                 XmlEvent::Whitespace(ref data) =>
582                     write!(f, r#"Whitespace("{}")"#, data.escape_debug()),
583             },
584             Err(ref e) => e.fmt(f),
585         }
586     }
587 }
588