1 #![forbid(unsafe_code)]
2
3 extern crate xml;
4 #[macro_use]
5 extern crate lazy_static;
6
7 use std::env;
8 use std::fmt;
9 use std::fs::File;
10 use std::io::{BufRead, BufReader, Write, stderr};
11 use std::path::Path;
12
13 use xml::name::OwnedName;
14 use xml::common::Position;
15 use xml::reader::{Result, XmlEvent, ParserConfig, EventReader};
16
17 /// Dummy function that opens a file, parses it, and returns a `Result`.
18 /// There can be IO errors (from `File::open`) and XML errors (from the parser).
19 /// Having `impl From<std::io::Error> for xml::reader::Error` allows the user to
20 /// do this without defining their own error type.
21 #[allow(dead_code)]
count_event_in_file(name: &Path) -> Result<usize>22 fn count_event_in_file(name: &Path) -> Result<usize> {
23 let mut event_count = 0;
24 for event in EventReader::new(BufReader::new(try!(File::open(name)))) {
25 try!(event);
26 event_count += 1;
27 }
28 Ok(event_count)
29 }
30
/// Runs `documents/sample_1.xml` through a reader with every condensing option
/// enabled and compares the events with `documents/sample_1_short.txt`.
#[test]
fn sample_1_short() {
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);
    let document = include_bytes!("documents/sample_1.xml");
    let expected = include_bytes!("documents/sample_1_short.txt");

    // `false`: reader positions are not prepended to the compared lines.
    test(document, expected, config, false);
}
45
/// Runs `documents/sample_1.xml` through a reader with every condensing option
/// disabled and compares the events with `documents/sample_1_full.txt`.
#[test]
fn sample_1_full() {
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);
    let document = include_bytes!("documents/sample_1.xml");
    let expected = include_bytes!("documents/sample_1_full.txt");

    // `false`: reader positions are not prepended to the compared lines.
    test(document, expected, config, false);
}
60
/// Runs `documents/sample_2.xml` through a reader with every condensing option
/// enabled and compares the events with `documents/sample_2_short.txt`.
#[test]
fn sample_2_short() {
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);
    let document = include_bytes!("documents/sample_2.xml");
    let expected = include_bytes!("documents/sample_2_short.txt");

    // `false`: reader positions are not prepended to the compared lines.
    test(document, expected, config, false);
}
75
/// Runs `documents/sample_2.xml` through a reader with every condensing option
/// disabled and compares the events with `documents/sample_2_full.txt`.
#[test]
fn sample_2_full() {
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);
    let document = include_bytes!("documents/sample_2.xml");
    let expected = include_bytes!("documents/sample_2_full.txt");

    // `false`: reader positions are not prepended to the compared lines.
    test(document, expected, config, false);
}
90
/// Runs `documents/sample_3.xml` through a reader with every condensing option
/// enabled and compares events *and positions* with `documents/sample_3_short.txt`.
#[test]
fn sample_3_short() {
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);
    let document = include_bytes!("documents/sample_3.xml");
    let expected = include_bytes!("documents/sample_3_short.txt");

    // `true`: each compared line is prefixed with the reader position.
    test(document, expected, config, true);
}
105
/// Runs `documents/sample_3.xml` through a reader with every condensing option
/// disabled and compares events *and positions* with `documents/sample_3_full.txt`.
#[test]
fn sample_3_full() {
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);
    let document = include_bytes!("documents/sample_3.xml");
    let expected = include_bytes!("documents/sample_3_full.txt");

    // `true`: each compared line is prefixed with the reader position.
    test(document, expected, config, true);
}
120
/// Runs `documents/sample_4.xml` through a reader with every condensing option
/// enabled and compares the events with `documents/sample_4_short.txt`.
#[test]
fn sample_4_short() {
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);
    let document = include_bytes!("documents/sample_4.xml");
    let expected = include_bytes!("documents/sample_4_short.txt");

    // `false`: reader positions are not prepended to the compared lines.
    test(document, expected, config, false);
}
135
/// Runs `documents/sample_4.xml` through a reader with every condensing option
/// disabled and compares the events with `documents/sample_4_full.txt`.
#[test]
fn sample_4_full() {
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);
    let document = include_bytes!("documents/sample_4.xml");
    let expected = include_bytes!("documents/sample_4_full.txt");

    // `false`: reader positions are not prepended to the compared lines.
    test(document, expected, config, false);
}
151
/// Runs `documents/sample_5.xml` through a reader with every condensing option
/// enabled plus three extra named entities, and compares the events with
/// `documents/sample_5_short.txt`.
#[test]
fn sample_5_short() {
    // The replacement texts are spelled as escapes: a literal no-break space is
    // visually identical to an ASCII space and is easily normalised away by
    // editors and tooling, and the combining overlay in `NotEqualTilde` is just
    // as fragile.
    // NOTE(review): `nbsp` is assumed to map to U+00A0 (per its name) — confirm
    // against `sample_5_short.txt`.
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true)
        .add_entity("nbsp", "\u{a0}")
        .add_entity("copy", "\u{a9}")
        .add_entity("NotEqualTilde", "\u{2242}\u{338}");

    test(
        include_bytes!("documents/sample_5.xml"),
        include_bytes!("documents/sample_5_short.txt"),
        config,
        false, // positions are not compared
    );
}
169
/// Runs `documents/sample_6.xml` through a reader with every condensing option
/// disabled — including `ignore_root_level_whitespace` — and compares the
/// events with `documents/sample_6_full.txt`.
#[test]
fn sample_6_full() {
    let config = ParserConfig::new()
        .ignore_root_level_whitespace(false)
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);
    let document = include_bytes!("documents/sample_6.xml");
    let expected = include_bytes!("documents/sample_6_full.txt");

    // `false`: reader positions are not prepended to the compared lines.
    test(document, expected, config, false);
}
185
/// Input that stops right after `<?xml` must be reported as an unexpected end
/// of stream.
#[test]
fn eof_1() {
    let input: &[u8] = br#"<?xml"#;
    let expected: &[u8] = br#"1:6 Unexpected end of stream: no root element found"#;

    test(input, expected, ParserConfig::new(), false);
}
195
/// Garbage directly after `<?xml` must be reported as an unexpected token.
#[test]
fn bad_1() {
    let input: &[u8] = br#"<?xml&.,"#;
    let expected: &[u8] = br#"1:6 Unexpected token: <?xml&"#;

    test(input, expected, ParserConfig::new(), false);
}
205
/// A `--` inside a comment body is rejected, whether it is followed by a space
/// or by another dash.
#[test]
fn dashes_in_comments() {
    // Both cases use the default configuration and do not compare positions.
    fn check(input: &[u8], expected: &[u8]) {
        test(input, expected, ParserConfig::new(), false);
    }

    check(
        br#"<!-- comment -- --><hello/>"#,
        br#"
            |1:14 Unexpected token '--' before ' '
        "#,
    );

    check(
        br#"<!-- comment ---><hello/>"#,
        br#"
            |1:14 Unexpected token '--' before '-'
        "#,
    );
}
226
/// Checks reported positions for input containing tab characters; the expected
/// columns count each tab as a single column.
#[test]
fn tabs_1() {
    let input: &[u8] = b"\t<a>\t<b/></a>";
    let expected: &[u8] = br#"
            |1:2 StartDocument(1.0, UTF-8)
            |1:2 StartElement(a)
            |1:6 StartElement(b)
            |1:6 EndElement(b)
            |1:10 EndElement(a)
            |1:14 EndDocument
        "#;
    let config = ParserConfig::new().trim_whitespace(true);

    // `true`: each compared line is prefixed with the reader position.
    test(input, expected, config, true);
}
244
/// A bare `]]>` in element content is delivered as ordinary characters.
#[test]
fn issue_32_unescaped_cdata_end() {
    let input: &[u8] = br#"<hello>]]></hello>"#;
    let expected: &[u8] = br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("]]>")
            |EndElement(hello)
            |EndDocument
        "#;

    test(input, expected, ParserConfig::new(), false);
}
260
/// A bare `?>` in element content is delivered as ordinary characters.
#[test]
fn issue_unescaped_processing_instruction_end() {
    let input: &[u8] = br#"<hello>?></hello>"#;
    let expected: &[u8] = br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("?>")
            |EndElement(hello)
            |EndDocument
        "#;

    test(input, expected, ParserConfig::new(), false);
}
276
/// A bare `/>` in element content is delivered as ordinary characters.
#[test]
fn issue_unescaped_empty_tag_end() {
    let input: &[u8] = br#"<hello>/></hello>"#;
    let expected: &[u8] = br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("/>")
            |EndElement(hello)
            |EndDocument
        "#;

    test(input, expected, ParserConfig::new(), false);
}
292
/// Defining the same attribute twice on one element is reported as an error
/// after the events that precede the offending tag.
#[test]
fn issue_83_duplicate_attributes() {
    let input: &[u8] = br#"<hello><some-tag a='10' a="20"></hello>"#;
    let expected: &[u8] = br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |1:30 Attribute 'a' is redefined
        "#;

    test(input, expected, ParserConfig::new(), false);
}
306
// Regression test for issue 93: an entity reference the parser does not accept
// must end the event stream with an "Unexpected entity" error after the opening
// `hello` element. Plain `r#"…"#` strings converted with `as_bytes()` are used
// here rather than `br#"…"#` byte-string literals.
//
// NOTE(review): the entity name between `&` and `;`, and the text after
// "Unexpected entity:", look like they were lost from this copy of the file —
// the test name mentions "large characters" and the expected column 1:10
// suggests one character sat there. TODO: confirm against the original test data.
#[test]
fn issue_93_large_characters_in_entity_references() {
    test(
        r#"<hello>&;</hello>"#.as_bytes(),
        r#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |1:10 Unexpected entity:
        "#.as_bytes(), // FIXME: it shouldn't be 10, looks like indices are off slightly
        ParserConfig::new(),
        false
    )
}
320
/// A CDATA section whose content itself ends in `]` keeps that bracket.
#[test]
fn issue_98_cdata_ending_with_right_bracket() {
    let input: &[u8] = br#"<hello><![CDATA[Foo [Bar]]]></hello>"#;
    let expected: &[u8] = br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |CData("Foo [Bar]")
            |EndElement(hello)
            |EndDocument
        "#;

    test(input, expected, ParserConfig::new(), false);
}
336
/// `--` (and `-->`) outside of a comment is plain text, both in element
/// content and inside a CDATA section.
#[test]
fn issue_105_unexpected_double_dash() {
    // Every case uses the default configuration and does not compare positions.
    fn check(input: &[u8], expected: &[u8]) {
        test(input, expected, ParserConfig::new(), false);
    }

    check(
        br#"<hello>-- </hello>"#,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("-- ")
            |EndElement(hello)
            |EndDocument
        "#,
    );

    check(
        br#"<hello>--</hello>"#,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("--")
            |EndElement(hello)
            |EndDocument
        "#,
    );

    check(
        br#"<hello>--></hello>"#,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |Characters("-->")
            |EndElement(hello)
            |EndDocument
        "#,
    );

    check(
        br#"<hello><![CDATA[--]]></hello>"#,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(hello)
            |CData("--")
            |EndElement(hello)
            |EndDocument
        "#,
    );
}
391
/// The default namespace declared with `xmlns` applies to the element name but
/// not to its unprefixed attribute.
#[test]
fn issue_attribues_have_no_default_namespace() {
    let input: &[u8] = br#"<hello xmlns="urn:foo" x="y"/>"#;
    let expected: &[u8] = br#"
            |StartDocument(1.0, UTF-8)
            |StartElement({urn:foo}hello [x="y"])
            |EndElement({urn:foo}hello)
            |EndDocument
        "#;

    test(input, expected, ParserConfig::new(), false);
}
406
/// Numeric character references that do not name a valid character.
///
/// With the default configuration the first such reference ends the stream
/// with an error naming it (decimal and hexadecimal spellings are both
/// covered). With `replace_unknown_entity_references(true)` each reference is
/// instead delivered as U+FFFD.
#[test]
fn issue_replacement_character_entity_reference() {
    // The inputs must spell the references out in ASCII: byte-string literals
    // cannot contain non-ASCII characters, and the point of the test is that
    // the *parser* sees the numeric reference. The first reference matches the
    // number quoted in the expected error.
    // NOTE(review): the second reference was reconstructed as the low surrogate
    // 56628 / 0xdd34 — any invalid value exercises the same path; confirm
    // against the original test data.
    let decimal: &[u8] = br#"<doc>&#55357;&#56628;</doc>"#;
    let hexadecimal: &[u8] = br#"<doc>&#xd83d;&#xdd34;</doc>"#;

    test(
        decimal,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(doc)
            |1:13 Invalid decimal character number in an entity: #55357
        "#,
        ParserConfig::new(),
        false,
    );

    test(
        hexadecimal,
        br#"
            |StartDocument(1.0, UTF-8)
            |StartElement(doc)
            |1:13 Invalid hexadecimal character number in an entity: #xd83d
        "#,
        ParserConfig::new(),
        false,
    );

    // Expected stream when replacement is enabled: one U+FFFD per reference.
    let replaced = format!(
        r#"
            |StartDocument(1.0, UTF-8)
            |StartElement(doc)
            |Characters("{replacement_character}{replacement_character}")
            |EndElement(doc)
            |EndDocument
        "#,
        replacement_character = "\u{fffd}"
    );

    test(
        decimal,
        replaced.as_bytes(),
        ParserConfig::new()
            .replace_unknown_entity_references(true),
        false,
    );

    test(
        hexadecimal,
        replaced.as_bytes(),
        ParserConfig::new()
            .replace_unknown_entity_references(true),
        false,
    );
}
467
468 lazy_static! {
469 // If PRINT_SPEC env variable is set, print the lines
470 // to stderr instead of comparing with the output
471 // it can be used like this:
472 // PRINT_SPEC=1 cargo test --test event_reader sample_1_full 2> sample_1_full.txt
473 static ref PRINT: bool = {
474 for (key, value) in env::vars() {
475 if key == "PRINT_SPEC" && value == "1" {
476 return true;
477 }
478 }
479 false
480 };
481 }
482
// Strips the indentation and the leading `|` marker from a spec line.
//
// If the line, once trimmed of surrounding whitespace, starts with `|`, the
// trimmed text after that bar is returned; otherwise the line is handed back
// untouched (whitespace included). Allocates a new string in the first case,
// which is fine for test code.
fn trim_until_bar(s: String) -> String {
    let after_bar = {
        let trimmed = s.trim();
        if trimmed.starts_with('|') {
            // `|` is a single byte, so slicing from index 1 is always valid.
            Some(trimmed[1..].to_owned())
        } else {
            None
        }
    };
    after_bar.unwrap_or(s)
}
491
test(input: &[u8], output: &[u8], config: ParserConfig, test_position: bool)492 fn test(input: &[u8], output: &[u8], config: ParserConfig, test_position: bool) {
493 let mut reader = config.create_reader(input);
494 let mut spec_lines = BufReader::new(output).lines()
495 .map(|line| line.unwrap())
496 .enumerate()
497 .map(|(i, line)| (i, trim_until_bar(line)))
498 .filter(|&(_, ref line)| !line.trim().is_empty());
499
500 loop {
501 let e = reader.next();
502 let line =
503 if test_position {
504 format!("{} {}", reader.position(), Event(&e))
505 } else {
506 format!("{}", Event(&e))
507 };
508
509 if *PRINT {
510 writeln!(&mut stderr(), "{}", line).unwrap();
511 } else {
512 if let Some((n, spec)) = spec_lines.next() {
513 if line != spec {
514 const SPLITTER: &'static str = "-------------------";
515 panic!("\n{}\nUnexpected event at line {}:\nExpected: {}\nFound: {}\n{}\n",
516 SPLITTER, n + 1, spec, line, std::str::from_utf8(output).unwrap());
517 }
518 } else {
519 panic!("Unexpected event: {}", line);
520 }
521 }
522
523 match e {
524 Ok(XmlEvent::EndDocument) | Err(_) => break,
525 _ => {},
526 }
527 }
528 }
529
530 // Here we define our own string representation of events so we don't depend
531 // on the specifics of Display implementation for XmlEvent and OwnedName.
532
533 struct Name<'a>(&'a OwnedName);
534
535 impl <'a> fmt::Display for Name<'a> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result536 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
537 if let Some(ref namespace) = self.0.namespace {
538 try! { write!(f, "{{{}}}", namespace) }
539 }
540
541 if let Some(ref prefix) = self.0.prefix {
542 try! { write!(f, "{}:", prefix) }
543 }
544
545 write!(f, "{}", self.0.local_name)
546 }
547 }
548
549 struct Event<'a>(&'a Result<XmlEvent>);
550
551 impl<'a> fmt::Display for Event<'a> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result552 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
553 let empty = String::new();
554 match *self.0 {
555 Ok(ref e) => match *e {
556 XmlEvent::StartDocument { ref version, ref encoding, .. } =>
557 write!(f, "StartDocument({}, {})", version, encoding),
558 XmlEvent::EndDocument =>
559 write!(f, "EndDocument"),
560 XmlEvent::ProcessingInstruction { ref name, ref data } =>
561 write!(f, "ProcessingInstruction({}={:?})", name,
562 data.as_ref().unwrap_or(&empty)),
563 XmlEvent::StartElement { ref name, ref attributes, .. } => {
564 if attributes.is_empty() {
565 write!(f, "StartElement({})", Name(name))
566 }
567 else {
568 let attrs: Vec<_> = attributes.iter()
569 .map(|a| format!("{}={:?}", Name(&a.name), a.value)) .collect();
570 write!(f, "StartElement({} [{}])", Name(name), attrs.join(", "))
571 }
572 },
573 XmlEvent::EndElement { ref name } =>
574 write!(f, "EndElement({})", Name(name)),
575 XmlEvent::Comment(ref data) =>
576 write!(f, r#"Comment("{}")"#, data.escape_debug()),
577 XmlEvent::CData(ref data) =>
578 write!(f, r#"CData("{}")"#, data.escape_debug()),
579 XmlEvent::Characters(ref data) =>
580 write!(f, r#"Characters("{}")"#, data.escape_debug()),
581 XmlEvent::Whitespace(ref data) =>
582 write!(f, r#"Whitespace("{}")"#, data.escape_debug()),
583 },
584 Err(ref e) => e.fmt(f),
585 }
586 }
587 }
588