1 #![forbid(unsafe_code)]
2
3 extern crate xml;
4 #[macro_use]
5 extern crate lazy_static;
6
7 use std::env;
8 use std::fmt;
9 use std::fs::File;
10 use std::io::{BufRead, BufReader, Write, stderr};
11 use std::path::Path;
12
13 use xml::name::OwnedName;
14 use xml::common::Position;
15 use xml::reader::{Result, XmlEvent, ParserConfig, EventReader};
16
17 /// Dummy function that opens a file, parses it, and returns a `Result`.
18 /// There can be IO errors (from `File::open`) and XML errors (from the parser).
19 /// Having `impl From<std::io::Error> for xml::reader::Error` allows the user to
20 /// do this without defining their own error type.
21 #[allow(dead_code)]
count_event_in_file(name: &Path) -> Result<usize>22 fn count_event_in_file(name: &Path) -> Result<usize> {
23 let mut event_count = 0;
24 for event in EventReader::new(BufReader::new(try!(File::open(name)))) {
25 try!(event);
26 event_count += 1;
27 }
28 Ok(event_count)
29 }
30
/// Compares the events read from `sample_1.xml` with `sample_1_short.txt`,
/// with all five listed parser options set to `true` and positions left out.
#[test]
fn sample_1_short() {
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);
    let document = include_bytes!("documents/sample_1.xml");
    let expected = include_bytes!("documents/sample_1_short.txt");

    test(document, expected, config, false);
}
45
/// Compares the events read from `sample_1.xml` with `sample_1_full.txt`,
/// with all five listed parser options set to `false` and positions left out.
#[test]
fn sample_1_full() {
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);
    let document = include_bytes!("documents/sample_1.xml");
    let expected = include_bytes!("documents/sample_1_full.txt");

    test(document, expected, config, false);
}
60
/// Compares the events read from `sample_2.xml` with `sample_2_short.txt`,
/// with all five listed parser options set to `true` and positions left out.
#[test]
fn sample_2_short() {
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);
    let document = include_bytes!("documents/sample_2.xml");
    let expected = include_bytes!("documents/sample_2_short.txt");

    test(document, expected, config, false);
}
75
/// Compares the events read from `sample_2.xml` with `sample_2_full.txt`,
/// with all five listed parser options set to `false` and positions left out.
#[test]
fn sample_2_full() {
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);
    let document = include_bytes!("documents/sample_2.xml");
    let expected = include_bytes!("documents/sample_2_full.txt");

    test(document, expected, config, false);
}
90
/// Compares the events read from `sample_3.xml` with `sample_3_short.txt`,
/// with all five listed parser options set to `true`. Reader positions are
/// part of the comparison here.
#[test]
fn sample_3_short() {
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);
    let document = include_bytes!("documents/sample_3.xml");
    let expected = include_bytes!("documents/sample_3_short.txt");

    test(document, expected, config, true);
}
105
/// Compares the events read from `sample_3.xml` with `sample_3_full.txt`,
/// with all five listed parser options set to `false`. Reader positions are
/// part of the comparison here.
#[test]
fn sample_3_full() {
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);
    let document = include_bytes!("documents/sample_3.xml");
    let expected = include_bytes!("documents/sample_3_full.txt");

    test(document, expected, config, true);
}
120
/// Compares the events read from `sample_4.xml` with `sample_4_short.txt`,
/// with all five listed parser options set to `true` and positions left out.
#[test]
fn sample_4_short() {
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true);
    let document = include_bytes!("documents/sample_4.xml");
    let expected = include_bytes!("documents/sample_4_short.txt");

    test(document, expected, config, false);
}
135
/// Compares the events read from `sample_4.xml` with `sample_4_full.txt`,
/// with all five listed parser options set to `false` and positions left out.
#[test]
fn sample_4_full() {
    let config = ParserConfig::new()
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);
    let document = include_bytes!("documents/sample_4.xml");
    let expected = include_bytes!("documents/sample_4_full.txt");

    test(document, expected, config, false);
}
151
/// Compares the events read from `sample_5.xml` with `sample_5_short.txt`,
/// with all five listed parser options set to `true`, three extra entities
/// registered on the configuration, and positions left out.
#[test]
fn sample_5_short() {
    // NOTE(review): the replacement text for `nbsp` is presumably a
    // non-breaking space (U+00A0) rather than a plain space — confirm the
    // literal survived copying intact.
    let config = ParserConfig::new()
        .ignore_comments(true)
        .whitespace_to_characters(true)
        .cdata_to_characters(true)
        .trim_whitespace(true)
        .coalesce_characters(true)
        .add_entity("nbsp", " ")
        .add_entity("copy", "©")
        .add_entity("NotEqualTilde", "≂̸");
    let document = include_bytes!("documents/sample_5.xml");
    let expected = include_bytes!("documents/sample_5_short.txt");

    test(document, expected, config, false);
}
169
/// Compares the events read from `sample_6.xml` with `sample_6_full.txt`,
/// with every listed parser option (including `ignore_root_level_whitespace`)
/// set to `false` and positions left out.
#[test]
fn sample_6_full() {
    let config = ParserConfig::new()
        .ignore_root_level_whitespace(false)
        .ignore_comments(false)
        .whitespace_to_characters(false)
        .cdata_to_characters(false)
        .trim_whitespace(false)
        .coalesce_characters(false);
    let document = include_bytes!("documents/sample_6.xml");
    let expected = include_bytes!("documents/sample_6_full.txt");

    test(document, expected, config, false);
}
185
/// Input that stops right after `<?xml` must be reported as an unexpected end
/// of stream.
#[test]
fn eof_1() {
    let input = br#"<?xml"#;
    let expected = br#"1:6 Unexpected end of stream: no root element found"#;

    test(input, expected, ParserConfig::new(), false);
}
195
/// An `&` directly after `<?xml` must be reported as an unexpected token.
#[test]
fn bad_1() {
    let input = br#"<?xml&.,"#;
    let expected = br#"1:6 Unexpected token: <?xml&"#;

    test(input, expected, ParserConfig::new(), false);
}
205
/// A `--` inside a comment that is not part of the closing `-->` must be
/// reported as an unexpected token, whatever character follows it.
#[test]
fn dashes_in_comments() {
    // Both cases use the default configuration and no position prefix.
    fn check(input: &[u8], expected: &[u8]) {
        test(input, expected, ParserConfig::new(), false);
    }

    // `--` followed by a space.
    check(
        br#"<!-- comment -- --><hello/>"#,
        br#"
        |1:14 Unexpected token '--' before ' '
        "#,
    );

    // `--` followed by another dash.
    check(
        br#"<!-- comment ---><hello/>"#,
        br#"
        |1:14 Unexpected token '--' before '-'
        "#,
    );
}
226
/// Checks the positions reported for a document that contains tab characters,
/// with `trim_whitespace` enabled.
#[test]
fn tabs_1() {
    let input = b"\t<a>\t<b/></a>";
    let expected = br#"
        |1:2 StartDocument(1.0, UTF-8)
        |1:2 StartElement(a)
        |1:6 StartElement(b)
        |1:6 EndElement(b)
        |1:10 EndElement(a)
        |1:14 EndDocument
    "#;
    let config = ParserConfig::new().trim_whitespace(true);

    // Positions are part of the comparison in this test.
    test(input, expected, config, true);
}
244
/// Defining the same attribute twice on one element must produce an error
/// after the events that precede the offending tag.
#[test]
fn issue_83_duplicate_attributes() {
    let input = br#"<hello><some-tag a='10' a="20"></hello>"#;
    let expected = br#"
        |StartDocument(1.0, UTF-8)
        |StartElement(hello)
        |1:30 Attribute 'a' is redefined
    "#;

    test(input, expected, ParserConfig::new(), false);
}
258
/// An entity reference the parser does not know must be reported as an
/// unexpected entity.
///
/// NOTE(review): the test name and the use of `.as_bytes()` on string literals
/// (instead of byte-string literals) suggest that a non-ASCII character once
/// stood between `&` and `;` and at the end of the expected error line — it
/// appears to be missing here; confirm against the original file.
#[test]
fn issue_93_large_characters_in_entity_references() {
    let input = r#"<hello>&;</hello>"#.as_bytes();
    // FIXME: it shouldn't be 10, looks like indices are off slightly
    let expected = r#"
        |StartDocument(1.0, UTF-8)
        |StartElement(hello)
        |1:10 Unexpected entity:
    "#.as_bytes();

    test(input, expected, ParserConfig::new(), false)
}
272
/// A CDATA section whose content ends with `]` must keep that bracket in the
/// reported data.
#[test]
fn issue_98_cdata_ending_with_right_bracket() {
    let input = br#"<hello><![CDATA[Foo [Bar]]]></hello>"#;
    let expected = br#"
        |StartDocument(1.0, UTF-8)
        |StartElement(hello)
        |CData("Foo [Bar]")
        |EndElement(hello)
        |EndDocument
    "#;

    test(input, expected, ParserConfig::new(), false)
}
288
/// A `--` outside of a comment is ordinary content: it must come through
/// unchanged in both character data and CDATA sections.
#[test]
fn issue_105_unexpected_double_dash() {
    // Every case uses the default configuration and no position prefix.
    fn check(input: &[u8], expected: &[u8]) {
        test(input, expected, ParserConfig::new(), false);
    }

    // `--` followed by a space.
    check(
        br#"<hello>-- </hello>"#,
        br#"
        |StartDocument(1.0, UTF-8)
        |StartElement(hello)
        |Characters("-- ")
        |EndElement(hello)
        |EndDocument
        "#,
    );

    // `--` on its own.
    check(
        br#"<hello>--</hello>"#,
        br#"
        |StartDocument(1.0, UTF-8)
        |StartElement(hello)
        |Characters("--")
        |EndElement(hello)
        |EndDocument
        "#,
    );

    // Something that looks like the end of a comment.
    check(
        br#"<hello>--></hello>"#,
        br#"
        |StartDocument(1.0, UTF-8)
        |StartElement(hello)
        |Characters("-->")
        |EndElement(hello)
        |EndDocument
        "#,
    );

    // `--` inside a CDATA section.
    check(
        br#"<hello><![CDATA[--]]></hello>"#,
        br#"
        |StartDocument(1.0, UTF-8)
        |StartElement(hello)
        |CData("--")
        |EndElement(hello)
        |EndDocument
        "#,
    );
}
343
/// An unprefixed attribute must not pick up the element's default namespace:
/// the element name is rendered with `{urn:foo}`, the attribute `x` without.
#[test]
fn issue_attribues_have_no_default_namespace() {
    let input = br#"<hello xmlns="urn:foo" x="y"/>"#;
    let expected = br#"
        |StartDocument(1.0, UTF-8)
        |StartElement({urn:foo}hello [x="y"])
        |EndElement({urn:foo}hello)
        |EndDocument
    "#;

    test(input, expected, ParserConfig::new(), false);
}
358
/// Numeric character references that do not denote a valid character must be
/// rejected by default, and must each turn into U+FFFD when
/// `replace_unknown_entity_references` is enabled.
///
/// The inputs are spelled out as numeric references. The first reference of
/// each pair (`&#55357;` / `&#xd83d;`) is the one named in the expected error
/// messages.
///
/// NOTE(review): the second reference of each pair (`&#56628;` / `&#xdd34;`)
/// was reconstructed as the matching low surrogate, and the decimal/hex order
/// of the last two cases is assumed to mirror the first two — confirm against
/// the original file.
#[test]
fn issue_replacement_character_entity_reference() {
    // Decimal reference, default configuration: the first reference is an error.
    test(
        br#"<doc>&#55357;&#56628;</doc>"#,
        br#"
        |StartDocument(1.0, UTF-8)
        |StartElement(doc)
        |1:13 Invalid decimal character number in an entity: #55357
        "#,
        ParserConfig::new(),
        false,
    );

    // Hexadecimal reference, default configuration: same failure, hex wording.
    test(
        br#"<doc>&#xd83d;&#xdd34;</doc>"#,
        br#"
        |StartDocument(1.0, UTF-8)
        |StartElement(doc)
        |1:13 Invalid hexadecimal character number in an entity: #xd83d
        "#,
        ParserConfig::new(),
        false,
    );

    // Decimal references with replacement enabled: two replacement characters.
    test(
        br#"<doc>&#55357;&#56628;</doc>"#,
        format!(
            r#"
            |StartDocument(1.0, UTF-8)
            |StartElement(doc)
            |Characters("{replacement_character}{replacement_character}")
            |EndElement(doc)
            |EndDocument
            "#,
            replacement_character = "\u{fffd}"
        )
        .as_bytes(),
        ParserConfig::new()
            .replace_unknown_entity_references(true),
        false,
    );

    // Hexadecimal references with replacement enabled: same outcome.
    test(
        br#"<doc>&#xd83d;&#xdd34;</doc>"#,
        format!(
            r#"
            |StartDocument(1.0, UTF-8)
            |StartElement(doc)
            |Characters("{replacement_character}{replacement_character}")
            |EndElement(doc)
            |EndDocument
            "#,
            replacement_character = "\u{fffd}"
        )
        .as_bytes(),
        ParserConfig::new()
            .replace_unknown_entity_references(true),
        false,
    );
}
419
420 lazy_static! {
421 // If PRINT_SPEC env variable is set, print the lines
422 // to stderr instead of comparing with the output
423 // it can be used like this:
424 // PRINT_SPEC=1 cargo test --test event_reader sample_1_full 2> sample_1_full.txt
425 static ref PRINT: bool = {
426 for (key, value) in env::vars() {
427 if key == "PRINT_SPEC" && value == "1" {
428 return true;
429 }
430 }
431 false
432 };
433 }
434
/// Strips the margin from a spec line.
///
/// If the line, with surrounding whitespace removed, begins with `|`, the text
/// after that bar is returned as a new string. Any other line is handed back
/// untouched, whitespace included.
// Allocates a fresh string for every matching line, which is fine for tests.
fn trim_until_bar(s: String) -> String {
    let trimmed = s.trim();
    if trimmed.starts_with('|') {
        // `|` is one byte long, so slicing from index 1 drops exactly the bar.
        trimmed[1..].to_owned()
    } else {
        s
    }
}
443
test(input: &[u8], output: &[u8], config: ParserConfig, test_position: bool)444 fn test(input: &[u8], output: &[u8], config: ParserConfig, test_position: bool) {
445 let mut reader = config.create_reader(input);
446 let mut spec_lines = BufReader::new(output).lines()
447 .map(|line| line.unwrap())
448 .enumerate()
449 .map(|(i, line)| (i, trim_until_bar(line)))
450 .filter(|&(_, ref line)| !line.trim().is_empty());
451
452 loop {
453 let e = reader.next();
454 let line =
455 if test_position {
456 format!("{} {}", reader.position(), Event(&e))
457 } else {
458 format!("{}", Event(&e))
459 };
460
461 if *PRINT {
462 writeln!(&mut stderr(), "{}", line).unwrap();
463 } else {
464 if let Some((n, spec)) = spec_lines.next() {
465 if line != spec {
466 const SPLITTER: &'static str = "-------------------";
467 panic!("\n{}\nUnexpected event at line {}:\nExpected: {}\nFound: {}\n{}\n",
468 SPLITTER, n + 1, spec, line, std::str::from_utf8(output).unwrap());
469 }
470 } else {
471 panic!("Unexpected event: {}", line);
472 }
473 }
474
475 match e {
476 Ok(XmlEvent::EndDocument) | Err(_) => break,
477 _ => {},
478 }
479 }
480 }
481
482 // Here we define our own string representation of events so we don't depend
483 // on the specifics of Display implementation for XmlEvent and OwnedName.
484
485 struct Name<'a>(&'a OwnedName);
486
487 impl <'a> fmt::Display for Name<'a> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result488 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
489 if let Some(ref namespace) = self.0.namespace {
490 try! { write!(f, "{{{}}}", namespace) }
491 }
492
493 if let Some(ref prefix) = self.0.prefix {
494 try! { write!(f, "{}:", prefix) }
495 }
496
497 write!(f, "{}", self.0.local_name)
498 }
499 }
500
501 struct Event<'a>(&'a Result<XmlEvent>);
502
503 impl<'a> fmt::Display for Event<'a> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result504 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
505 let empty = String::new();
506 match *self.0 {
507 Ok(ref e) => match *e {
508 XmlEvent::StartDocument { ref version, ref encoding, .. } =>
509 write!(f, "StartDocument({}, {})", version, encoding),
510 XmlEvent::EndDocument =>
511 write!(f, "EndDocument"),
512 XmlEvent::ProcessingInstruction { ref name, ref data } =>
513 write!(f, "ProcessingInstruction({}={:?})", name,
514 data.as_ref().unwrap_or(&empty)),
515 XmlEvent::StartElement { ref name, ref attributes, .. } => {
516 if attributes.is_empty() {
517 write!(f, "StartElement({})", Name(name))
518 }
519 else {
520 let attrs: Vec<_> = attributes.iter()
521 .map(|a| format!("{}={:?}", Name(&a.name), a.value)) .collect();
522 write!(f, "StartElement({} [{}])", Name(name), attrs.join(", "))
523 }
524 },
525 XmlEvent::EndElement { ref name } =>
526 write!(f, "EndElement({})", Name(name)),
527 XmlEvent::Comment(ref data) =>
528 write!(f, r#"Comment("{}")"#, data.escape_debug()),
529 XmlEvent::CData(ref data) =>
530 write!(f, r#"CData("{}")"#, data.escape_debug()),
531 XmlEvent::Characters(ref data) =>
532 write!(f, r#"Characters("{}")"#, data.escape_debug()),
533 XmlEvent::Whitespace(ref data) =>
534 write!(f, r#"Whitespace("{}")"#, data.escape_debug()),
535 },
536 Err(ref e) => e.fmt(f),
537 }
538 }
539 }
540