1 use common::is_whitespace_char;
2 
3 use reader::events::XmlEvent;
4 use reader::lexer::Token;
5 
6 use super::{
7     Result, PullParser, State, ClosingTagSubstate, OpeningTagSubstate,
8     ProcessingInstructionSubstate, DEFAULT_VERSION, DEFAULT_ENCODING, DEFAULT_STANDALONE
9 };
10 
11 impl PullParser {
outside_tag(&mut self, t: Token) -> Option<Result>12     pub fn outside_tag(&mut self, t: Token) -> Option<Result> {
13         match t {
14             Token::ReferenceStart =>
15                 self.into_state_continue(State::InsideReference(Box::new(State::OutsideTag))),
16 
17             Token::Whitespace(_) if self.depth() == 0 && self.config.ignore_root_level_whitespace => None,  // skip whitespace outside of the root element
18 
19             Token::Whitespace(_) if self.config.trim_whitespace && !self.buf_has_data() => None,
20 
21             Token::Whitespace(c) => {
22                 if !self.buf_has_data() {
23                     self.push_pos();
24                 }
25                 self.append_char_continue(c)
26             }
27 
28             _ if t.contains_char_data() && self.depth() == 0 =>
29                 Some(self_error!(self; "Unexpected characters outside the root element: {}", t)),
30 
31             _ if t.contains_char_data() => {  // Non-whitespace char data
32                 if !self.buf_has_data() {
33                     self.push_pos();
34                 }
35                 self.inside_whitespace = false;
36                 t.push_to_string(&mut self.buf);
37                 None
38             }
39 
40             Token::ReferenceEnd => { // Semi-colon in a text outside an entity
41                 self.inside_whitespace = false;
42                 Token::ReferenceEnd.push_to_string(&mut self.buf);
43                 None
44             }
45 
46             Token::CommentStart if self.config.coalesce_characters && self.config.ignore_comments => {
47                 // We need to switch the lexer into a comment mode inside comments
48                 self.lexer.inside_comment();
49                 self.into_state_continue(State::InsideComment)
50             }
51 
52             Token::CDataStart if self.config.coalesce_characters && self.config.cdata_to_characters => {
53                 if !self.buf_has_data() {
54                     self.push_pos();
55                 }
56                 // We need to disable lexing errors inside CDATA
57                 self.lexer.disable_errors();
58                 self.into_state_continue(State::InsideCData)
59             }
60 
61             _ => {
62                 // Encountered some markup event, flush the buffer as characters
63                 // or a whitespace
64                 let mut next_event = if self.buf_has_data() {
65                     let buf = self.take_buf();
66                     if self.inside_whitespace && self.config.trim_whitespace {
67                         None
68                     } else if self.inside_whitespace && !self.config.whitespace_to_characters {
69                         Some(Ok(XmlEvent::Whitespace(buf)))
70                     } else if self.config.trim_whitespace {
71                         Some(Ok(XmlEvent::Characters(buf.trim_matches(is_whitespace_char).into())))
72                     } else {
73                         Some(Ok(XmlEvent::Characters(buf)))
74                     }
75                 } else { None };
76                 self.inside_whitespace = true;  // Reset inside_whitespace flag
77                 self.push_pos();
78                 match t {
79                     Token::ProcessingInstructionStart =>
80                         self.into_state(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideName), next_event),
81 
82                     Token::DoctypeStart if !self.encountered_element => {
83                         // We don't have a doctype event so skip this position
84                         // FIXME: update when we have a doctype event
85                         self.next_pos();
86                         self.lexer.disable_errors();
87                         self.into_state(State::InsideDoctype, next_event)
88                     }
89 
90                     Token::OpeningTagStart => {
91                         // If declaration was not parsed and we have encountered an element,
92                         // emit this declaration as the next event.
93                         if !self.parsed_declaration {
94                             self.parsed_declaration = true;
95                             let sd_event = XmlEvent::StartDocument {
96                                 version: DEFAULT_VERSION,
97                                 encoding: DEFAULT_ENCODING.into(),
98                                 standalone: DEFAULT_STANDALONE
99                             };
100                             // next_event is always none here because we're outside of
101                             // the root element
102                             next_event = Some(Ok(sd_event));
103                             self.push_pos();
104                         }
105                         self.encountered_element = true;
106                         self.nst.push_empty();
107                         self.into_state(State::InsideOpeningTag(OpeningTagSubstate::InsideName), next_event)
108                     }
109 
110                     Token::ClosingTagStart if self.depth() > 0 =>
111                         self.into_state(State::InsideClosingTag(ClosingTagSubstate::CTInsideName), next_event),
112 
113                     Token::CommentStart => {
114                         // We need to switch the lexer into a comment mode inside comments
115                         self.lexer.inside_comment();
116                         self.into_state(State::InsideComment, next_event)
117                     }
118 
119                     Token::CDataStart => {
120                         // We need to disable lexing errors inside CDATA
121                         self.lexer.disable_errors();
122                         self.into_state(State::InsideCData, next_event)
123                     }
124 
125                     _ => Some(self_error!(self; "Unexpected token: {}", t))
126                 }
127             }
128         }
129     }
130 }
131