1 use common::is_whitespace_char; 2 3 use reader::events::XmlEvent; 4 use reader::lexer::Token; 5 6 use super::{ 7 Result, PullParser, State, ClosingTagSubstate, OpeningTagSubstate, 8 ProcessingInstructionSubstate, DEFAULT_VERSION, DEFAULT_ENCODING, DEFAULT_STANDALONE 9 }; 10 11 impl PullParser { outside_tag(&mut self, t: Token) -> Option<Result>12 pub fn outside_tag(&mut self, t: Token) -> Option<Result> { 13 match t { 14 Token::ReferenceStart => 15 self.into_state_continue(State::InsideReference(Box::new(State::OutsideTag))), 16 17 Token::Whitespace(_) if self.depth() == 0 && self.config.ignore_root_level_whitespace => None, // skip whitespace outside of the root element 18 19 Token::Whitespace(_) if self.config.trim_whitespace && !self.buf_has_data() => None, 20 21 Token::Whitespace(c) => { 22 if !self.buf_has_data() { 23 self.push_pos(); 24 } 25 self.append_char_continue(c) 26 } 27 28 _ if t.contains_char_data() && self.depth() == 0 => 29 Some(self_error!(self; "Unexpected characters outside the root element: {}", t)), 30 31 _ if t.contains_char_data() => { // Non-whitespace char data 32 if !self.buf_has_data() { 33 self.push_pos(); 34 } 35 self.inside_whitespace = false; 36 t.push_to_string(&mut self.buf); 37 None 38 } 39 40 Token::ReferenceEnd => { // Semi-colon in a text outside an entity 41 self.inside_whitespace = false; 42 Token::ReferenceEnd.push_to_string(&mut self.buf); 43 None 44 } 45 46 Token::CommentStart if self.config.coalesce_characters && self.config.ignore_comments => { 47 // We need to switch the lexer into a comment mode inside comments 48 self.lexer.inside_comment(); 49 self.into_state_continue(State::InsideComment) 50 } 51 52 Token::CDataStart if self.config.coalesce_characters && self.config.cdata_to_characters => { 53 if !self.buf_has_data() { 54 self.push_pos(); 55 } 56 // We need to disable lexing errors inside CDATA 57 self.lexer.disable_errors(); 58 self.into_state_continue(State::InsideCData) 59 } 60 61 _ => { 62 // Encountered some markup event, flush the buffer as characters 63 // or a whitespace 64 let mut next_event = if self.buf_has_data() { 65 let buf = self.take_buf(); 66 if self.inside_whitespace && self.config.trim_whitespace { 67 None 68 } else if self.inside_whitespace && !self.config.whitespace_to_characters { 69 Some(Ok(XmlEvent::Whitespace(buf))) 70 } else if self.config.trim_whitespace { 71 Some(Ok(XmlEvent::Characters(buf.trim_matches(is_whitespace_char).into()))) 72 } else { 73 Some(Ok(XmlEvent::Characters(buf))) 74 } 75 } else { None }; 76 self.inside_whitespace = true; // Reset inside_whitespace flag 77 self.push_pos(); 78 match t { 79 Token::ProcessingInstructionStart => 80 self.into_state(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideName), next_event), 81 82 Token::DoctypeStart if !self.encountered_element => { 83 // We don't have a doctype event so skip this position 84 // FIXME: update when we have a doctype event 85 self.next_pos(); 86 self.lexer.disable_errors(); 87 self.into_state(State::InsideDoctype, next_event) 88 } 89 90 Token::OpeningTagStart => { 91 // If declaration was not parsed and we have encountered an element, 92 // emit this declaration as the next event. 93 if !self.parsed_declaration { 94 self.parsed_declaration = true; 95 let sd_event = XmlEvent::StartDocument { 96 version: DEFAULT_VERSION, 97 encoding: DEFAULT_ENCODING.into(), 98 standalone: DEFAULT_STANDALONE 99 }; 100 // next_event is always none here because we're outside of 101 // the root element 102 next_event = Some(Ok(sd_event)); 103 self.push_pos(); 104 } 105 self.encountered_element = true; 106 self.nst.push_empty(); 107 self.into_state(State::InsideOpeningTag(OpeningTagSubstate::InsideName), next_event) 108 } 109 110 Token::ClosingTagStart if self.depth() > 0 => 111 self.into_state(State::InsideClosingTag(ClosingTagSubstate::CTInsideName), next_event), 112 113 Token::CommentStart => { 114 // We need to switch the lexer into a comment mode inside comments 115 self.lexer.inside_comment(); 116 self.into_state(State::InsideComment, next_event) 117 } 118 119 Token::CDataStart => { 120 // We need to disable lexing errors inside CDATA 121 self.lexer.disable_errors(); 122 self.into_state(State::InsideCData, next_event) 123 } 124 125 _ => Some(self_error!(self; "Unexpected token: {}", t)) 126 } 127 } 128 } 129 } 130 } 131