1 // Copyright 2014-2017 The html5ever Project Developers. See the 2 // COPYRIGHT file at the top-level directory of this distribution. 3 // 4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or 5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license 6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your 7 // option. This file may not be copied, modified, or distributed 8 // except according to those terms. 9 10 use std::borrow::Cow; 11 12 use crate::tendril::StrTendril; 13 use crate::{Attribute, QualName}; 14 15 pub use self::TagKind::{EmptyTag, EndTag, ShortTag, StartTag}; 16 pub use self::Token::{CharacterTokens, EOFToken, NullCharacterToken, ParseError}; 17 pub use self::Token::{CommentToken, DoctypeToken, PIToken, TagToken}; 18 19 use super::states; 20 21 /// Tag kind denotes which kind of tag did we encounter. 22 #[derive(PartialEq, Eq, Hash, Copy, Clone, Debug)] 23 pub enum TagKind { 24 /// Beginning of a tag (e.g. `<a>`). 25 StartTag, 26 /// End of a tag (e.g. `</a>`). 27 EndTag, 28 /// Empty tag (e.g. `<a/>`). 29 EmptyTag, 30 /// Short tag (e.g. `</>`). 31 ShortTag, 32 } 33 34 /// XML 5 Tag Token 35 #[derive(PartialEq, Eq, Debug, Clone)] 36 pub struct Tag { 37 /// Token kind denotes which type of token was encountered. 38 /// E.g. if parser parsed `</a>` the token kind would be `EndTag`. 39 pub kind: TagKind, 40 /// Qualified name of the tag. 41 pub name: QualName, 42 /// List of attributes attached to this tag. 43 /// Only valid in start and empty tag. 44 pub attrs: Vec<Attribute>, 45 } 46 47 impl Tag { 48 /// Sorts attributes in a tag. equiv_modulo_attr_order(&self, other: &Tag) -> bool49 pub fn equiv_modulo_attr_order(&self, other: &Tag) -> bool { 50 if (self.kind != other.kind) || (self.name != other.name) { 51 return false; 52 } 53 54 let mut self_attrs = self.attrs.clone(); 55 let mut other_attrs = other.attrs.clone(); 56 self_attrs.sort(); 57 other_attrs.sort(); 58 59 self_attrs == other_attrs 60 } 61 } 62 63 /// A `DOCTYPE` token. 64 /// Doctype token in XML5 is rather limited for reasons, such as: 65 /// security and simplicity. XML5 only supports declaring DTD with 66 /// name, public identifier and system identifier 67 #[derive(PartialEq, Eq, Clone, Debug)] 68 pub struct Doctype { 69 /// Name of DOCTYPE declared 70 pub name: Option<StrTendril>, 71 /// Public identifier of this DOCTYPE. 72 pub public_id: Option<StrTendril>, 73 /// System identifier of this DOCTYPE. 74 pub system_id: Option<StrTendril>, 75 } 76 77 impl Doctype { 78 /// Constructs an empty DOCTYPE, with all fields set to None. new() -> Doctype79 pub fn new() -> Doctype { 80 Doctype { 81 name: None, 82 public_id: None, 83 system_id: None, 84 } 85 } 86 } 87 88 /// A ProcessingInstruction token. 89 #[derive(PartialEq, Eq, Clone, Debug)] 90 pub struct Pi { 91 /// What is the name of processing instruction. 92 pub target: StrTendril, 93 94 /// Text of processing instruction. 95 pub data: StrTendril, 96 } 97 98 /// Describes tokens encountered during parsing of input. 99 #[derive(PartialEq, Eq, Debug)] 100 pub enum Token { 101 /// Doctype token 102 DoctypeToken(Doctype), 103 /// Token tag founds. This token applies to all 104 /// possible kinds of tags (like start, end, empty tag, etc.). 105 TagToken(Tag), 106 /// Processing Instruction token 107 PIToken(Pi), 108 /// Comment token. 109 CommentToken(StrTendril), 110 /// Token that represents a series of characters. 111 CharacterTokens(StrTendril), 112 /// End of File found. 113 EOFToken, 114 /// NullCharacter encountered. 115 NullCharacterToken, 116 /// Error happened 117 ParseError(Cow<'static, str>), 118 } 119 120 /// Types which can receive tokens from the tokenizer. 121 pub trait TokenSink { 122 /// Process a token. process_token(&mut self, token: Token)123 fn process_token(&mut self, token: Token); 124 125 /// Signal to the sink that parsing has ended. end(&mut self)126 fn end(&mut self) {} 127 128 /// The tokenizer will call this after emitting any start tag. 129 /// This allows the tree builder to change the tokenizer's state. 130 /// By default no state changes occur. query_state_change(&mut self) -> Option<states::XmlState>131 fn query_state_change(&mut self) -> Option<states::XmlState> { 132 None 133 } 134 } 135