1 // Copyright 2014-2017 The html5ever Project Developers. See the
2 // COPYRIGHT file at the top-level directory of this distribution.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
9 
10 use std::borrow::Cow;
11 
12 use crate::tendril::StrTendril;
13 use crate::{Attribute, QualName};
14 
15 pub use self::TagKind::{EmptyTag, EndTag, ShortTag, StartTag};
16 pub use self::Token::{CharacterTokens, EOFToken, NullCharacterToken, ParseError};
17 pub use self::Token::{CommentToken, DoctypeToken, PIToken, TagToken};
18 
19 use super::states;
20 
/// The flavour of tag the tokenizer encountered.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum TagKind {
    /// An opening tag, such as `<a>`.
    StartTag,
    /// A closing tag, such as `</a>`.
    EndTag,
    /// A self-closing (empty) tag, such as `<a/>`.
    EmptyTag,
    /// A short tag, written `</>`.
    ShortTag,
}
33 
34 /// XML 5 Tag Token
35 #[derive(PartialEq, Eq, Debug, Clone)]
36 pub struct Tag {
37     /// Token kind denotes which type of token was encountered.
38     /// E.g. if parser parsed `</a>` the token kind would be `EndTag`.
39     pub kind: TagKind,
40     /// Qualified name of the tag.
41     pub name: QualName,
42     /// List of attributes attached to this tag.
43     /// Only valid in start and empty tag.
44     pub attrs: Vec<Attribute>,
45 }
46 
47 impl Tag {
48     /// Sorts attributes in a tag.
equiv_modulo_attr_order(&self, other: &Tag) -> bool49     pub fn equiv_modulo_attr_order(&self, other: &Tag) -> bool {
50         if (self.kind != other.kind) || (self.name != other.name) {
51             return false;
52         }
53 
54         let mut self_attrs = self.attrs.clone();
55         let mut other_attrs = other.attrs.clone();
56         self_attrs.sort();
57         other_attrs.sort();
58 
59         self_attrs == other_attrs
60     }
61 }
62 
63 /// A `DOCTYPE` token.
64 /// Doctype token in XML5 is rather limited for reasons, such as:
65 /// security and simplicity. XML5 only supports declaring DTD with
66 /// name, public identifier and system identifier
67 #[derive(PartialEq, Eq, Clone, Debug)]
68 pub struct Doctype {
69     /// Name of DOCTYPE declared
70     pub name: Option<StrTendril>,
71     /// Public identifier of this DOCTYPE.
72     pub public_id: Option<StrTendril>,
73     /// System identifier of this DOCTYPE.
74     pub system_id: Option<StrTendril>,
75 }
76 
77 impl Doctype {
78     /// Constructs an empty DOCTYPE, with all fields set to None.
new() -> Doctype79     pub fn new() -> Doctype {
80         Doctype {
81             name: None,
82             public_id: None,
83             system_id: None,
84         }
85     }
86 }
87 
88 /// A ProcessingInstruction token.
89 #[derive(PartialEq, Eq, Clone, Debug)]
90 pub struct Pi {
91     /// What is the name of processing instruction.
92     pub target: StrTendril,
93 
94     /// Text of processing instruction.
95     pub data: StrTendril,
96 }
97 
98 /// Describes tokens encountered during parsing of input.
99 #[derive(PartialEq, Eq, Debug)]
100 pub enum Token {
101     /// Doctype token
102     DoctypeToken(Doctype),
103     /// Token tag founds. This token applies to all
104     /// possible kinds of tags (like start, end, empty tag, etc.).
105     TagToken(Tag),
106     /// Processing Instruction token
107     PIToken(Pi),
108     /// Comment token.
109     CommentToken(StrTendril),
110     /// Token that represents a series of characters.
111     CharacterTokens(StrTendril),
112     /// End of File found.
113     EOFToken,
114     /// NullCharacter encountered.
115     NullCharacterToken,
116     /// Error happened
117     ParseError(Cow<'static, str>),
118 }
119 
120 /// Types which can receive tokens from the tokenizer.
121 pub trait TokenSink {
122     /// Process a token.
process_token(&mut self, token: Token)123     fn process_token(&mut self, token: Token);
124 
125     /// Signal to the sink that parsing has ended.
end(&mut self)126     fn end(&mut self) {}
127 
128     /// The tokenizer will call this after emitting any start tag.
129     /// This allows the tree builder to change the tokenizer's state.
130     /// By default no state changes occur.
query_state_change(&mut self) -> Option<states::XmlState>131     fn query_state_change(&mut self) -> Option<states::XmlState> {
132         None
133     }
134 }
135