1 // Copyright 2014-2017 The html5ever Project Developers. See the
2 // COPYRIGHT file at the top-level directory of this distribution.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
9
10 use log::warn;
11 pub use markup5ever::serialize::{AttrRef, Serialize, Serializer, TraversalScope};
12 use markup5ever::{local_name, namespace_url, ns};
13 use std::default::Default;
14 use std::io::{self, Write};
15
16 use crate::{LocalName, QualName};
17
serialize<Wr, T>(writer: Wr, node: &T, opts: SerializeOpts) -> io::Result<()> where Wr: Write, T: Serialize,18 pub fn serialize<Wr, T>(writer: Wr, node: &T, opts: SerializeOpts) -> io::Result<()>
19 where
20 Wr: Write,
21 T: Serialize,
22 {
23 let mut ser = HtmlSerializer::new(writer, opts.clone());
24 node.serialize(&mut ser, opts.traversal_scope)
25 }
26
27 #[derive(Clone)]
28 pub struct SerializeOpts {
29 /// Is scripting enabled?
30 pub scripting_enabled: bool,
31
32 /// Serialize the root node? Default: ChildrenOnly
33 pub traversal_scope: TraversalScope,
34
35 /// If the serializer is asked to serialize an invalid tree, the default
36 /// behavior is to panic in the event that an `end_elem` is created without a
37 /// matching `start_elem`. Setting this to true will prevent those panics by
38 /// creating a default parent on the element stack. No extra start elem will
39 /// actually be written. Default: false
40 pub create_missing_parent: bool,
41 }
42
43 impl Default for SerializeOpts {
default() -> SerializeOpts44 fn default() -> SerializeOpts {
45 SerializeOpts {
46 scripting_enabled: true,
47 traversal_scope: TraversalScope::ChildrenOnly(None),
48 create_missing_parent: false,
49 }
50 }
51 }
52
53 #[derive(Default)]
54 struct ElemInfo {
55 html_name: Option<LocalName>,
56 ignore_children: bool,
57 processed_first_child: bool,
58 }
59
60 pub struct HtmlSerializer<Wr: Write> {
61 pub writer: Wr,
62 opts: SerializeOpts,
63 stack: Vec<ElemInfo>,
64 }
65
tagname(name: &QualName) -> LocalName66 fn tagname(name: &QualName) -> LocalName {
67 match name.ns {
68 ns!(html) | ns!(mathml) | ns!(svg) => (),
69 ref ns => {
70 // FIXME(#122)
71 warn!("node with weird namespace {:?}", ns);
72 },
73 }
74
75 name.local.clone()
76 }
77
78 impl<Wr: Write> HtmlSerializer<Wr> {
new(writer: Wr, opts: SerializeOpts) -> Self79 pub fn new(writer: Wr, opts: SerializeOpts) -> Self {
80 let html_name = match opts.traversal_scope {
81 TraversalScope::IncludeNode | TraversalScope::ChildrenOnly(None) => None,
82 TraversalScope::ChildrenOnly(Some(ref n)) => Some(tagname(n)),
83 };
84 HtmlSerializer {
85 writer: writer,
86 opts: opts,
87 stack: vec![ElemInfo {
88 html_name: html_name,
89 ignore_children: false,
90 processed_first_child: false,
91 }],
92 }
93 }
94
parent(&mut self) -> &mut ElemInfo95 fn parent(&mut self) -> &mut ElemInfo {
96 if self.stack.len() == 0 {
97 if self.opts.create_missing_parent {
98 warn!("ElemInfo stack empty, creating new parent");
99 self.stack.push(Default::default());
100 } else {
101 panic!("no parent ElemInfo")
102 }
103 }
104 self.stack.last_mut().unwrap()
105 }
106
write_escaped(&mut self, text: &str, attr_mode: bool) -> io::Result<()>107 fn write_escaped(&mut self, text: &str, attr_mode: bool) -> io::Result<()> {
108 for c in text.chars() {
109 match c {
110 '&' => self.writer.write_all(b"&"),
111 '\u{00A0}' => self.writer.write_all(b" "),
112 '"' if attr_mode => self.writer.write_all(b"""),
113 '<' if !attr_mode => self.writer.write_all(b"<"),
114 '>' if !attr_mode => self.writer.write_all(b">"),
115 c => self.writer.write_fmt(format_args!("{}", c)),
116 }?;
117 }
118 Ok(())
119 }
120 }
121
122 impl<Wr: Write> Serializer for HtmlSerializer<Wr> {
start_elem<'a, AttrIter>(&mut self, name: QualName, attrs: AttrIter) -> io::Result<()> where AttrIter: Iterator<Item = AttrRef<'a>>,123 fn start_elem<'a, AttrIter>(&mut self, name: QualName, attrs: AttrIter) -> io::Result<()>
124 where
125 AttrIter: Iterator<Item = AttrRef<'a>>,
126 {
127 let html_name = match name.ns {
128 ns!(html) => Some(name.local.clone()),
129 _ => None,
130 };
131
132 if self.parent().ignore_children {
133 self.stack.push(ElemInfo {
134 html_name: html_name,
135 ignore_children: true,
136 processed_first_child: false,
137 });
138 return Ok(());
139 }
140
141 self.writer.write_all(b"<")?;
142 self.writer.write_all(tagname(&name).as_bytes())?;
143 for (name, value) in attrs {
144 self.writer.write_all(b" ")?;
145
146 match name.ns {
147 ns!() => (),
148 ns!(xml) => self.writer.write_all(b"xml:")?,
149 ns!(xmlns) => {
150 if name.local != local_name!("xmlns") {
151 self.writer.write_all(b"xmlns:")?;
152 }
153 },
154 ns!(xlink) => self.writer.write_all(b"xlink:")?,
155 ref ns => {
156 // FIXME(#122)
157 warn!("attr with weird namespace {:?}", ns);
158 self.writer.write_all(b"unknown_namespace:")?;
159 },
160 }
161
162 self.writer.write_all(name.local.as_bytes())?;
163 self.writer.write_all(b"=\"")?;
164 self.write_escaped(value, true)?;
165 self.writer.write_all(b"\"")?;
166 }
167 self.writer.write_all(b">")?;
168
169 let ignore_children = name.ns == ns!(html) &&
170 match name.local {
171 local_name!("area") |
172 local_name!("base") |
173 local_name!("basefont") |
174 local_name!("bgsound") |
175 local_name!("br") |
176 local_name!("col") |
177 local_name!("embed") |
178 local_name!("frame") |
179 local_name!("hr") |
180 local_name!("img") |
181 local_name!("input") |
182 local_name!("keygen") |
183 local_name!("link") |
184 local_name!("meta") |
185 local_name!("param") |
186 local_name!("source") |
187 local_name!("track") |
188 local_name!("wbr") => true,
189 _ => false,
190 };
191
192 self.parent().processed_first_child = true;
193
194 self.stack.push(ElemInfo {
195 html_name: html_name,
196 ignore_children: ignore_children,
197 processed_first_child: false,
198 });
199
200 Ok(())
201 }
202
end_elem(&mut self, name: QualName) -> io::Result<()>203 fn end_elem(&mut self, name: QualName) -> io::Result<()> {
204 let info = match self.stack.pop() {
205 Some(info) => info,
206 None if self.opts.create_missing_parent => {
207 warn!("missing ElemInfo, creating default.");
208 Default::default()
209 },
210 _ => panic!("no ElemInfo"),
211 };
212 if info.ignore_children {
213 return Ok(());
214 }
215
216 self.writer.write_all(b"</")?;
217 self.writer.write_all(tagname(&name).as_bytes())?;
218 self.writer.write_all(b">")
219 }
220
write_text(&mut self, text: &str) -> io::Result<()>221 fn write_text(&mut self, text: &str) -> io::Result<()> {
222 let escape = match self.parent().html_name {
223 Some(local_name!("style")) |
224 Some(local_name!("script")) |
225 Some(local_name!("xmp")) |
226 Some(local_name!("iframe")) |
227 Some(local_name!("noembed")) |
228 Some(local_name!("noframes")) |
229 Some(local_name!("plaintext")) => false,
230
231 Some(local_name!("noscript")) => !self.opts.scripting_enabled,
232
233 _ => true,
234 };
235
236 if escape {
237 self.write_escaped(text, false)
238 } else {
239 self.writer.write_all(text.as_bytes())
240 }
241 }
242
write_comment(&mut self, text: &str) -> io::Result<()>243 fn write_comment(&mut self, text: &str) -> io::Result<()> {
244 self.writer.write_all(b"<!--")?;
245 self.writer.write_all(text.as_bytes())?;
246 self.writer.write_all(b"-->")
247 }
248
write_doctype(&mut self, name: &str) -> io::Result<()>249 fn write_doctype(&mut self, name: &str) -> io::Result<()> {
250 self.writer.write_all(b"<!DOCTYPE ")?;
251 self.writer.write_all(name.as_bytes())?;
252 self.writer.write_all(b">")
253 }
254
write_processing_instruction(&mut self, target: &str, data: &str) -> io::Result<()>255 fn write_processing_instruction(&mut self, target: &str, data: &str) -> io::Result<()> {
256 self.writer.write_all(b"<?")?;
257 self.writer.write_all(target.as_bytes())?;
258 self.writer.write_all(b" ")?;
259 self.writer.write_all(data.as_bytes())?;
260 self.writer.write_all(b">")
261 }
262 }
263