1 // Copyright 2014-2017 The html5ever Project Developers. See the
2 // COPYRIGHT file at the top-level directory of this distribution.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
9 
10 use log::warn;
11 pub use markup5ever::serialize::{AttrRef, Serialize, Serializer, TraversalScope};
12 use markup5ever::{local_name, namespace_url, ns};
13 use std::default::Default;
14 use std::io::{self, Write};
15 
16 use crate::{LocalName, QualName};
17 
serialize<Wr, T>(writer: Wr, node: &T, opts: SerializeOpts) -> io::Result<()> where Wr: Write, T: Serialize,18 pub fn serialize<Wr, T>(writer: Wr, node: &T, opts: SerializeOpts) -> io::Result<()>
19 where
20     Wr: Write,
21     T: Serialize,
22 {
23     let mut ser = HtmlSerializer::new(writer, opts.clone());
24     node.serialize(&mut ser, opts.traversal_scope)
25 }
26 
/// Options that control how `serialize` and `HtmlSerializer` emit markup.
///
/// The `Default` implementation in this file yields scripting enabled,
/// `TraversalScope::ChildrenOnly(None)`, and `create_missing_parent: false`.
#[derive(Clone)]
pub struct SerializeOpts {
    /// Is scripting enabled?
    ///
    /// Consulted when writing text whose parent is `<noscript>`: with
    /// scripting enabled that text is written verbatim, otherwise it is
    /// escaped like ordinary text.
    pub scripting_enabled: bool,

    /// Serialize the root node? Default: ChildrenOnly
    ///
    /// With `ChildrenOnly(Some(name))`, the local name of `name` is recorded
    /// as the initial parent element on the serializer's stack, so it
    /// influences how top-level text is escaped.
    pub traversal_scope: TraversalScope,

    /// If the serializer is asked to serialize an invalid tree, the default
    /// behavior is to panic in the event that an `end_elem` is created without a
    /// matching `start_elem`. Setting this to true will prevent those panics by
    /// creating a default parent on the element stack. No extra start elem will
    /// actually be written. Default: false
    pub create_missing_parent: bool,
}
42 
43 impl Default for SerializeOpts {
default() -> SerializeOpts44     fn default() -> SerializeOpts {
45         SerializeOpts {
46             scripting_enabled: true,
47             traversal_scope: TraversalScope::ChildrenOnly(None),
48             create_missing_parent: false,
49         }
50     }
51 }
52 
/// Bookkeeping for one entry on the serializer's element stack.
///
/// One entry is pushed for every `start_elem` call and popped by the
/// matching `end_elem`; the serializer also seeds the stack with one entry
/// describing the traversal root.
#[derive(Default)]
struct ElemInfo {
    /// Local name of the element when it is in the HTML namespace, `None`
    /// otherwise. For the initial stack entry this is the local name of the
    /// `ChildrenOnly(Some(..))` context element, if one was given.
    /// `write_text` inspects it to decide whether text must be escaped.
    html_name: Option<LocalName>,
    /// True for void HTML elements (`<br>`, `<img>`, ...) and for every
    /// element started while the parent entry has this flag set. Elements
    /// started under such a parent produce no output, and `end_elem` writes
    /// no closing tag for an entry with this flag.
    ignore_children: bool,
    /// Set to true on the parent entry once a child element's start tag has
    /// been written. NOTE(review): nothing in the code visible here reads
    /// this flag — confirm whether it is still needed.
    processed_first_child: bool,
}
59 
/// A `Serializer` implementation that writes HTML syntax to an `io::Write`.
pub struct HtmlSerializer<Wr: Write> {
    /// Destination that all serialized bytes are written to.
    pub writer: Wr,
    /// Options this serializer was constructed with.
    opts: SerializeOpts,
    /// Stack of currently open elements; the last entry is the parent of
    /// whatever is serialized next.
    stack: Vec<ElemInfo>,
}
65 
tagname(name: &QualName) -> LocalName66 fn tagname(name: &QualName) -> LocalName {
67     match name.ns {
68         ns!(html) | ns!(mathml) | ns!(svg) => (),
69         ref ns => {
70             // FIXME(#122)
71             warn!("node with weird namespace {:?}", ns);
72         },
73     }
74 
75     name.local.clone()
76 }
77 
78 impl<Wr: Write> HtmlSerializer<Wr> {
new(writer: Wr, opts: SerializeOpts) -> Self79     pub fn new(writer: Wr, opts: SerializeOpts) -> Self {
80         let html_name = match opts.traversal_scope {
81             TraversalScope::IncludeNode | TraversalScope::ChildrenOnly(None) => None,
82             TraversalScope::ChildrenOnly(Some(ref n)) => Some(tagname(n)),
83         };
84         HtmlSerializer {
85             writer: writer,
86             opts: opts,
87             stack: vec![ElemInfo {
88                 html_name: html_name,
89                 ignore_children: false,
90                 processed_first_child: false,
91             }],
92         }
93     }
94 
parent(&mut self) -> &mut ElemInfo95     fn parent(&mut self) -> &mut ElemInfo {
96         if self.stack.len() == 0 {
97             if self.opts.create_missing_parent {
98                 warn!("ElemInfo stack empty, creating new parent");
99                 self.stack.push(Default::default());
100             } else {
101                 panic!("no parent ElemInfo")
102             }
103         }
104         self.stack.last_mut().unwrap()
105     }
106 
write_escaped(&mut self, text: &str, attr_mode: bool) -> io::Result<()>107     fn write_escaped(&mut self, text: &str, attr_mode: bool) -> io::Result<()> {
108         for c in text.chars() {
109             match c {
110                 '&' => self.writer.write_all(b"&amp;"),
111                 '\u{00A0}' => self.writer.write_all(b"&nbsp;"),
112                 '"' if attr_mode => self.writer.write_all(b"&quot;"),
113                 '<' if !attr_mode => self.writer.write_all(b"&lt;"),
114                 '>' if !attr_mode => self.writer.write_all(b"&gt;"),
115                 c => self.writer.write_fmt(format_args!("{}", c)),
116             }?;
117         }
118         Ok(())
119     }
120 }
121 
122 impl<Wr: Write> Serializer for HtmlSerializer<Wr> {
start_elem<'a, AttrIter>(&mut self, name: QualName, attrs: AttrIter) -> io::Result<()> where AttrIter: Iterator<Item = AttrRef<'a>>,123     fn start_elem<'a, AttrIter>(&mut self, name: QualName, attrs: AttrIter) -> io::Result<()>
124     where
125         AttrIter: Iterator<Item = AttrRef<'a>>,
126     {
127         let html_name = match name.ns {
128             ns!(html) => Some(name.local.clone()),
129             _ => None,
130         };
131 
132         if self.parent().ignore_children {
133             self.stack.push(ElemInfo {
134                 html_name: html_name,
135                 ignore_children: true,
136                 processed_first_child: false,
137             });
138             return Ok(());
139         }
140 
141         self.writer.write_all(b"<")?;
142         self.writer.write_all(tagname(&name).as_bytes())?;
143         for (name, value) in attrs {
144             self.writer.write_all(b" ")?;
145 
146             match name.ns {
147                 ns!() => (),
148                 ns!(xml) => self.writer.write_all(b"xml:")?,
149                 ns!(xmlns) => {
150                     if name.local != local_name!("xmlns") {
151                         self.writer.write_all(b"xmlns:")?;
152                     }
153                 },
154                 ns!(xlink) => self.writer.write_all(b"xlink:")?,
155                 ref ns => {
156                     // FIXME(#122)
157                     warn!("attr with weird namespace {:?}", ns);
158                     self.writer.write_all(b"unknown_namespace:")?;
159                 },
160             }
161 
162             self.writer.write_all(name.local.as_bytes())?;
163             self.writer.write_all(b"=\"")?;
164             self.write_escaped(value, true)?;
165             self.writer.write_all(b"\"")?;
166         }
167         self.writer.write_all(b">")?;
168 
169         let ignore_children = name.ns == ns!(html) &&
170             match name.local {
171                 local_name!("area") |
172                 local_name!("base") |
173                 local_name!("basefont") |
174                 local_name!("bgsound") |
175                 local_name!("br") |
176                 local_name!("col") |
177                 local_name!("embed") |
178                 local_name!("frame") |
179                 local_name!("hr") |
180                 local_name!("img") |
181                 local_name!("input") |
182                 local_name!("keygen") |
183                 local_name!("link") |
184                 local_name!("meta") |
185                 local_name!("param") |
186                 local_name!("source") |
187                 local_name!("track") |
188                 local_name!("wbr") => true,
189                 _ => false,
190             };
191 
192         self.parent().processed_first_child = true;
193 
194         self.stack.push(ElemInfo {
195             html_name: html_name,
196             ignore_children: ignore_children,
197             processed_first_child: false,
198         });
199 
200         Ok(())
201     }
202 
end_elem(&mut self, name: QualName) -> io::Result<()>203     fn end_elem(&mut self, name: QualName) -> io::Result<()> {
204         let info = match self.stack.pop() {
205             Some(info) => info,
206             None if self.opts.create_missing_parent => {
207                 warn!("missing ElemInfo, creating default.");
208                 Default::default()
209             },
210             _ => panic!("no ElemInfo"),
211         };
212         if info.ignore_children {
213             return Ok(());
214         }
215 
216         self.writer.write_all(b"</")?;
217         self.writer.write_all(tagname(&name).as_bytes())?;
218         self.writer.write_all(b">")
219     }
220 
write_text(&mut self, text: &str) -> io::Result<()>221     fn write_text(&mut self, text: &str) -> io::Result<()> {
222         let escape = match self.parent().html_name {
223             Some(local_name!("style")) |
224             Some(local_name!("script")) |
225             Some(local_name!("xmp")) |
226             Some(local_name!("iframe")) |
227             Some(local_name!("noembed")) |
228             Some(local_name!("noframes")) |
229             Some(local_name!("plaintext")) => false,
230 
231             Some(local_name!("noscript")) => !self.opts.scripting_enabled,
232 
233             _ => true,
234         };
235 
236         if escape {
237             self.write_escaped(text, false)
238         } else {
239             self.writer.write_all(text.as_bytes())
240         }
241     }
242 
write_comment(&mut self, text: &str) -> io::Result<()>243     fn write_comment(&mut self, text: &str) -> io::Result<()> {
244         self.writer.write_all(b"<!--")?;
245         self.writer.write_all(text.as_bytes())?;
246         self.writer.write_all(b"-->")
247     }
248 
write_doctype(&mut self, name: &str) -> io::Result<()>249     fn write_doctype(&mut self, name: &str) -> io::Result<()> {
250         self.writer.write_all(b"<!DOCTYPE ")?;
251         self.writer.write_all(name.as_bytes())?;
252         self.writer.write_all(b">")
253     }
254 
write_processing_instruction(&mut self, target: &str, data: &str) -> io::Result<()>255     fn write_processing_instruction(&mut self, target: &str, data: &str) -> io::Result<()> {
256         self.writer.write_all(b"<?")?;
257         self.writer.write_all(target.as_bytes())?;
258         self.writer.write_all(b" ")?;
259         self.writer.write_all(data.as_bytes())?;
260         self.writer.write_all(b">")
261     }
262 }
263