1 // Copyright 2014-2017 The html5ever Project Developers. See the
2 // COPYRIGHT file at the top-level directory of this distribution.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
9
10 /*!
11
12 Implements the `match_token!()` macro for use by the HTML tree builder
13 in `src/tree_builder/rules.rs`.
14
15
16 ## Example
17
18 ```rust
19 match_token!(token {
20 CommentToken(text) => 1,
21
22 tag @ <base> <link> <meta> => 2,
23
24 </head> => 3,
25
26 </body> </html> </br> => else,
27
28 tag @ </_> => 4,
29
30 token => 5,
31 })
32 ```
33
34
35 ## Syntax
36
37 Because of the simplistic parser, the macro invocation must
38 start with exactly `match_token!(token {` (with whitespace as specified)
39 and end with exactly `})`.
40
41 The left-hand side of each match arm is an optional `name @` binding, followed by
42
43 - an ordinary Rust pattern that starts with an identifier or an underscore, or
44
45 - a sequence of HTML tag names as identifiers, each inside "<...>" or "</...>"
46 to match an open or close tag respectively, or
47
48 - a "wildcard tag" "<_>" or "</_>" to match all open tags or all close tags
49 respectively.
50
51 The right-hand side is either an expression or the keyword `else`.
52
53 Note that this syntax does not support guards or pattern alternation like
54 `Foo | Bar`. This is not a fundamental limitation; it's done for implementation
55 simplicity.
56
57
58 ## Semantics
59
60 Ordinary Rust patterns match as usual. If present, the `name @` binding has
61 the usual meaning.
62
63 A sequence of named tags matches any of those tags. A single sequence can
64 contain both open and close tags. If present, the `name @` binding binds (by
65 move) the `Tag` struct, not the outer `Token`. That is, a match arm like
66
67 ```rust
68 tag @ <html> <head> => ...
69 ```
70
71 expands to something like
72
73 ```rust
74 TagToken(tag @ Tag { name: local_name!("html"), kind: StartTag })
75 | TagToken(tag @ Tag { name: local_name!("head"), kind: StartTag }) => ...
76 ```
77
78 A wildcard tag matches any tag of the appropriate kind, *unless* it was
79 previously matched with an `else` right-hand side (more on this below).
80
81 The expansion of this macro reorders code somewhat, to satisfy various
82 restrictions arising from moves. However it provides the semantics of in-order
83 matching, by enforcing the following restrictions on its input:
84
85 - The last pattern must be a variable or the wildcard "_". In other words
86 it must match everything.
87
88 - Otherwise, ordinary Rust patterns and specific-tag patterns cannot appear
89 after wildcard tag patterns.
90
91 - No tag name may appear more than once.
92
93 - A wildcard tag pattern may not occur in the same arm as any other tag.
94 "<_> <html> => ..." and "<_> </_> => ..." are both forbidden.
95
96 - The right-hand side "else" may only appear with specific-tag patterns.
97 It means that these specific tags should be handled by the last,
98 catch-all case arm, rather than by any wildcard tag arm. This situation
99 is common in the HTML5 syntax.
100 */
101
102 use quote::quote;
103 use syn::{braced, parse_quote, Token};
104
105 use proc_macro2::TokenStream;
106 use quote::ToTokens;
107 use std::collections::HashSet;
108 use std::fs::File;
109 use std::io::{Read, Write};
110 use std::path::Path;
111 use syn;
112 use syn::ext::IdentExt;
113 use syn::fold::Fold;
114 use syn::parse::{Parse, ParseStream, Result};
115
expand(from: &Path, to: &Path)116 pub fn expand(from: &Path, to: &Path) {
117 let mut source = String::new();
118 File::open(from)
119 .unwrap()
120 .read_to_string(&mut source)
121 .unwrap();
122 let ast = syn::parse_file(&source).expect("Parsing rules.rs module");
123 let mut m = MatchTokenParser {};
124 let ast = m.fold_file(ast);
125 let code = ast
126 .into_token_stream()
127 .to_string()
128 .replace("{ ", "{\n")
129 .replace(" }", "\n}");
130 File::create(to)
131 .unwrap()
132 .write_all(code.as_bytes())
133 .unwrap();
134 }
135
/// Stateless `syn::fold::Fold` implementor that replaces `match_token!`
/// invocations with their expansion.
struct MatchTokenParser {}
137
/// The parsed body of one `match_token!` invocation: the identifier being
/// matched on, followed by a braced list of arms.
struct MatchToken {
    /// Identifier of the value being matched; emitted as the `match` scrutinee.
    ident: syn::Ident,
    /// The arms, in the order they were parsed.
    arms: Vec<MatchTokenArm>,
}
142
/// One arm of a `match_token!` invocation: `[name @] lhs => rhs`.
struct MatchTokenArm {
    /// The identifier from an optional leading `name @`, if present.
    binding: Option<syn::Ident>,
    /// What the arm matches: tag patterns or an ordinary Rust pattern.
    lhs: LHS,
    /// What the arm evaluates to: an expression or the keyword `else`.
    rhs: RHS,
}
148
/// Left-hand side of a `match_token!` arm.
enum LHS {
    /// A sequence of tag patterns such as `<html> </body>` or a wildcard `<_>`.
    Tags(Vec<Tag>),
    /// An ordinary Rust pattern.
    Pattern(syn::Pat),
}
153
/// Right-hand side of a `match_token!` arm.
enum RHS {
    /// An expression (a braced block is stored as `syn::Expr::Block`).
    Expression(syn::Expr),
    /// The keyword `else`: the tags of this arm are excluded from wildcard
    /// tag arms and fall through to the last, catch-all arm.
    Else,
}
158
/// Whether a tag pattern was written as an open tag (`<...>`) or a close
/// tag (`</...>`).
#[derive(PartialEq, Eq, Hash, Clone)]
enum TagKind {
    /// Written without a slash: `<name>` or `<_>`.
    StartTag,
    /// Written with a slash: `</name>` or `</_>`.
    EndTag,
}
164
/// A single tag pattern such as `<html>`, `</body>`, `<_>` or `</_>`.
///
/// `name` is `None` when the pattern is a wildcard (`<_>` or `</_>`).
#[derive(PartialEq, Eq, Hash, Clone)]
pub struct Tag {
    /// Open or close tag.
    kind: TagKind,
    /// The tag name, or `None` for a wildcard.
    name: Option<syn::Ident>,
}
171
172 impl Parse for Tag {
parse(input: ParseStream) -> Result<Self>173 fn parse(input: ParseStream) -> Result<Self> {
174 input.parse::<Token![<]>()?;
175 let closing: Option<Token![/]> = input.parse()?;
176 let name = match input.call(syn::Ident::parse_any)? {
177 ref wildcard if wildcard == "_" => None,
178 other => Some(other),
179 };
180 input.parse::<Token![>]>()?;
181 Ok(Tag {
182 kind: if closing.is_some() {
183 TagKind::EndTag
184 } else {
185 TagKind::StartTag
186 },
187 name: name,
188 })
189 }
190 }
191
192 impl Parse for LHS {
parse(input: ParseStream) -> Result<Self>193 fn parse(input: ParseStream) -> Result<Self> {
194 if input.peek(Token![<]) {
195 let mut tags = Vec::new();
196 while !input.peek(Token![=>]) {
197 tags.push(input.parse()?);
198 }
199 Ok(LHS::Tags(tags))
200 } else {
201 let p: syn::Pat = input.parse()?;
202 Ok(LHS::Pattern(p))
203 }
204 }
205 }
206
207 impl Parse for MatchTokenArm {
parse(input: ParseStream) -> Result<Self>208 fn parse(input: ParseStream) -> Result<Self> {
209 let binding = if input.peek2(Token![@]) {
210 let binding = input.parse::<syn::Ident>()?;
211 input.parse::<Token![@]>()?;
212 Some(binding)
213 } else {
214 None
215 };
216 let lhs = input.parse::<LHS>()?;
217 input.parse::<Token![=>]>()?;
218 let rhs = if input.peek(syn::token::Brace) {
219 let block = input.parse::<syn::Block>().unwrap();
220 let block = syn::ExprBlock {
221 attrs: vec![],
222 label: None,
223 block,
224 };
225 input.parse::<Option<Token![,]>>()?;
226 RHS::Expression(syn::Expr::Block(block))
227 } else if input.peek(Token![else]) {
228 input.parse::<Token![else]>()?;
229 input.parse::<Token![,]>()?;
230 RHS::Else
231 } else {
232 let expr = input.parse::<syn::Expr>().unwrap();
233 input.parse::<Option<Token![,]>>()?;
234 RHS::Expression(expr)
235 };
236
237 Ok(MatchTokenArm { binding, lhs, rhs })
238 }
239 }
240
241 impl Parse for MatchToken {
parse(input: ParseStream) -> Result<Self>242 fn parse(input: ParseStream) -> Result<Self> {
243 let ident = input.parse::<syn::Ident>()?;
244 let content;
245 braced!(content in input);
246 let mut arms = vec![];
247 while !content.is_empty() {
248 arms.push(content.parse()?);
249 }
250 Ok(MatchToken { ident, arms })
251 }
252 }
253
expand_match_token(body: &TokenStream) -> syn::Expr254 pub fn expand_match_token(body: &TokenStream) -> syn::Expr {
255 let match_token = syn::parse2::<MatchToken>(body.clone());
256 let ast = expand_match_token_macro(match_token.unwrap());
257 syn::parse2(ast.into()).unwrap()
258 }
259
expand_match_token_macro(match_token: MatchToken) -> TokenStream260 fn expand_match_token_macro(match_token: MatchToken) -> TokenStream {
261 let mut arms = match_token.arms;
262 let to_be_matched = match_token.ident;
263 // Handle the last arm specially at the end.
264 let last_arm = arms.pop().unwrap();
265
266 // Tags we've seen, used for detecting duplicates.
267 let mut seen_tags: HashSet<Tag> = HashSet::new();
268
269 // Case arms for wildcard matching. We collect these and
270 // emit them later.
271 let mut wildcards_patterns: Vec<TokenStream> = Vec::new();
272 let mut wildcards_expressions: Vec<syn::Expr> = Vec::new();
273
274 // Tags excluded (by an 'else' RHS) from wildcard matching.
275 let mut wild_excluded_patterns: Vec<TokenStream> = Vec::new();
276
277 let mut arms_code = Vec::new();
278
279 for MatchTokenArm { binding, lhs, rhs } in arms {
280 // Build Rust syntax for the `name @` binding, if any.
281 let binding = match binding {
282 Some(ident) => quote!(#ident @),
283 None => quote!(),
284 };
285
286 match (lhs, rhs) {
287 (LHS::Pattern(_), RHS::Else) => {
288 panic!("'else' may not appear with an ordinary pattern")
289 },
290
291 // ordinary pattern => expression
292 (LHS::Pattern(pat), RHS::Expression(expr)) => {
293 if !wildcards_patterns.is_empty() {
294 panic!(
295 "ordinary patterns may not appear after wildcard tags {:?} {:?}",
296 pat, expr
297 );
298 }
299 arms_code.push(quote!(#binding #pat => #expr,))
300 },
301
302 // <tag> <tag> ... => else
303 (LHS::Tags(tags), RHS::Else) => {
304 for tag in tags {
305 if !seen_tags.insert(tag.clone()) {
306 panic!("duplicate tag");
307 }
308 if tag.name.is_none() {
309 panic!("'else' may not appear with a wildcard tag");
310 }
311 wild_excluded_patterns.push(make_tag_pattern(&TokenStream::new(), tag));
312 }
313 },
314
315 // <_> => expression
316 // <tag> <tag> ... => expression
317 (LHS::Tags(tags), RHS::Expression(expr)) => {
318 // Is this arm a tag wildcard?
319 // `None` if we haven't processed the first tag yet.
320 let mut wildcard = None;
321 for tag in tags {
322 if !seen_tags.insert(tag.clone()) {
323 panic!("duplicate tag");
324 }
325
326 match tag.name {
327 // <tag>
328 Some(_) => {
329 if !wildcards_patterns.is_empty() {
330 panic!("specific tags may not appear after wildcard tags");
331 }
332
333 if wildcard == Some(true) {
334 panic!("wildcard tags must appear alone");
335 }
336
337 if wildcard.is_some() {
338 // Push the delimeter `|` if it's not the first tag.
339 arms_code.push(quote!( | ))
340 }
341 arms_code.push(make_tag_pattern(&binding, tag));
342
343 wildcard = Some(false);
344 },
345
346 // <_>
347 None => {
348 if wildcard.is_some() {
349 panic!("wildcard tags must appear alone");
350 }
351 wildcard = Some(true);
352 wildcards_patterns.push(make_tag_pattern(&binding, tag));
353 wildcards_expressions.push(expr.clone());
354 },
355 }
356 }
357
358 match wildcard {
359 None => panic!("[internal macro error] tag arm with no tags"),
360 Some(false) => arms_code.push(quote!( => #expr,)),
361 Some(true) => {}, // codegen for wildcards is deferred
362 }
363 },
364 }
365 }
366
367 // Time to process the last, catch-all arm. We will generate something like
368 //
369 // last_arm_token => {
370 // let enable_wildcards = match last_arm_token {
371 // TagToken(Tag { kind: EndTag, name: local_name!("body"), .. }) => false,
372 // TagToken(Tag { kind: EndTag, name: local_name!("html"), .. }) => false,
373 // // ...
374 // _ => true,
375 // };
376 //
377 // match (enable_wildcards, last_arm_token) {
378 // (true, TagToken(name @ Tag { kind: StartTag, .. }))
379 // => ..., // wildcard action for start tags
380 //
381 // (true, TagToken(name @ Tag { kind: EndTag, .. }))
382 // => ..., // wildcard action for end tags
383 //
384 // (_, token) => ... // using the pattern from that last arm
385 // }
386 // }
387
388 let MatchTokenArm { binding, lhs, rhs } = last_arm;
389
390 let (last_pat, last_expr) = match (binding, lhs, rhs) {
391 (Some(_), _, _) => panic!("the last arm cannot have an @-binding"),
392 (None, LHS::Tags(_), _) => panic!("the last arm cannot have tag patterns"),
393 (None, _, RHS::Else) => panic!("the last arm cannot use 'else'"),
394 (None, LHS::Pattern(p), RHS::Expression(e)) => (p, e),
395 };
396
397 quote! {
398 match #to_be_matched {
399 #(
400 #arms_code
401 )*
402 last_arm_token => {
403 let enable_wildcards = match last_arm_token {
404 #(
405 #wild_excluded_patterns => false,
406 )*
407 _ => true,
408 };
409 match (enable_wildcards, last_arm_token) {
410 #(
411 (true, #wildcards_patterns) => #wildcards_expressions,
412 )*
413 (_, #last_pat) => #last_expr,
414 }
415 }
416 }
417 }
418 }
419
420 impl Fold for MatchTokenParser {
fold_stmt(&mut self, stmt: syn::Stmt) -> syn::Stmt421 fn fold_stmt(&mut self, stmt: syn::Stmt) -> syn::Stmt {
422 match stmt {
423 syn::Stmt::Item(syn::Item::Macro(syn::ItemMacro { ref mac, .. })) => {
424 if mac.path == parse_quote!(match_token) {
425 return syn::fold::fold_stmt(
426 self,
427 syn::Stmt::Expr(expand_match_token(&mac.tokens)),
428 );
429 }
430 },
431 _ => {},
432 }
433
434 syn::fold::fold_stmt(self, stmt)
435 }
436
fold_expr(&mut self, expr: syn::Expr) -> syn::Expr437 fn fold_expr(&mut self, expr: syn::Expr) -> syn::Expr {
438 match expr {
439 syn::Expr::Macro(syn::ExprMacro { ref mac, .. }) => {
440 if mac.path == parse_quote!(match_token) {
441 return syn::fold::fold_expr(self, expand_match_token(&mac.tokens));
442 }
443 },
444 _ => {},
445 }
446
447 syn::fold::fold_expr(self, expr)
448 }
449 }
450
make_tag_pattern(binding: &TokenStream, tag: Tag) -> TokenStream451 fn make_tag_pattern(binding: &TokenStream, tag: Tag) -> TokenStream {
452 let kind = match tag.kind {
453 TagKind::StartTag => quote!(crate::tokenizer::StartTag),
454 TagKind::EndTag => quote!(crate::tokenizer::EndTag),
455 };
456 let name_field = if let Some(name) = tag.name {
457 let name = name.to_string();
458 quote!(name: local_name!(#name),)
459 } else {
460 quote!()
461 };
462 quote! {
463 crate::tree_builder::types::TagToken(#binding crate::tokenizer::Tag { kind: #kind, #name_field .. })
464 }
465 }
466