1 // Copyright 2014-2017 The html5ever Project Developers. See the
2 // COPYRIGHT file at the top-level directory of this distribution.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
9 
10 /*!
11 
12 Implements the `match_token!()` macro for use by the HTML tree builder
13 in `src/tree_builder/rules.rs`.
14 
15 
16 ## Example
17 
18 ```rust
19 match_token!(token {
20     CommentToken(text) => 1,
21 
22     tag @ <base> <link> <meta> => 2,
23 
24     </head> => 3,
25 
26     </body> </html> </br> => else,
27 
28     tag @ </_> => 4,
29 
30     token => 5,
31 })
32 ```
33 
34 
35 ## Syntax
36 
The macro invocation must have the form `match_token!(token { ... })`:
an identifier naming the value to match, followed by a single brace-delimited
list of arms.  The input is parsed as a token stream, so whitespace is not
significant.
40 
41 The left-hand side of each match arm is an optional `name @` binding, followed by
42 
43   - an ordinary Rust pattern that starts with an identifier or an underscore, or
44 
45   - a sequence of HTML tag names as identifiers, each inside "<...>" or "</...>"
46     to match an open or close tag respectively, or
47 
48   - a "wildcard tag" "<_>" or "</_>" to match all open tags or all close tags
49     respectively.
50 
51 The right-hand side is either an expression or the keyword `else`.
52 
53 Note that this syntax does not support guards or pattern alternation like
54 `Foo | Bar`.  This is not a fundamental limitation; it's done for implementation
55 simplicity.
56 
57 
58 ## Semantics
59 
60 Ordinary Rust patterns match as usual.  If present, the `name @` binding has
61 the usual meaning.
62 
63 A sequence of named tags matches any of those tags.  A single sequence can
64 contain both open and close tags.  If present, the `name @` binding binds (by
65 move) the `Tag` struct, not the outer `Token`.  That is, a match arm like
66 
67 ```rust
68 tag @ <html> <head> => ...
69 ```
70 
71 expands to something like
72 
73 ```rust
74 TagToken(tag @ Tag { name: local_name!("html"), kind: StartTag })
75 | TagToken(tag @ Tag { name: local_name!("head"), kind: StartTag }) => ...
76 ```
77 
78 A wildcard tag matches any tag of the appropriate kind, *unless* it was
79 previously matched with an `else` right-hand side (more on this below).
80 
81 The expansion of this macro reorders code somewhat, to satisfy various
82 restrictions arising from moves.  However it provides the semantics of in-order
83 matching, by enforcing the following restrictions on its input:
84 
85   - The last pattern must be a variable or the wildcard "_".  In other words
86     it must match everything.
87 
88   - Otherwise, ordinary Rust patterns and specific-tag patterns cannot appear
89     after wildcard tag patterns.
90 
91   - No tag name may appear more than once.
92 
93   - A wildcard tag pattern may not occur in the same arm as any other tag.
94     "<_> <html> => ..." and "<_> </_> => ..." are both forbidden.
95 
96   - The right-hand side "else" may only appear with specific-tag patterns.
97     It means that these specific tags should be handled by the last,
98     catch-all case arm, rather than by any wildcard tag arm.  This situation
99     is common in the HTML5 syntax.
100 */
101 
102 use quote::quote;
103 use syn::{braced, parse_quote, Token};
104 
105 use proc_macro2::TokenStream;
106 use quote::ToTokens;
107 use std::collections::HashSet;
108 use std::fs::File;
109 use std::io::{Read, Write};
110 use std::path::Path;
111 use syn;
112 use syn::ext::IdentExt;
113 use syn::fold::Fold;
114 use syn::parse::{Parse, ParseStream, Result};
115 
expand(from: &Path, to: &Path)116 pub fn expand(from: &Path, to: &Path) {
117     let mut source = String::new();
118     File::open(from)
119         .unwrap()
120         .read_to_string(&mut source)
121         .unwrap();
122     let ast = syn::parse_file(&source).expect("Parsing rules.rs module");
123     let mut m = MatchTokenParser {};
124     let ast = m.fold_file(ast);
125     let code = ast
126         .into_token_stream()
127         .to_string()
128         .replace("{ ", "{\n")
129         .replace(" }", "\n}");
130     File::create(to)
131         .unwrap()
132         .write_all(code.as_bytes())
133         .unwrap();
134 }
135 
/// Stateless syntax-tree folder; its `Fold` implementation replaces
/// `match_token!` invocations with their expansion.
struct MatchTokenParser {}
137 
/// The parsed body of one `match_token!` invocation.
struct MatchToken {
    /// Identifier of the value being matched (the scrutinee of the generated `match`).
    ident: syn::Ident,
    /// The arms, in source order.
    arms: Vec<MatchTokenArm>,
}
142 
/// A single arm of a `match_token!` invocation.
struct MatchTokenArm {
    /// The optional `name @` binding preceding the left-hand side.
    binding: Option<syn::Ident>,
    /// What the arm matches: a list of tags or an ordinary Rust pattern.
    lhs: LHS,
    /// What the arm evaluates to: an expression or the keyword `else`.
    rhs: RHS,
}
148 
/// Left-hand side of a `match_token!` arm.
enum LHS {
    /// One or more `<tag>` / `</tag>` / `<_>` / `</_>` patterns.
    Tags(Vec<Tag>),
    /// An ordinary Rust pattern.
    Pattern(syn::Pat),
}
153 
/// Right-hand side of a `match_token!` arm.
enum RHS {
    /// A regular expression (including a `{ ... }` block).
    Expression(syn::Expr),
    /// The keyword `else`: exclude the arm's tags from wildcard tag arms so
    /// they are handled by the final catch-all arm.
    Else,
}
158 
/// Whether a tag pattern was written as an open tag (`<x>`) or a close tag (`</x>`).
#[derive(PartialEq, Eq, Hash, Clone)]
enum TagKind {
    /// Written without a slash: `<x>`.
    StartTag,
    /// Written with a slash: `</x>`.
    EndTag,
}
164 
/// A single tag pattern from the left-hand side of an arm.
///
/// Hashable and comparable so that duplicates can be detected with a `HashSet`.
#[derive(PartialEq, Eq, Hash, Clone)]
pub struct Tag {
    /// Open or close tag.
    kind: TagKind,
    /// The tag name; `None` for the wildcard forms `<_>` and `</_>`.
    name: Option<syn::Ident>,
}
171 
172 impl Parse for Tag {
parse(input: ParseStream) -> Result<Self>173     fn parse(input: ParseStream) -> Result<Self> {
174         input.parse::<Token![<]>()?;
175         let closing: Option<Token![/]> = input.parse()?;
176         let name = match input.call(syn::Ident::parse_any)? {
177             ref wildcard if wildcard == "_" => None,
178             other => Some(other),
179         };
180         input.parse::<Token![>]>()?;
181         Ok(Tag {
182             kind: if closing.is_some() {
183                 TagKind::EndTag
184             } else {
185                 TagKind::StartTag
186             },
187             name: name,
188         })
189     }
190 }
191 
192 impl Parse for LHS {
parse(input: ParseStream) -> Result<Self>193     fn parse(input: ParseStream) -> Result<Self> {
194         if input.peek(Token![<]) {
195             let mut tags = Vec::new();
196             while !input.peek(Token![=>]) {
197                 tags.push(input.parse()?);
198             }
199             Ok(LHS::Tags(tags))
200         } else {
201             let p: syn::Pat = input.parse()?;
202             Ok(LHS::Pattern(p))
203         }
204     }
205 }
206 
207 impl Parse for MatchTokenArm {
parse(input: ParseStream) -> Result<Self>208     fn parse(input: ParseStream) -> Result<Self> {
209         let binding = if input.peek2(Token![@]) {
210             let binding = input.parse::<syn::Ident>()?;
211             input.parse::<Token![@]>()?;
212             Some(binding)
213         } else {
214             None
215         };
216         let lhs = input.parse::<LHS>()?;
217         input.parse::<Token![=>]>()?;
218         let rhs = if input.peek(syn::token::Brace) {
219             let block = input.parse::<syn::Block>().unwrap();
220             let block = syn::ExprBlock {
221                 attrs: vec![],
222                 label: None,
223                 block,
224             };
225             input.parse::<Option<Token![,]>>()?;
226             RHS::Expression(syn::Expr::Block(block))
227         } else if input.peek(Token![else]) {
228             input.parse::<Token![else]>()?;
229             input.parse::<Token![,]>()?;
230             RHS::Else
231         } else {
232             let expr = input.parse::<syn::Expr>().unwrap();
233             input.parse::<Option<Token![,]>>()?;
234             RHS::Expression(expr)
235         };
236 
237         Ok(MatchTokenArm { binding, lhs, rhs })
238     }
239 }
240 
241 impl Parse for MatchToken {
parse(input: ParseStream) -> Result<Self>242     fn parse(input: ParseStream) -> Result<Self> {
243         let ident = input.parse::<syn::Ident>()?;
244         let content;
245         braced!(content in input);
246         let mut arms = vec![];
247         while !content.is_empty() {
248             arms.push(content.parse()?);
249         }
250         Ok(MatchToken { ident, arms })
251     }
252 }
253 
expand_match_token(body: &TokenStream) -> syn::Expr254 pub fn expand_match_token(body: &TokenStream) -> syn::Expr {
255     let match_token = syn::parse2::<MatchToken>(body.clone());
256     let ast = expand_match_token_macro(match_token.unwrap());
257     syn::parse2(ast.into()).unwrap()
258 }
259 
expand_match_token_macro(match_token: MatchToken) -> TokenStream260 fn expand_match_token_macro(match_token: MatchToken) -> TokenStream {
261     let mut arms = match_token.arms;
262     let to_be_matched = match_token.ident;
263     // Handle the last arm specially at the end.
264     let last_arm = arms.pop().unwrap();
265 
266     // Tags we've seen, used for detecting duplicates.
267     let mut seen_tags: HashSet<Tag> = HashSet::new();
268 
269     // Case arms for wildcard matching.  We collect these and
270     // emit them later.
271     let mut wildcards_patterns: Vec<TokenStream> = Vec::new();
272     let mut wildcards_expressions: Vec<syn::Expr> = Vec::new();
273 
274     // Tags excluded (by an 'else' RHS) from wildcard matching.
275     let mut wild_excluded_patterns: Vec<TokenStream> = Vec::new();
276 
277     let mut arms_code = Vec::new();
278 
279     for MatchTokenArm { binding, lhs, rhs } in arms {
280         // Build Rust syntax for the `name @` binding, if any.
281         let binding = match binding {
282             Some(ident) => quote!(#ident @),
283             None => quote!(),
284         };
285 
286         match (lhs, rhs) {
287             (LHS::Pattern(_), RHS::Else) => {
288                 panic!("'else' may not appear with an ordinary pattern")
289             },
290 
291             // ordinary pattern => expression
292             (LHS::Pattern(pat), RHS::Expression(expr)) => {
293                 if !wildcards_patterns.is_empty() {
294                     panic!(
295                         "ordinary patterns may not appear after wildcard tags {:?} {:?}",
296                         pat, expr
297                     );
298                 }
299                 arms_code.push(quote!(#binding #pat => #expr,))
300             },
301 
302             // <tag> <tag> ... => else
303             (LHS::Tags(tags), RHS::Else) => {
304                 for tag in tags {
305                     if !seen_tags.insert(tag.clone()) {
306                         panic!("duplicate tag");
307                     }
308                     if tag.name.is_none() {
309                         panic!("'else' may not appear with a wildcard tag");
310                     }
311                     wild_excluded_patterns.push(make_tag_pattern(&TokenStream::new(), tag));
312                 }
313             },
314 
315             // <_> => expression
316             // <tag> <tag> ... => expression
317             (LHS::Tags(tags), RHS::Expression(expr)) => {
318                 // Is this arm a tag wildcard?
319                 // `None` if we haven't processed the first tag yet.
320                 let mut wildcard = None;
321                 for tag in tags {
322                     if !seen_tags.insert(tag.clone()) {
323                         panic!("duplicate tag");
324                     }
325 
326                     match tag.name {
327                         // <tag>
328                         Some(_) => {
329                             if !wildcards_patterns.is_empty() {
330                                 panic!("specific tags may not appear after wildcard tags");
331                             }
332 
333                             if wildcard == Some(true) {
334                                 panic!("wildcard tags must appear alone");
335                             }
336 
337                             if wildcard.is_some() {
338                                 // Push the delimeter `|` if it's not the first tag.
339                                 arms_code.push(quote!( | ))
340                             }
341                             arms_code.push(make_tag_pattern(&binding, tag));
342 
343                             wildcard = Some(false);
344                         },
345 
346                         // <_>
347                         None => {
348                             if wildcard.is_some() {
349                                 panic!("wildcard tags must appear alone");
350                             }
351                             wildcard = Some(true);
352                             wildcards_patterns.push(make_tag_pattern(&binding, tag));
353                             wildcards_expressions.push(expr.clone());
354                         },
355                     }
356                 }
357 
358                 match wildcard {
359                     None => panic!("[internal macro error] tag arm with no tags"),
360                     Some(false) => arms_code.push(quote!( => #expr,)),
361                     Some(true) => {}, // codegen for wildcards is deferred
362                 }
363             },
364         }
365     }
366 
367     // Time to process the last, catch-all arm.  We will generate something like
368     //
369     //     last_arm_token => {
370     //         let enable_wildcards = match last_arm_token {
371     //             TagToken(Tag { kind: EndTag, name: local_name!("body"), .. }) => false,
372     //             TagToken(Tag { kind: EndTag, name: local_name!("html"), .. }) => false,
373     //             // ...
374     //             _ => true,
375     //         };
376     //
377     //         match (enable_wildcards, last_arm_token) {
378     //             (true, TagToken(name @ Tag { kind: StartTag, .. }))
379     //                 => ...,  // wildcard action for start tags
380     //
381     //             (true, TagToken(name @ Tag { kind: EndTag, .. }))
382     //                 => ...,  // wildcard action for end tags
383     //
384     //             (_, token) => ...  // using the pattern from that last arm
385     //         }
386     //     }
387 
388     let MatchTokenArm { binding, lhs, rhs } = last_arm;
389 
390     let (last_pat, last_expr) = match (binding, lhs, rhs) {
391         (Some(_), _, _) => panic!("the last arm cannot have an @-binding"),
392         (None, LHS::Tags(_), _) => panic!("the last arm cannot have tag patterns"),
393         (None, _, RHS::Else) => panic!("the last arm cannot use 'else'"),
394         (None, LHS::Pattern(p), RHS::Expression(e)) => (p, e),
395     };
396 
397     quote! {
398         match #to_be_matched {
399             #(
400                 #arms_code
401             )*
402             last_arm_token => {
403                 let enable_wildcards = match last_arm_token {
404                     #(
405                         #wild_excluded_patterns => false,
406                     )*
407                     _ => true,
408                 };
409                 match (enable_wildcards, last_arm_token) {
410                     #(
411                         (true, #wildcards_patterns) => #wildcards_expressions,
412                     )*
413                     (_, #last_pat) => #last_expr,
414                 }
415             }
416         }
417     }
418 }
419 
420 impl Fold for MatchTokenParser {
fold_stmt(&mut self, stmt: syn::Stmt) -> syn::Stmt421     fn fold_stmt(&mut self, stmt: syn::Stmt) -> syn::Stmt {
422         match stmt {
423             syn::Stmt::Item(syn::Item::Macro(syn::ItemMacro { ref mac, .. })) => {
424                 if mac.path == parse_quote!(match_token) {
425                     return syn::fold::fold_stmt(
426                         self,
427                         syn::Stmt::Expr(expand_match_token(&mac.tokens)),
428                     );
429                 }
430             },
431             _ => {},
432         }
433 
434         syn::fold::fold_stmt(self, stmt)
435     }
436 
fold_expr(&mut self, expr: syn::Expr) -> syn::Expr437     fn fold_expr(&mut self, expr: syn::Expr) -> syn::Expr {
438         match expr {
439             syn::Expr::Macro(syn::ExprMacro { ref mac, .. }) => {
440                 if mac.path == parse_quote!(match_token) {
441                     return syn::fold::fold_expr(self, expand_match_token(&mac.tokens));
442                 }
443             },
444             _ => {},
445         }
446 
447         syn::fold::fold_expr(self, expr)
448     }
449 }
450 
make_tag_pattern(binding: &TokenStream, tag: Tag) -> TokenStream451 fn make_tag_pattern(binding: &TokenStream, tag: Tag) -> TokenStream {
452     let kind = match tag.kind {
453         TagKind::StartTag => quote!(crate::tokenizer::StartTag),
454         TagKind::EndTag => quote!(crate::tokenizer::EndTag),
455     };
456     let name_field = if let Some(name) = tag.name {
457         let name = name.to_string();
458         quote!(name: local_name!(#name),)
459     } else {
460         quote!()
461     };
462     quote! {
463         crate::tree_builder::types::TagToken(#binding crate::tokenizer::Tag { kind: #kind, #name_field .. })
464     }
465 }
466