1 use fnv::{FnvHashMap as Map, FnvHashSet as Set};
2 use proc_macro2::TokenStream;
3 use quote::{quote, ToTokens, TokenStreamExt};
4 use syn::Ident;
5 
6 use crate::graph::{Graph, Meta, Node, NodeId, Range};
7 use crate::leaf::Leaf;
8 use crate::util::ToIdent;
9 
10 mod context;
11 mod fork;
12 mod leaf;
13 mod rope;
14 mod tables;
15 
16 use self::context::Context;
17 use self::tables::TableStack;
18 
19 pub struct Generator<'a> {
20     /// Name of the type we are implementing the `Logos` trait for
21     name: &'a Ident,
22     /// Name of the type with any generics it might need
23     this: &'a TokenStream,
24     /// Id to the root node
25     root: NodeId,
26     /// Reference to the graph with all of the nodes
27     graph: &'a Graph<Leaf<'a>>,
28     /// Meta data collected for the nodes
29     meta: Meta,
30     /// Buffer with functions growing during generation
31     rendered: TokenStream,
32     /// Set of functions that have already been rendered
33     fns: Set<(NodeId, Context)>,
34     /// Function name identifiers
35     idents: Map<(NodeId, Context), Ident>,
36     /// Local function calls. Note: a call might change its context,
37     /// so we can't use `idents` for this purpose.
38     gotos: Map<(NodeId, Context), TokenStream>,
39     /// Identifiers for helper functions matching a byte to a given
40     /// set of ranges
41     tests: Map<Vec<Range>, Ident>,
42     /// Related to above, table stack manages tables that need to be
43     tables: TableStack,
44 }
45 
46 impl<'a> Generator<'a> {
new( name: &'a Ident, this: &'a TokenStream, root: NodeId, graph: &'a Graph<Leaf>, ) -> Self47     pub fn new(
48         name: &'a Ident,
49         this: &'a TokenStream,
50         root: NodeId,
51         graph: &'a Graph<Leaf>,
52     ) -> Self {
53         let rendered = Self::fast_loop_macro();
54         let meta = Meta::analyze(root, graph);
55 
56         Generator {
57             name,
58             this,
59             root,
60             graph,
61             meta,
62             rendered,
63             fns: Set::default(),
64             idents: Map::default(),
65             gotos: Map::default(),
66             tests: Map::default(),
67             tables: TableStack::new(),
68         }
69     }
70 
generate(mut self) -> TokenStream71     pub fn generate(mut self) -> TokenStream {
72         let root = self.goto(self.root, Context::default()).clone();
73         let rendered = &self.rendered;
74         let tables = &self.tables;
75 
76         quote! {
77             #tables
78             #rendered
79             #root
80         }
81     }
82 
generate_fn(&mut self, id: NodeId, ctx: Context)83     fn generate_fn(&mut self, id: NodeId, ctx: Context) {
84         if self.fns.contains(&(id, ctx)) {
85             return;
86         }
87         self.fns.insert((id, ctx));
88 
89         let body = match &self.graph[id] {
90             Node::Fork(fork) => self.generate_fork(id, fork, ctx),
91             Node::Rope(rope) => self.generate_rope(rope, ctx),
92             Node::Leaf(leaf) => self.generate_leaf(leaf, ctx),
93         };
94         let ident = self.generate_ident(id, ctx);
95         let out = quote! {
96             #[inline]
97             fn #ident<'s>(lex: &mut Lexer<'s>) {
98                 #body
99             }
100         };
101 
102         self.rendered.append_all(out);
103     }
104 
goto(&mut self, id: NodeId, mut ctx: Context) -> &TokenStream105     fn goto(&mut self, id: NodeId, mut ctx: Context) -> &TokenStream {
106         let key = (id, ctx);
107 
108         if !self.gotos.contains_key(&key) {
109             let meta = &self.meta[id];
110             let enters_loop = !meta.loop_entry_from.is_empty();
111 
112             let bump = if enters_loop || !ctx.can_backtrack() {
113                 ctx.switch(self.graph[id].miss())
114             } else {
115                 None
116             };
117 
118             let bump = match (bump, enters_loop, meta.min_read) {
119                 (Some(t), _, _) => Some(t),
120                 (None, true, _) => ctx.bump(),
121                 (None, false, 0) => ctx.bump(),
122                 (None, false, _) => None,
123             };
124 
125             if meta.min_read == 0 || ctx.remainder() < meta.min_read {
126                 ctx.wipe();
127             }
128 
129             let ident = self.generate_ident(id, ctx);
130             let mut call_site = quote!(#ident(lex));
131 
132             if let Some(bump) = bump {
133                 call_site = quote!({
134                     #bump
135                     #call_site
136                 });
137             }
138             self.gotos.insert(key, call_site);
139             self.generate_fn(id, ctx);
140         }
141         &self.gotos[&key]
142     }
143 
generate_ident(&mut self, id: NodeId, ctx: Context) -> &Ident144     fn generate_ident(&mut self, id: NodeId, ctx: Context) -> &Ident {
145         self.idents.entry((id, ctx)).or_insert_with(|| {
146             let mut ident = format!("goto{}", id);
147 
148             ctx.write_suffix(&mut ident);
149 
150             ident.to_ident()
151         })
152     }
153 
154     /// Returns an identifier to a function that matches a byte to any
155     /// of the provided ranges. This will generate either a simple
156     /// match expression, or use a lookup table internally.
generate_test(&mut self, ranges: Vec<Range>) -> &Ident157     fn generate_test(&mut self, ranges: Vec<Range>) -> &Ident {
158         if !self.tests.contains_key(&ranges) {
159             let idx = self.tests.len();
160             let ident = format!("pattern{}", idx).to_ident();
161 
162             let lo = ranges.first().unwrap().start;
163             let hi = ranges.last().unwrap().end;
164 
165             let body = match ranges.len() {
166                 0..=2 => {
167                     quote! {
168                         match byte {
169                             #(#ranges)|* => true,
170                             _ => false,
171                         }
172                     }
173                 }
174                 _ if hi - lo < 64 => {
175                     let mut offset = hi.saturating_sub(63);
176 
177                     while offset.count_ones() > 1 && lo - offset > 0 {
178                         offset += 1;
179                     }
180 
181                     let mut table = 0u64;
182 
183                     for byte in ranges.iter().flat_map(|range| *range) {
184                         if byte - offset >= 64 {
185                             panic!("{:#?} {} {} {}", ranges, hi, lo, offset);
186                         }
187                         table |= 1 << (byte - offset);
188                     }
189 
190                     let search = match offset {
191                         0 => quote!(byte),
192                         _ => quote!(byte.wrapping_sub(#offset)),
193                     };
194 
195                     quote! {
196                         const LUT: u64 = #table;
197 
198                         match 1u64.checked_shl(#search as u32) {
199                             Some(shift) => LUT & shift != 0,
200                             None => false,
201                         }
202                     }
203                 }
204                 _ => {
205                     let mut view = self.tables.view();
206 
207                     for byte in ranges.iter().flat_map(|range| *range) {
208                         view.flag(byte);
209                     }
210 
211                     let mask = view.mask();
212                     let lut = view.ident();
213 
214                     quote! {
215                         #lut[byte as usize] & #mask > 0
216                     }
217                 }
218             };
219             self.rendered.append_all(quote! {
220                 #[inline]
221                 fn #ident(byte: u8) -> bool {
222                     #body
223                 }
224             });
225             self.tests.insert(ranges.clone(), ident);
226         }
227         &self.tests[&ranges]
228     }
229 }
230 
231 macro_rules! match_quote {
232     ($source:expr; $($byte:tt,)* ) => {match $source {
233         $( $byte => quote!($byte), )*
234         byte => quote!(#byte),
235     }}
236 }
237 
byte_to_tokens(byte: u8) -> TokenStream238 fn byte_to_tokens(byte: u8) -> TokenStream {
239     match_quote! {
240         byte;
241         b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9',
242         b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j',
243         b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't',
244         b'u', b'v', b'w', b'x', b'y', b'z',
245         b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J',
246         b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T',
247         b'U', b'V', b'W', b'X', b'Y', b'Z',
248         b'!', b'@', b'#', b'$', b'%', b'^', b'&', b'*', b'(', b')',
249         b'{', b'}', b'[', b']', b'<', b'>', b'-', b'=', b'_', b'+',
250         b':', b';', b',', b'.', b'/', b'?', b'|', b'"', b'\'', b'\\',
251     }
252 }
253 
254 impl ToTokens for Range {
to_tokens(&self, tokens: &mut TokenStream)255     fn to_tokens(&self, tokens: &mut TokenStream) {
256         let Range { start, end } = self;
257 
258         tokens.append_all(byte_to_tokens(*start));
259 
260         if start != end {
261             tokens.append_all(quote!(..=));
262             tokens.append_all(byte_to_tokens(*end));
263         }
264     }
265 }
266