1 use fnv::{FnvHashMap as Map, FnvHashSet as Set};
2 use proc_macro2::TokenStream;
3 use quote::{quote, ToTokens, TokenStreamExt};
4 use syn::Ident;
5
6 use crate::graph::{Graph, Meta, Node, NodeId, Range};
7 use crate::leaf::Leaf;
8 use crate::util::ToIdent;
9
10 mod context;
11 mod fork;
12 mod leaf;
13 mod rope;
14 mod tables;
15
16 use self::context::Context;
17 use self::tables::TableStack;
18
19 pub struct Generator<'a> {
20 /// Name of the type we are implementing the `Logos` trait for
21 name: &'a Ident,
22 /// Name of the type with any generics it might need
23 this: &'a TokenStream,
24 /// Id to the root node
25 root: NodeId,
26 /// Reference to the graph with all of the nodes
27 graph: &'a Graph<Leaf<'a>>,
28 /// Meta data collected for the nodes
29 meta: Meta,
30 /// Buffer with functions growing during generation
31 rendered: TokenStream,
32 /// Set of functions that have already been rendered
33 fns: Set<(NodeId, Context)>,
34 /// Function name identifiers
35 idents: Map<(NodeId, Context), Ident>,
36 /// Local function calls. Note: a call might change its context,
37 /// so we can't use `idents` for this purpose.
38 gotos: Map<(NodeId, Context), TokenStream>,
39 /// Identifiers for helper functions matching a byte to a given
40 /// set of ranges
41 tests: Map<Vec<Range>, Ident>,
42 /// Related to above, table stack manages tables that need to be
43 tables: TableStack,
44 }
45
46 impl<'a> Generator<'a> {
new( name: &'a Ident, this: &'a TokenStream, root: NodeId, graph: &'a Graph<Leaf>, ) -> Self47 pub fn new(
48 name: &'a Ident,
49 this: &'a TokenStream,
50 root: NodeId,
51 graph: &'a Graph<Leaf>,
52 ) -> Self {
53 let rendered = Self::fast_loop_macro();
54 let meta = Meta::analyze(root, graph);
55
56 Generator {
57 name,
58 this,
59 root,
60 graph,
61 meta,
62 rendered,
63 fns: Set::default(),
64 idents: Map::default(),
65 gotos: Map::default(),
66 tests: Map::default(),
67 tables: TableStack::new(),
68 }
69 }
70
generate(mut self) -> TokenStream71 pub fn generate(mut self) -> TokenStream {
72 let root = self.goto(self.root, Context::default()).clone();
73 let rendered = &self.rendered;
74 let tables = &self.tables;
75
76 quote! {
77 #tables
78 #rendered
79 #root
80 }
81 }
82
generate_fn(&mut self, id: NodeId, ctx: Context)83 fn generate_fn(&mut self, id: NodeId, ctx: Context) {
84 if self.fns.contains(&(id, ctx)) {
85 return;
86 }
87 self.fns.insert((id, ctx));
88
89 let body = match &self.graph[id] {
90 Node::Fork(fork) => self.generate_fork(id, fork, ctx),
91 Node::Rope(rope) => self.generate_rope(rope, ctx),
92 Node::Leaf(leaf) => self.generate_leaf(leaf, ctx),
93 };
94 let ident = self.generate_ident(id, ctx);
95 let out = quote! {
96 #[inline]
97 fn #ident<'s>(lex: &mut Lexer<'s>) {
98 #body
99 }
100 };
101
102 self.rendered.append_all(out);
103 }
104
goto(&mut self, id: NodeId, mut ctx: Context) -> &TokenStream105 fn goto(&mut self, id: NodeId, mut ctx: Context) -> &TokenStream {
106 let key = (id, ctx);
107
108 if !self.gotos.contains_key(&key) {
109 let meta = &self.meta[id];
110 let enters_loop = !meta.loop_entry_from.is_empty();
111
112 let bump = if enters_loop || !ctx.can_backtrack() {
113 ctx.switch(self.graph[id].miss())
114 } else {
115 None
116 };
117
118 let bump = match (bump, enters_loop, meta.min_read) {
119 (Some(t), _, _) => Some(t),
120 (None, true, _) => ctx.bump(),
121 (None, false, 0) => ctx.bump(),
122 (None, false, _) => None,
123 };
124
125 if meta.min_read == 0 || ctx.remainder() < meta.min_read {
126 ctx.wipe();
127 }
128
129 let ident = self.generate_ident(id, ctx);
130 let mut call_site = quote!(#ident(lex));
131
132 if let Some(bump) = bump {
133 call_site = quote!({
134 #bump
135 #call_site
136 });
137 }
138 self.gotos.insert(key, call_site);
139 self.generate_fn(id, ctx);
140 }
141 &self.gotos[&key]
142 }
143
generate_ident(&mut self, id: NodeId, ctx: Context) -> &Ident144 fn generate_ident(&mut self, id: NodeId, ctx: Context) -> &Ident {
145 self.idents.entry((id, ctx)).or_insert_with(|| {
146 let mut ident = format!("goto{}", id);
147
148 ctx.write_suffix(&mut ident);
149
150 ident.to_ident()
151 })
152 }
153
154 /// Returns an identifier to a function that matches a byte to any
155 /// of the provided ranges. This will generate either a simple
156 /// match expression, or use a lookup table internally.
generate_test(&mut self, ranges: Vec<Range>) -> &Ident157 fn generate_test(&mut self, ranges: Vec<Range>) -> &Ident {
158 if !self.tests.contains_key(&ranges) {
159 let idx = self.tests.len();
160 let ident = format!("pattern{}", idx).to_ident();
161
162 let lo = ranges.first().unwrap().start;
163 let hi = ranges.last().unwrap().end;
164
165 let body = match ranges.len() {
166 0..=2 => {
167 quote! {
168 match byte {
169 #(#ranges)|* => true,
170 _ => false,
171 }
172 }
173 }
174 _ if hi - lo < 64 => {
175 let mut offset = hi.saturating_sub(63);
176
177 while offset.count_ones() > 1 && lo - offset > 0 {
178 offset += 1;
179 }
180
181 let mut table = 0u64;
182
183 for byte in ranges.iter().flat_map(|range| *range) {
184 if byte - offset >= 64 {
185 panic!("{:#?} {} {} {}", ranges, hi, lo, offset);
186 }
187 table |= 1 << (byte - offset);
188 }
189
190 let search = match offset {
191 0 => quote!(byte),
192 _ => quote!(byte.wrapping_sub(#offset)),
193 };
194
195 quote! {
196 const LUT: u64 = #table;
197
198 match 1u64.checked_shl(#search as u32) {
199 Some(shift) => LUT & shift != 0,
200 None => false,
201 }
202 }
203 }
204 _ => {
205 let mut view = self.tables.view();
206
207 for byte in ranges.iter().flat_map(|range| *range) {
208 view.flag(byte);
209 }
210
211 let mask = view.mask();
212 let lut = view.ident();
213
214 quote! {
215 #lut[byte as usize] & #mask > 0
216 }
217 }
218 };
219 self.rendered.append_all(quote! {
220 #[inline]
221 fn #ident(byte: u8) -> bool {
222 #body
223 }
224 });
225 self.tests.insert(ranges.clone(), ident);
226 }
227 &self.tests[&ranges]
228 }
229 }
230
231 macro_rules! match_quote {
232 ($source:expr; $($byte:tt,)* ) => {match $source {
233 $( $byte => quote!($byte), )*
234 byte => quote!(#byte),
235 }}
236 }
237
byte_to_tokens(byte: u8) -> TokenStream238 fn byte_to_tokens(byte: u8) -> TokenStream {
239 match_quote! {
240 byte;
241 b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9',
242 b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j',
243 b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't',
244 b'u', b'v', b'w', b'x', b'y', b'z',
245 b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J',
246 b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T',
247 b'U', b'V', b'W', b'X', b'Y', b'Z',
248 b'!', b'@', b'#', b'$', b'%', b'^', b'&', b'*', b'(', b')',
249 b'{', b'}', b'[', b']', b'<', b'>', b'-', b'=', b'_', b'+',
250 b':', b';', b',', b'.', b'/', b'?', b'|', b'"', b'\'', b'\\',
251 }
252 }
253
254 impl ToTokens for Range {
to_tokens(&self, tokens: &mut TokenStream)255 fn to_tokens(&self, tokens: &mut TokenStream) {
256 let Range { start, end } = self;
257
258 tokens.append_all(byte_to_tokens(*start));
259
260 if start != end {
261 tokens.append_all(quote!(..=));
262 tokens.append_all(byte_to_tokens(*end));
263 }
264 }
265 }
266