1 use std::fmt;
2 
3 use crate::ast::{self, Ast};
4 
5 /// A trait for visiting an abstract syntax tree (AST) in depth first order.
6 ///
7 /// The principle aim of this trait is to enable callers to perform case
8 /// analysis on an abstract syntax tree without necessarily using recursion.
9 /// In particular, this permits callers to do case analysis with constant stack
10 /// usage, which can be important since the size of an abstract syntax tree
11 /// may be proportional to end user input.
12 ///
13 /// Typical usage of this trait involves providing an implementation and then
14 /// running it using the [`visit`](fn.visit.html) function.
15 ///
16 /// Note that the abstract syntax tree for a regular expression is quite
17 /// complex. Unless you specifically need it, you might be able to use the
18 /// much simpler
19 /// [high-level intermediate representation](../hir/struct.Hir.html)
20 /// and its
21 /// [corresponding `Visitor` trait](../hir/trait.Visitor.html)
22 /// instead.
23 pub trait Visitor {
24     /// The result of visiting an AST.
25     type Output;
26     /// An error that visiting an AST might return.
27     type Err;
28 
29     /// All implementors of `Visitor` must provide a `finish` method, which
30     /// yields the result of visiting the AST or an error.
finish(self) -> Result<Self::Output, Self::Err>31     fn finish(self) -> Result<Self::Output, Self::Err>;
32 
33     /// This method is called before beginning traversal of the AST.
start(&mut self)34     fn start(&mut self) {}
35 
36     /// This method is called on an `Ast` before descending into child `Ast`
37     /// nodes.
visit_pre(&mut self, _ast: &Ast) -> Result<(), Self::Err>38     fn visit_pre(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
39         Ok(())
40     }
41 
42     /// This method is called on an `Ast` after descending all of its child
43     /// `Ast` nodes.
visit_post(&mut self, _ast: &Ast) -> Result<(), Self::Err>44     fn visit_post(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
45         Ok(())
46     }
47 
48     /// This method is called between child nodes of an
49     /// [`Alternation`](struct.Alternation.html).
visit_alternation_in(&mut self) -> Result<(), Self::Err>50     fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
51         Ok(())
52     }
53 
54     /// This method is called on every
55     /// [`ClassSetItem`](enum.ClassSetItem.html)
56     /// before descending into child nodes.
visit_class_set_item_pre( &mut self, _ast: &ast::ClassSetItem, ) -> Result<(), Self::Err>57     fn visit_class_set_item_pre(
58         &mut self,
59         _ast: &ast::ClassSetItem,
60     ) -> Result<(), Self::Err> {
61         Ok(())
62     }
63 
64     /// This method is called on every
65     /// [`ClassSetItem`](enum.ClassSetItem.html)
66     /// after descending into child nodes.
visit_class_set_item_post( &mut self, _ast: &ast::ClassSetItem, ) -> Result<(), Self::Err>67     fn visit_class_set_item_post(
68         &mut self,
69         _ast: &ast::ClassSetItem,
70     ) -> Result<(), Self::Err> {
71         Ok(())
72     }
73 
74     /// This method is called on every
75     /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html)
76     /// before descending into child nodes.
visit_class_set_binary_op_pre( &mut self, _ast: &ast::ClassSetBinaryOp, ) -> Result<(), Self::Err>77     fn visit_class_set_binary_op_pre(
78         &mut self,
79         _ast: &ast::ClassSetBinaryOp,
80     ) -> Result<(), Self::Err> {
81         Ok(())
82     }
83 
84     /// This method is called on every
85     /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html)
86     /// after descending into child nodes.
visit_class_set_binary_op_post( &mut self, _ast: &ast::ClassSetBinaryOp, ) -> Result<(), Self::Err>87     fn visit_class_set_binary_op_post(
88         &mut self,
89         _ast: &ast::ClassSetBinaryOp,
90     ) -> Result<(), Self::Err> {
91         Ok(())
92     }
93 
94     /// This method is called between the left hand and right hand child nodes
95     /// of a [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html).
visit_class_set_binary_op_in( &mut self, _ast: &ast::ClassSetBinaryOp, ) -> Result<(), Self::Err>96     fn visit_class_set_binary_op_in(
97         &mut self,
98         _ast: &ast::ClassSetBinaryOp,
99     ) -> Result<(), Self::Err> {
100         Ok(())
101     }
102 }
103 
104 /// Executes an implementation of `Visitor` in constant stack space.
105 ///
106 /// This function will visit every node in the given `Ast` while calling the
107 /// appropriate methods provided by the
108 /// [`Visitor`](trait.Visitor.html) trait.
109 ///
110 /// The primary use case for this method is when one wants to perform case
111 /// analysis over an `Ast` without using a stack size proportional to the depth
112 /// of the `Ast`. Namely, this method will instead use constant stack size, but
113 /// will use heap space proportional to the size of the `Ast`. This may be
114 /// desirable in cases where the size of `Ast` is proportional to end user
115 /// input.
116 ///
117 /// If the visitor returns an error at any point, then visiting is stopped and
118 /// the error is returned.
visit<V: Visitor>(ast: &Ast, visitor: V) -> Result<V::Output, V::Err>119 pub fn visit<V: Visitor>(ast: &Ast, visitor: V) -> Result<V::Output, V::Err> {
120     HeapVisitor::new().visit(ast, visitor)
121 }
122 
123 /// HeapVisitor visits every item in an `Ast` recursively using constant stack
124 /// size and a heap size proportional to the size of the `Ast`.
125 struct HeapVisitor<'a> {
126     /// A stack of `Ast` nodes. This is roughly analogous to the call stack
127     /// used in a typical recursive visitor.
128     stack: Vec<(&'a Ast, Frame<'a>)>,
129     /// Similar to the `Ast` stack above, but is used only for character
130     /// classes. In particular, character classes embed their own mini
131     /// recursive syntax.
132     stack_class: Vec<(ClassInduct<'a>, ClassFrame<'a>)>,
133 }
134 
135 /// Represents a single stack frame while performing structural induction over
136 /// an `Ast`.
137 enum Frame<'a> {
138     /// A stack frame allocated just before descending into a repetition
139     /// operator's child node.
140     Repetition(&'a ast::Repetition),
141     /// A stack frame allocated just before descending into a group's child
142     /// node.
143     Group(&'a ast::Group),
144     /// The stack frame used while visiting every child node of a concatenation
145     /// of expressions.
146     Concat {
147         /// The child node we are currently visiting.
148         head: &'a Ast,
149         /// The remaining child nodes to visit (which may be empty).
150         tail: &'a [Ast],
151     },
152     /// The stack frame used while visiting every child node of an alternation
153     /// of expressions.
154     Alternation {
155         /// The child node we are currently visiting.
156         head: &'a Ast,
157         /// The remaining child nodes to visit (which may be empty).
158         tail: &'a [Ast],
159     },
160 }
161 
162 /// Represents a single stack frame while performing structural induction over
163 /// a character class.
164 enum ClassFrame<'a> {
165     /// The stack frame used while visiting every child node of a union of
166     /// character class items.
167     Union {
168         /// The child node we are currently visiting.
169         head: &'a ast::ClassSetItem,
170         /// The remaining child nodes to visit (which may be empty).
171         tail: &'a [ast::ClassSetItem],
172     },
173     /// The stack frame used while a binary class operation.
174     Binary { op: &'a ast::ClassSetBinaryOp },
175     /// A stack frame allocated just before descending into a binary operator's
176     /// left hand child node.
177     BinaryLHS {
178         op: &'a ast::ClassSetBinaryOp,
179         lhs: &'a ast::ClassSet,
180         rhs: &'a ast::ClassSet,
181     },
182     /// A stack frame allocated just before descending into a binary operator's
183     /// right hand child node.
184     BinaryRHS { op: &'a ast::ClassSetBinaryOp, rhs: &'a ast::ClassSet },
185 }
186 
187 /// A representation of the inductive step when performing structural induction
188 /// over a character class.
189 ///
190 /// Note that there is no analogous explicit type for the inductive step for
191 /// `Ast` nodes because the inductive step is just an `Ast`. For character
192 /// classes, the inductive step can produce one of two possible child nodes:
193 /// an item or a binary operation. (An item cannot be a binary operation
194 /// because that would imply binary operations can be unioned in the concrete
195 /// syntax, which is not possible.)
196 enum ClassInduct<'a> {
197     Item(&'a ast::ClassSetItem),
198     BinaryOp(&'a ast::ClassSetBinaryOp),
199 }
200 
201 impl<'a> HeapVisitor<'a> {
new() -> HeapVisitor<'a>202     fn new() -> HeapVisitor<'a> {
203         HeapVisitor { stack: vec![], stack_class: vec![] }
204     }
205 
visit<V: Visitor>( &mut self, mut ast: &'a Ast, mut visitor: V, ) -> Result<V::Output, V::Err>206     fn visit<V: Visitor>(
207         &mut self,
208         mut ast: &'a Ast,
209         mut visitor: V,
210     ) -> Result<V::Output, V::Err> {
211         self.stack.clear();
212         self.stack_class.clear();
213 
214         visitor.start();
215         loop {
216             visitor.visit_pre(ast)?;
217             if let Some(x) = self.induct(ast, &mut visitor)? {
218                 let child = x.child();
219                 self.stack.push((ast, x));
220                 ast = child;
221                 continue;
222             }
223             // No induction means we have a base case, so we can post visit
224             // it now.
225             visitor.visit_post(ast)?;
226 
227             // At this point, we now try to pop our call stack until it is
228             // either empty or we hit another inductive case.
229             loop {
230                 let (post_ast, frame) = match self.stack.pop() {
231                     None => return visitor.finish(),
232                     Some((post_ast, frame)) => (post_ast, frame),
233                 };
234                 // If this is a concat/alternate, then we might have additional
235                 // inductive steps to process.
236                 if let Some(x) = self.pop(frame) {
237                     if let Frame::Alternation { .. } = x {
238                         visitor.visit_alternation_in()?;
239                     }
240                     ast = x.child();
241                     self.stack.push((post_ast, x));
242                     break;
243                 }
244                 // Otherwise, we've finished visiting all the child nodes for
245                 // this AST, so we can post visit it now.
246                 visitor.visit_post(post_ast)?;
247             }
248         }
249     }
250 
251     /// Build a stack frame for the given AST if one is needed (which occurs if
252     /// and only if there are child nodes in the AST). Otherwise, return None.
253     ///
254     /// If this visits a class, then the underlying visitor implementation may
255     /// return an error which will be passed on here.
induct<V: Visitor>( &mut self, ast: &'a Ast, visitor: &mut V, ) -> Result<Option<Frame<'a>>, V::Err>256     fn induct<V: Visitor>(
257         &mut self,
258         ast: &'a Ast,
259         visitor: &mut V,
260     ) -> Result<Option<Frame<'a>>, V::Err> {
261         Ok(match *ast {
262             Ast::Class(ast::Class::Bracketed(ref x)) => {
263                 self.visit_class(x, visitor)?;
264                 None
265             }
266             Ast::Repetition(ref x) => Some(Frame::Repetition(x)),
267             Ast::Group(ref x) => Some(Frame::Group(x)),
268             Ast::Concat(ref x) if x.asts.is_empty() => None,
269             Ast::Concat(ref x) => {
270                 Some(Frame::Concat { head: &x.asts[0], tail: &x.asts[1..] })
271             }
272             Ast::Alternation(ref x) if x.asts.is_empty() => None,
273             Ast::Alternation(ref x) => Some(Frame::Alternation {
274                 head: &x.asts[0],
275                 tail: &x.asts[1..],
276             }),
277             _ => None,
278         })
279     }
280 
281     /// Pops the given frame. If the frame has an additional inductive step,
282     /// then return it, otherwise return `None`.
pop(&self, induct: Frame<'a>) -> Option<Frame<'a>>283     fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
284         match induct {
285             Frame::Repetition(_) => None,
286             Frame::Group(_) => None,
287             Frame::Concat { tail, .. } => {
288                 if tail.is_empty() {
289                     None
290                 } else {
291                     Some(Frame::Concat { head: &tail[0], tail: &tail[1..] })
292                 }
293             }
294             Frame::Alternation { tail, .. } => {
295                 if tail.is_empty() {
296                     None
297                 } else {
298                     Some(Frame::Alternation {
299                         head: &tail[0],
300                         tail: &tail[1..],
301                     })
302                 }
303             }
304         }
305     }
306 
visit_class<V: Visitor>( &mut self, ast: &'a ast::ClassBracketed, visitor: &mut V, ) -> Result<(), V::Err>307     fn visit_class<V: Visitor>(
308         &mut self,
309         ast: &'a ast::ClassBracketed,
310         visitor: &mut V,
311     ) -> Result<(), V::Err> {
312         let mut ast = ClassInduct::from_bracketed(ast);
313         loop {
314             self.visit_class_pre(&ast, visitor)?;
315             if let Some(x) = self.induct_class(&ast) {
316                 let child = x.child();
317                 self.stack_class.push((ast, x));
318                 ast = child;
319                 continue;
320             }
321             self.visit_class_post(&ast, visitor)?;
322 
323             // At this point, we now try to pop our call stack until it is
324             // either empty or we hit another inductive case.
325             loop {
326                 let (post_ast, frame) = match self.stack_class.pop() {
327                     None => return Ok(()),
328                     Some((post_ast, frame)) => (post_ast, frame),
329                 };
330                 // If this is a union or a binary op, then we might have
331                 // additional inductive steps to process.
332                 if let Some(x) = self.pop_class(frame) {
333                     if let ClassFrame::BinaryRHS { ref op, .. } = x {
334                         visitor.visit_class_set_binary_op_in(op)?;
335                     }
336                     ast = x.child();
337                     self.stack_class.push((post_ast, x));
338                     break;
339                 }
340                 // Otherwise, we've finished visiting all the child nodes for
341                 // this class node, so we can post visit it now.
342                 self.visit_class_post(&post_ast, visitor)?;
343             }
344         }
345     }
346 
347     /// Call the appropriate `Visitor` methods given an inductive step.
visit_class_pre<V: Visitor>( &self, ast: &ClassInduct<'a>, visitor: &mut V, ) -> Result<(), V::Err>348     fn visit_class_pre<V: Visitor>(
349         &self,
350         ast: &ClassInduct<'a>,
351         visitor: &mut V,
352     ) -> Result<(), V::Err> {
353         match *ast {
354             ClassInduct::Item(item) => {
355                 visitor.visit_class_set_item_pre(item)?;
356             }
357             ClassInduct::BinaryOp(op) => {
358                 visitor.visit_class_set_binary_op_pre(op)?;
359             }
360         }
361         Ok(())
362     }
363 
364     /// Call the appropriate `Visitor` methods given an inductive step.
visit_class_post<V: Visitor>( &self, ast: &ClassInduct<'a>, visitor: &mut V, ) -> Result<(), V::Err>365     fn visit_class_post<V: Visitor>(
366         &self,
367         ast: &ClassInduct<'a>,
368         visitor: &mut V,
369     ) -> Result<(), V::Err> {
370         match *ast {
371             ClassInduct::Item(item) => {
372                 visitor.visit_class_set_item_post(item)?;
373             }
374             ClassInduct::BinaryOp(op) => {
375                 visitor.visit_class_set_binary_op_post(op)?;
376             }
377         }
378         Ok(())
379     }
380 
381     /// Build a stack frame for the given class node if one is needed (which
382     /// occurs if and only if there are child nodes). Otherwise, return None.
induct_class(&self, ast: &ClassInduct<'a>) -> Option<ClassFrame<'a>>383     fn induct_class(&self, ast: &ClassInduct<'a>) -> Option<ClassFrame<'a>> {
384         match *ast {
385             ClassInduct::Item(&ast::ClassSetItem::Bracketed(ref x)) => {
386                 match x.kind {
387                     ast::ClassSet::Item(ref item) => {
388                         Some(ClassFrame::Union { head: item, tail: &[] })
389                     }
390                     ast::ClassSet::BinaryOp(ref op) => {
391                         Some(ClassFrame::Binary { op: op })
392                     }
393                 }
394             }
395             ClassInduct::Item(&ast::ClassSetItem::Union(ref x)) => {
396                 if x.items.is_empty() {
397                     None
398                 } else {
399                     Some(ClassFrame::Union {
400                         head: &x.items[0],
401                         tail: &x.items[1..],
402                     })
403                 }
404             }
405             ClassInduct::BinaryOp(op) => Some(ClassFrame::BinaryLHS {
406                 op: op,
407                 lhs: &op.lhs,
408                 rhs: &op.rhs,
409             }),
410             _ => None,
411         }
412     }
413 
414     /// Pops the given frame. If the frame has an additional inductive step,
415     /// then return it, otherwise return `None`.
pop_class(&self, induct: ClassFrame<'a>) -> Option<ClassFrame<'a>>416     fn pop_class(&self, induct: ClassFrame<'a>) -> Option<ClassFrame<'a>> {
417         match induct {
418             ClassFrame::Union { tail, .. } => {
419                 if tail.is_empty() {
420                     None
421                 } else {
422                     Some(ClassFrame::Union {
423                         head: &tail[0],
424                         tail: &tail[1..],
425                     })
426                 }
427             }
428             ClassFrame::Binary { .. } => None,
429             ClassFrame::BinaryLHS { op, rhs, .. } => {
430                 Some(ClassFrame::BinaryRHS { op: op, rhs: rhs })
431             }
432             ClassFrame::BinaryRHS { .. } => None,
433         }
434     }
435 }
436 
437 impl<'a> Frame<'a> {
438     /// Perform the next inductive step on this frame and return the next
439     /// child AST node to visit.
child(&self) -> &'a Ast440     fn child(&self) -> &'a Ast {
441         match *self {
442             Frame::Repetition(rep) => &rep.ast,
443             Frame::Group(group) => &group.ast,
444             Frame::Concat { head, .. } => head,
445             Frame::Alternation { head, .. } => head,
446         }
447     }
448 }
449 
450 impl<'a> ClassFrame<'a> {
451     /// Perform the next inductive step on this frame and return the next
452     /// child class node to visit.
child(&self) -> ClassInduct<'a>453     fn child(&self) -> ClassInduct<'a> {
454         match *self {
455             ClassFrame::Union { head, .. } => ClassInduct::Item(head),
456             ClassFrame::Binary { op, .. } => ClassInduct::BinaryOp(op),
457             ClassFrame::BinaryLHS { ref lhs, .. } => {
458                 ClassInduct::from_set(lhs)
459             }
460             ClassFrame::BinaryRHS { ref rhs, .. } => {
461                 ClassInduct::from_set(rhs)
462             }
463         }
464     }
465 }
466 
467 impl<'a> ClassInduct<'a> {
from_bracketed(ast: &'a ast::ClassBracketed) -> ClassInduct<'a>468     fn from_bracketed(ast: &'a ast::ClassBracketed) -> ClassInduct<'a> {
469         ClassInduct::from_set(&ast.kind)
470     }
471 
from_set(ast: &'a ast::ClassSet) -> ClassInduct<'a>472     fn from_set(ast: &'a ast::ClassSet) -> ClassInduct<'a> {
473         match *ast {
474             ast::ClassSet::Item(ref item) => ClassInduct::Item(item),
475             ast::ClassSet::BinaryOp(ref op) => ClassInduct::BinaryOp(op),
476         }
477     }
478 }
479 
480 impl<'a> fmt::Debug for ClassFrame<'a> {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result481     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
482         let x = match *self {
483             ClassFrame::Union { .. } => "Union",
484             ClassFrame::Binary { .. } => "Binary",
485             ClassFrame::BinaryLHS { .. } => "BinaryLHS",
486             ClassFrame::BinaryRHS { .. } => "BinaryRHS",
487         };
488         write!(f, "{}", x)
489     }
490 }
491 
492 impl<'a> fmt::Debug for ClassInduct<'a> {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result493     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
494         let x = match *self {
495             ClassInduct::Item(it) => match *it {
496                 ast::ClassSetItem::Empty(_) => "Item(Empty)",
497                 ast::ClassSetItem::Literal(_) => "Item(Literal)",
498                 ast::ClassSetItem::Range(_) => "Item(Range)",
499                 ast::ClassSetItem::Ascii(_) => "Item(Ascii)",
500                 ast::ClassSetItem::Perl(_) => "Item(Perl)",
501                 ast::ClassSetItem::Unicode(_) => "Item(Unicode)",
502                 ast::ClassSetItem::Bracketed(_) => "Item(Bracketed)",
503                 ast::ClassSetItem::Union(_) => "Item(Union)",
504             },
505             ClassInduct::BinaryOp(it) => match it.kind {
506                 ast::ClassSetBinaryOpKind::Intersection => {
507                     "BinaryOp(Intersection)"
508                 }
509                 ast::ClassSetBinaryOpKind::Difference => {
510                     "BinaryOp(Difference)"
511                 }
512                 ast::ClassSetBinaryOpKind::SymmetricDifference => {
513                     "BinaryOp(SymmetricDifference)"
514                 }
515             },
516         };
517         write!(f, "{}", x)
518     }
519 }
520