1 use collections::{map, Map};
2 use grammar::repr::*;
3 use lr1::core::*;
4 use lr1::example::*;
5 use lr1::first::*;
6 use lr1::lookahead::*;
7 use petgraph::graph::{EdgeReference, Edges, NodeIndex};
8 use petgraph::prelude::*;
9 use petgraph::{Directed, EdgeDirection, Graph};
10 use std::fmt::{Debug, Error, Formatter};
11 
12 #[cfg(test)]
13 mod test;
14 
15 /// Trace graphs are used to summarize how it is that we came to be in
16 /// a state where we can take some particular shift/reduce action; put
17 /// another way, how it is that we came to be in a state with some
18 /// particular LR(1) item.
19 ///
20 /// The nodes in the graph are each labeled with a TraceGraphNode and
21 /// hence take one of two forms:
22 ///
23 /// - TraceGraphNode::Item -- represents an LR0 item. These nodes are
24 ///   used for the starting/end points in the graph only.  Basically a
25 ///   complete trace stretches from the start item to some end item,
26 ///   and all intermediate nodes are nonterminals.
27 /// - TraceGraphNode::Nonterminal -- if this graph is for a shift,
28 ///   then these represent items where the cursor is at the beginning:
29 ///   `X = (*) ...`. If the graph is for a reduce, they represent
30 ///   items where a reduce is possible without shifting any more
31 ///   terminals (though further reductions may be needed): `X =
32 ///   ... (*) ...s` where `FIRST(...s)` includes `\epsilon`.
33 ///
34 /// The edges in the graph are also important. They are labeled with
35 /// `SymbolSets` instances, meaning that each carries a (prefix,
36 /// cursor, and suffix) tuple. The label on an edge `A -> B` means
37 /// that transitioning from a state containing `A` to a state
38 /// containing `B` is possible if you:
39 ///
40 /// - shift the symbols in `prefix`
41 /// - `B` will produce the symbol in `cursor`
42 /// - shift the symbols in `suffix` after `B` is popped
43 pub struct TraceGraph<'grammar> {
44     // A -L-> B means:
45     //
46     //     Transition from a state containing A to a state containing
47     //     B by (pushing|popping) the symbols L.
48     //
49     // If this trace graph represents a shift backtrace, then the
50     // labels are symbols that are pushed. Otherwise they are labels
51     // that are popped.
52     graph: Graph<TraceGraphNode<'grammar>, SymbolSets<'grammar>>,
53     indices: Map<TraceGraphNode<'grammar>, NodeIndex>,
54 }
55 
56 #[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq)]
57 pub enum TraceGraphNode<'grammar> {
58     Nonterminal(NonterminalString),
59     Item(LR0Item<'grammar>),
60 }
61 
62 impl<'grammar> TraceGraph<'grammar> {
new() -> Self63     pub fn new() -> Self {
64         TraceGraph {
65             graph: Graph::new(),
66             indices: map(),
67         }
68     }
69 
add_node<T>(&mut self, node: T) -> NodeIndex where T: Into<TraceGraphNode<'grammar>>,70     pub fn add_node<T>(&mut self, node: T) -> NodeIndex
71     where
72         T: Into<TraceGraphNode<'grammar>>,
73     {
74         let node = node.into();
75         let graph = &mut self.graph;
76         *self
77             .indices
78             .entry(node.clone())
79             .or_insert_with(|| graph.add_node(node))
80     }
81 
add_edge<F, T>(&mut self, from: F, to: T, labels: SymbolSets<'grammar>) where F: Into<TraceGraphNode<'grammar>>, T: Into<TraceGraphNode<'grammar>>,82     pub fn add_edge<F, T>(&mut self, from: F, to: T, labels: SymbolSets<'grammar>)
83     where
84         F: Into<TraceGraphNode<'grammar>>,
85         T: Into<TraceGraphNode<'grammar>>,
86     {
87         let from = self.add_node(from.into());
88         let to = self.add_node(to.into());
89         if !self
90             .graph
91             .edges_directed(from, EdgeDirection::Outgoing)
92             .any(|edge| edge.target() == to && *edge.weight() == labels)
93         {
94             self.graph.add_edge(from, to, labels);
95         }
96     }
97 
lr0_examples<'graph>( &'graph self, lr0_item: LR0Item<'grammar>, ) -> PathEnumerator<'graph, 'grammar>98     pub fn lr0_examples<'graph>(
99         &'graph self,
100         lr0_item: LR0Item<'grammar>,
101     ) -> PathEnumerator<'graph, 'grammar> {
102         PathEnumerator::new(self, lr0_item)
103     }
104 
lr1_examples<'trace>( &'trace self, first_sets: &'trace FirstSets, item: &LR1Item<'grammar>, ) -> FilteredPathEnumerator<'trace, 'grammar>105     pub fn lr1_examples<'trace>(
106         &'trace self,
107         first_sets: &'trace FirstSets,
108         item: &LR1Item<'grammar>,
109     ) -> FilteredPathEnumerator<'trace, 'grammar> {
110         FilteredPathEnumerator::new(first_sets, self, item.to_lr0(), item.lookahead.clone())
111     }
112 }
113 
114 impl<'grammar> Into<TraceGraphNode<'grammar>> for NonterminalString {
into(self) -> TraceGraphNode<'grammar>115     fn into(self) -> TraceGraphNode<'grammar> {
116         TraceGraphNode::Nonterminal(self)
117     }
118 }
119 
120 impl<'grammar, L: Lookahead> Into<TraceGraphNode<'grammar>> for Item<'grammar, L> {
into(self) -> TraceGraphNode<'grammar>121     fn into(self) -> TraceGraphNode<'grammar> {
122         (&self).into()
123     }
124 }
125 
126 impl<'a, 'grammar, L: Lookahead> Into<TraceGraphNode<'grammar>> for &'a Item<'grammar, L> {
into(self) -> TraceGraphNode<'grammar>127     fn into(self) -> TraceGraphNode<'grammar> {
128         TraceGraphNode::Item(self.to_lr0())
129     }
130 }
131 
132 // This just exists to help with the `Debug` impl
133 struct TraceGraphEdge<'grammar> {
134     from: TraceGraphNode<'grammar>,
135     to: TraceGraphNode<'grammar>,
136     label: (
137         &'grammar [Symbol],
138         Option<&'grammar Symbol>,
139         &'grammar [Symbol],
140     ),
141 }
142 
143 impl<'grammar> Debug for TraceGraphEdge<'grammar> {
fmt(&self, fmt: &mut Formatter) -> Result<(), Error>144     fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
145         write!(fmt, "({:?} -{:?}-> {:?})", self.from, self.label, self.to)
146     }
147 }
148 
149 impl<'grammar> Debug for TraceGraph<'grammar> {
fmt(&self, fmt: &mut Formatter) -> Result<(), Error>150     fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
151         let mut s = fmt.debug_list();
152         for (node, &index) in &self.indices {
153             for edge in self.graph.edges_directed(index, EdgeDirection::Outgoing) {
154                 let label = edge.weight();
155                 s.entry(&TraceGraphEdge {
156                     from: node.clone(),
157                     to: self.graph[edge.target()].clone(),
158                     label: (label.prefix, label.cursor, label.suffix),
159                 });
160             }
161         }
162         s.finish()
163     }
164 }
165 
166 ///////////////////////////////////////////////////////////////////////////
167 // PathEnumerator
168 //
169 // The path enumerater walks a trace graph searching for paths that
170 // start at a given item and terminate at another item. If such a path
171 // is found, you can then find the complete list of symbols by calling
172 // `symbols_and_cursor` and also get access to the state.
173 
174 pub struct PathEnumerator<'graph, 'grammar: 'graph> {
175     graph: &'graph TraceGraph<'grammar>,
176     stack: Vec<EnumeratorState<'graph, 'grammar>>,
177 }
178 
179 struct EnumeratorState<'graph, 'grammar: 'graph> {
180     index: NodeIndex,
181     symbol_sets: SymbolSets<'grammar>,
182     edges: Edges<'graph, SymbolSets<'grammar>, Directed>,
183 }
184 
185 impl<'graph, 'grammar> PathEnumerator<'graph, 'grammar> {
new(graph: &'graph TraceGraph<'grammar>, lr0_item: LR0Item<'grammar>) -> Self186     fn new(graph: &'graph TraceGraph<'grammar>, lr0_item: LR0Item<'grammar>) -> Self {
187         let start_state = graph.indices[&TraceGraphNode::Item(lr0_item)];
188         let mut enumerator = PathEnumerator {
189             graph: graph,
190             stack: vec![],
191         };
192         let edges = enumerator.incoming_edges(start_state);
193         enumerator.stack.push(EnumeratorState {
194             index: start_state,
195             symbol_sets: SymbolSets::new(),
196             edges: edges,
197         });
198         enumerator.find_next_trace();
199         enumerator
200     }
201 
202     /// Advance to the next example. Returns false if there are no more
203     /// examples.
advance(&mut self) -> bool204     pub fn advance(&mut self) -> bool {
205         // If we have not yet exhausted all the examples, then the top
206         // of the stack should be the last target item that we
207         // found. Pop it off.
208         match self.stack.pop() {
209             Some(top_state) => {
210                 assert!(match self.graph.graph[top_state.index] {
211                     TraceGraphNode::Item(_) => true,
212                     TraceGraphNode::Nonterminal(_) => false,
213                 });
214 
215                 self.find_next_trace()
216             }
217             None => false,
218         }
219     }
220 
incoming_edges(&self, index: NodeIndex) -> Edges<'graph, SymbolSets<'grammar>, Directed>221     fn incoming_edges(&self, index: NodeIndex) -> Edges<'graph, SymbolSets<'grammar>, Directed> {
222         self.graph
223             .graph
224             .edges_directed(index, EdgeDirection::Incoming)
225     }
226 
227     /// This is the main operation, written in CPS style and hence it
228     /// can seem a bit confusing. The idea is that `find_next_trace`
229     /// is called when we are ready to consider the next child of
230     /// whatever is on the top of the stack. It simply withdraws
231     /// that next child (if any) and hands it to `push_next`.
find_next_trace(&mut self) -> bool232     fn find_next_trace(&mut self) -> bool {
233         if !self.stack.is_empty() {
234             let next_edge = {
235                 let top_of_stack = self.stack.last_mut().unwrap();
236                 top_of_stack.edges.next()
237             };
238             self.push_next_child_if_any(next_edge)
239         } else {
240             false
241         }
242     }
243 
244     /// Invoked with the next child (if any) of the node on the top of
245     /// the stack.
246     ///
247     /// If `next` is `Some`, we simply call `push_next_child`.
248     ///
249     /// If `next` is `None`, then the node on the top of
250     /// the stack *has* no next child, and so it is popped, and then
251     /// we call `find_next_trace` again to start with the next child
252     /// of the new top of the stack.
push_next_child_if_any( &mut self, next: Option<EdgeReference<'graph, SymbolSets<'grammar>>>, ) -> bool253     fn push_next_child_if_any(
254         &mut self,
255         next: Option<EdgeReference<'graph, SymbolSets<'grammar>>>,
256     ) -> bool {
257         if let Some(edge) = next {
258             let index = edge.source();
259             let symbol_sets = *edge.weight();
260             self.push_next_child(index, symbol_sets)
261         } else {
262             self.stack.pop();
263             self.find_next_trace()
264         }
265     }
266 
267     /// Push the next child of the top of the stack onto the stack,
268     /// making the child the new top.
269     ///
270     /// If the child is an `Item` node, we have found the next trace,
271     /// and hence our search terminates. We push the symbols from this
272     /// item node into the symbols vector and then return true.
273     ///
274     /// Otherwise, we check whether this new node would cause a cycle.
275     /// If so, we do *not* push it, and instead just call
276     /// `find_next_trace` again to proceed to the next child of the
277     /// current top.
278     ///
279     /// Finally, if the new node would NOT cause a cycle, then we can
280     /// push it onto the stack so that it becomes the new top, and
281     /// call `find_next_trace` to start searching its children.
push_next_child(&mut self, index: NodeIndex, symbol_sets: SymbolSets<'grammar>) -> bool282     fn push_next_child(&mut self, index: NodeIndex, symbol_sets: SymbolSets<'grammar>) -> bool {
283         match self.graph.graph[index] {
284             TraceGraphNode::Item(_) => {
285                 // If we reached an item like
286                 //
287                 //     X = ...p (*) ...s
288                 //
289                 // then we are done, but we still need to push on the
290                 // symbols `...p`.
291                 let edges = self.incoming_edges(index);
292                 self.stack.push(EnumeratorState {
293                     index: index,
294                     symbol_sets: symbol_sets,
295                     edges: edges,
296                 });
297                 return true;
298             }
299             TraceGraphNode::Nonterminal(_) => {
300                 // If this node already appears on the stack, do not
301                 // visit its children.
302                 if !self.stack.iter().any(|state| state.index == index) {
303                     let edges = self.incoming_edges(index);
304                     self.stack.push(EnumeratorState {
305                         index: index,
306                         symbol_sets: symbol_sets,
307                         edges: edges,
308                     });
309                 }
310                 self.find_next_trace()
311             }
312         }
313     }
314 
found_trace(&self) -> bool315     pub fn found_trace(&self) -> bool {
316         !self.stack.is_empty()
317     }
318 
319     /// Returns the 1-context for the current trace. In other words,
320     /// the set of tokens that may appear next in the input. If this
321     /// trace was derived from a shiftable item, this will always be
322     /// the terminal that was to be shifted; if derived from a reduce
323     /// item, this constitutes the set of lookaheads that will trigger
324     /// a reduce.
first0(&self, first_sets: &FirstSets) -> TokenSet325     pub fn first0(&self, first_sets: &FirstSets) -> TokenSet {
326         assert!(self.found_trace());
327         first_sets.first0(
328             self.stack[1]
329                 .symbol_sets
330                 .cursor
331                 .into_iter()
332                 .chain(self.stack.iter().flat_map(|s| s.symbol_sets.suffix)),
333         )
334     }
335 
example(&self) -> Example336     pub fn example(&self) -> Example {
337         assert!(self.found_trace());
338 
339         let mut symbols = vec![];
340 
341         symbols.extend(
342             self.stack
343                 .iter()
344                 .rev()
345                 .flat_map(|s| s.symbol_sets.prefix)
346                 .cloned()
347                 .map(ExampleSymbol::Symbol),
348         );
349 
350         let cursor = symbols.len();
351 
352         match self.stack[1].symbol_sets.cursor {
353             Some(s) => symbols.push(ExampleSymbol::Symbol(s.clone())),
354             None => {
355                 if self.stack[1].symbol_sets.prefix.is_empty() {
356                     symbols.push(ExampleSymbol::Epsilon)
357                 } else {
358                 }
359             }
360         }
361 
362         symbols.extend(
363             self.stack
364                 .iter()
365                 .flat_map(|s| s.symbol_sets.suffix)
366                 .cloned()
367                 .map(ExampleSymbol::Symbol),
368         );
369 
370         let mut cursors = (0, symbols.len());
371 
372         let mut reductions: Vec<_> = self.stack[1..]
373             .iter()
374             .rev()
375             .map(|state| {
376                 let nonterminal = match self.graph.graph[state.index] {
377                     TraceGraphNode::Nonterminal(ref nonterminal) => nonterminal.clone(),
378                     TraceGraphNode::Item(ref item) => item.production.nonterminal.clone(),
379                 };
380                 let reduction = Reduction {
381                     start: cursors.0,
382                     end: cursors.1,
383                     nonterminal: nonterminal,
384                 };
385                 cursors.0 += state.symbol_sets.prefix.len();
386                 cursors.1 -= state.symbol_sets.suffix.len();
387                 reduction
388             })
389             .collect();
390         reductions.reverse();
391 
392         Example {
393             symbols: symbols,
394             cursor: cursor,
395             reductions: reductions,
396         }
397     }
398 }
399 
400 impl<'graph, 'grammar> Iterator for PathEnumerator<'graph, 'grammar> {
401     type Item = Example;
402 
next(&mut self) -> Option<Example>403     fn next(&mut self) -> Option<Example> {
404         if self.found_trace() {
405             let example = self.example();
406             self.advance();
407             Some(example)
408         } else {
409             None
410         }
411     }
412 }
413 
414 ///////////////////////////////////////////////////////////////////////////
415 // FilteredPathEnumerator
416 //
417 // Like the path enumerator, but tests for examples with some specific
418 // lookahead
419 
420 pub struct FilteredPathEnumerator<'graph, 'grammar: 'graph> {
421     base: PathEnumerator<'graph, 'grammar>,
422     first_sets: &'graph FirstSets,
423     lookahead: TokenSet,
424 }
425 
426 impl<'graph, 'grammar> FilteredPathEnumerator<'graph, 'grammar> {
new( first_sets: &'graph FirstSets, graph: &'graph TraceGraph<'grammar>, lr0_item: LR0Item<'grammar>, lookahead: TokenSet, ) -> Self427     fn new(
428         first_sets: &'graph FirstSets,
429         graph: &'graph TraceGraph<'grammar>,
430         lr0_item: LR0Item<'grammar>,
431         lookahead: TokenSet,
432     ) -> Self {
433         FilteredPathEnumerator {
434             base: PathEnumerator::new(graph, lr0_item),
435             first_sets: first_sets,
436             lookahead: lookahead,
437         }
438     }
439 }
440 
441 impl<'graph, 'grammar> Iterator for FilteredPathEnumerator<'graph, 'grammar> {
442     type Item = Example;
443 
next(&mut self) -> Option<Example>444     fn next(&mut self) -> Option<Example> {
445         while self.base.found_trace() {
446             let firsts = self.base.first0(self.first_sets);
447             if firsts.is_intersecting(&self.lookahead) {
448                 let example = self.base.example();
449                 self.base.advance();
450                 return Some(example);
451             }
452             self.base.advance();
453         }
454         None
455     }
456 }
457