1 use collections::{map, Map}; 2 use grammar::repr::*; 3 use lr1::core::*; 4 use lr1::example::*; 5 use lr1::first::*; 6 use lr1::lookahead::*; 7 use petgraph::graph::{EdgeReference, Edges, NodeIndex}; 8 use petgraph::prelude::*; 9 use petgraph::{Directed, EdgeDirection, Graph}; 10 use std::fmt::{Debug, Error, Formatter}; 11 12 #[cfg(test)] 13 mod test; 14 15 /// Trace graphs are used to summarize how it is that we came to be in 16 /// a state where we can take some particular shift/reduce action; put 17 /// another way, how it is that we came to be in a state with some 18 /// particular LR(1) item. 19 /// 20 /// The nodes in the graph are each labeled with a TraceGraphNode and 21 /// hence take one of two forms: 22 /// 23 /// - TraceGraphNode::Item -- represents an LR0 item. These nodes are 24 /// used for the starting/end points in the graph only. Basically a 25 /// complete trace stretches from the start item to some end item, 26 /// and all intermediate nodes are nonterminals. 27 /// - TraceGraphNode::Nonterminal -- if this graph is for a shift, 28 /// then these represent items where the cursor is at the beginning: 29 /// `X = (*) ...`. If the graph is for a reduce, they represent 30 /// items where a reduce is possible without shifting any more 31 /// terminals (though further reductions may be needed): `X = 32 /// ... (*) ...s` where `FIRST(...s)` includes `\epsilon`. 33 /// 34 /// The edges in the graph are also important. They are labeled with 35 /// `SymbolSets` instances, meaning that each carries a (prefix, 36 /// cursor, and suffix) tuple. The label on an edge `A -> B` means 37 /// that transitioning from a state containing `A` to a state 38 /// containing `B` is possible if you: 39 /// 40 /// - shift the symbols in `prefix` 41 /// - `B` will produce the symbol in `cursor` 42 /// - shift the symbols in `suffix` after `B` is popped 43 pub struct TraceGraph<'grammar> { 44 // A -L-> B means: 45 // 46 // Transition from a state containing A to a state containing 47 // B by (pushing|popping) the symbols L. 48 // 49 // If this trace graph represents a shift backtrace, then the 50 // labels are symbols that are pushed. Otherwise they are labels 51 // that are popped. 52 graph: Graph<TraceGraphNode<'grammar>, SymbolSets<'grammar>>, 53 indices: Map<TraceGraphNode<'grammar>, NodeIndex>, 54 } 55 56 #[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq)] 57 pub enum TraceGraphNode<'grammar> { 58 Nonterminal(NonterminalString), 59 Item(LR0Item<'grammar>), 60 } 61 62 impl<'grammar> TraceGraph<'grammar> { new() -> Self63 pub fn new() -> Self { 64 TraceGraph { 65 graph: Graph::new(), 66 indices: map(), 67 } 68 } 69 add_node<T>(&mut self, node: T) -> NodeIndex where T: Into<TraceGraphNode<'grammar>>,70 pub fn add_node<T>(&mut self, node: T) -> NodeIndex 71 where 72 T: Into<TraceGraphNode<'grammar>>, 73 { 74 let node = node.into(); 75 let graph = &mut self.graph; 76 *self 77 .indices 78 .entry(node.clone()) 79 .or_insert_with(|| graph.add_node(node)) 80 } 81 add_edge<F, T>(&mut self, from: F, to: T, labels: SymbolSets<'grammar>) where F: Into<TraceGraphNode<'grammar>>, T: Into<TraceGraphNode<'grammar>>,82 pub fn add_edge<F, T>(&mut self, from: F, to: T, labels: SymbolSets<'grammar>) 83 where 84 F: Into<TraceGraphNode<'grammar>>, 85 T: Into<TraceGraphNode<'grammar>>, 86 { 87 let from = self.add_node(from.into()); 88 let to = self.add_node(to.into()); 89 if !self 90 .graph 91 .edges_directed(from, EdgeDirection::Outgoing) 92 .any(|edge| edge.target() == to && *edge.weight() == labels) 93 { 94 self.graph.add_edge(from, to, labels); 95 } 96 } 97 lr0_examples<'graph>( &'graph self, lr0_item: LR0Item<'grammar>, ) -> PathEnumerator<'graph, 'grammar>98 pub fn lr0_examples<'graph>( 99 &'graph self, 100 lr0_item: LR0Item<'grammar>, 101 ) -> PathEnumerator<'graph, 'grammar> { 102 PathEnumerator::new(self, lr0_item) 103 } 104 lr1_examples<'trace>( &'trace self, first_sets: &'trace FirstSets, item: &LR1Item<'grammar>, ) -> FilteredPathEnumerator<'trace, 'grammar>105 pub fn lr1_examples<'trace>( 106 &'trace self, 107 first_sets: &'trace FirstSets, 108 item: &LR1Item<'grammar>, 109 ) -> FilteredPathEnumerator<'trace, 'grammar> { 110 FilteredPathEnumerator::new(first_sets, self, item.to_lr0(), item.lookahead.clone()) 111 } 112 } 113 114 impl<'grammar> Into<TraceGraphNode<'grammar>> for NonterminalString { into(self) -> TraceGraphNode<'grammar>115 fn into(self) -> TraceGraphNode<'grammar> { 116 TraceGraphNode::Nonterminal(self) 117 } 118 } 119 120 impl<'grammar, L: Lookahead> Into<TraceGraphNode<'grammar>> for Item<'grammar, L> { into(self) -> TraceGraphNode<'grammar>121 fn into(self) -> TraceGraphNode<'grammar> { 122 (&self).into() 123 } 124 } 125 126 impl<'a, 'grammar, L: Lookahead> Into<TraceGraphNode<'grammar>> for &'a Item<'grammar, L> { into(self) -> TraceGraphNode<'grammar>127 fn into(self) -> TraceGraphNode<'grammar> { 128 TraceGraphNode::Item(self.to_lr0()) 129 } 130 } 131 132 // This just exists to help with the `Debug` impl 133 struct TraceGraphEdge<'grammar> { 134 from: TraceGraphNode<'grammar>, 135 to: TraceGraphNode<'grammar>, 136 label: ( 137 &'grammar [Symbol], 138 Option<&'grammar Symbol>, 139 &'grammar [Symbol], 140 ), 141 } 142 143 impl<'grammar> Debug for TraceGraphEdge<'grammar> { fmt(&self, fmt: &mut Formatter) -> Result<(), Error>144 fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> { 145 write!(fmt, "({:?} -{:?}-> {:?})", self.from, self.label, self.to) 146 } 147 } 148 149 impl<'grammar> Debug for TraceGraph<'grammar> { fmt(&self, fmt: &mut Formatter) -> Result<(), Error>150 fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> { 151 let mut s = fmt.debug_list(); 152 for (node, &index) in &self.indices { 153 for edge in self.graph.edges_directed(index, EdgeDirection::Outgoing) { 154 let label = edge.weight(); 155 s.entry(&TraceGraphEdge { 156 from: node.clone(), 157 to: self.graph[edge.target()].clone(), 158 label: (label.prefix, label.cursor, label.suffix), 159 }); 160 } 161 } 162 s.finish() 163 } 164 } 165 166 /////////////////////////////////////////////////////////////////////////// 167 // PathEnumerator 168 // 169 // The path enumerater walks a trace graph searching for paths that 170 // start at a given item and terminate at another item. If such a path 171 // is found, you can then find the complete list of symbols by calling 172 // `symbols_and_cursor` and also get access to the state. 173 174 pub struct PathEnumerator<'graph, 'grammar: 'graph> { 175 graph: &'graph TraceGraph<'grammar>, 176 stack: Vec<EnumeratorState<'graph, 'grammar>>, 177 } 178 179 struct EnumeratorState<'graph, 'grammar: 'graph> { 180 index: NodeIndex, 181 symbol_sets: SymbolSets<'grammar>, 182 edges: Edges<'graph, SymbolSets<'grammar>, Directed>, 183 } 184 185 impl<'graph, 'grammar> PathEnumerator<'graph, 'grammar> { new(graph: &'graph TraceGraph<'grammar>, lr0_item: LR0Item<'grammar>) -> Self186 fn new(graph: &'graph TraceGraph<'grammar>, lr0_item: LR0Item<'grammar>) -> Self { 187 let start_state = graph.indices[&TraceGraphNode::Item(lr0_item)]; 188 let mut enumerator = PathEnumerator { 189 graph: graph, 190 stack: vec![], 191 }; 192 let edges = enumerator.incoming_edges(start_state); 193 enumerator.stack.push(EnumeratorState { 194 index: start_state, 195 symbol_sets: SymbolSets::new(), 196 edges: edges, 197 }); 198 enumerator.find_next_trace(); 199 enumerator 200 } 201 202 /// Advance to the next example. Returns false if there are no more 203 /// examples. advance(&mut self) -> bool204 pub fn advance(&mut self) -> bool { 205 // If we have not yet exhausted all the examples, then the top 206 // of the stack should be the last target item that we 207 // found. Pop it off. 208 match self.stack.pop() { 209 Some(top_state) => { 210 assert!(match self.graph.graph[top_state.index] { 211 TraceGraphNode::Item(_) => true, 212 TraceGraphNode::Nonterminal(_) => false, 213 }); 214 215 self.find_next_trace() 216 } 217 None => false, 218 } 219 } 220 incoming_edges(&self, index: NodeIndex) -> Edges<'graph, SymbolSets<'grammar>, Directed>221 fn incoming_edges(&self, index: NodeIndex) -> Edges<'graph, SymbolSets<'grammar>, Directed> { 222 self.graph 223 .graph 224 .edges_directed(index, EdgeDirection::Incoming) 225 } 226 227 /// This is the main operation, written in CPS style and hence it 228 /// can seem a bit confusing. The idea is that `find_next_trace` 229 /// is called when we are ready to consider the next child of 230 /// whatever is on the top of the stack. It simply withdraws 231 /// that next child (if any) and hands it to `push_next`. find_next_trace(&mut self) -> bool232 fn find_next_trace(&mut self) -> bool { 233 if !self.stack.is_empty() { 234 let next_edge = { 235 let top_of_stack = self.stack.last_mut().unwrap(); 236 top_of_stack.edges.next() 237 }; 238 self.push_next_child_if_any(next_edge) 239 } else { 240 false 241 } 242 } 243 244 /// Invoked with the next child (if any) of the node on the top of 245 /// the stack. 246 /// 247 /// If `next` is `Some`, we simply call `push_next_child`. 248 /// 249 /// If `next` is `None`, then the node on the top of 250 /// the stack *has* no next child, and so it is popped, and then 251 /// we call `find_next_trace` again to start with the next child 252 /// of the new top of the stack. push_next_child_if_any( &mut self, next: Option<EdgeReference<'graph, SymbolSets<'grammar>>>, ) -> bool253 fn push_next_child_if_any( 254 &mut self, 255 next: Option<EdgeReference<'graph, SymbolSets<'grammar>>>, 256 ) -> bool { 257 if let Some(edge) = next { 258 let index = edge.source(); 259 let symbol_sets = *edge.weight(); 260 self.push_next_child(index, symbol_sets) 261 } else { 262 self.stack.pop(); 263 self.find_next_trace() 264 } 265 } 266 267 /// Push the next child of the top of the stack onto the stack, 268 /// making the child the new top. 269 /// 270 /// If the child is an `Item` node, we have found the next trace, 271 /// and hence our search terminates. We push the symbols from this 272 /// item node into the symbols vector and then return true. 273 /// 274 /// Otherwise, we check whether this new node would cause a cycle. 275 /// If so, we do *not* push it, and instead just call 276 /// `find_next_trace` again to proceed to the next child of the 277 /// current top. 278 /// 279 /// Finally, if the new node would NOT cause a cycle, then we can 280 /// push it onto the stack so that it becomes the new top, and 281 /// call `find_next_trace` to start searching its children. push_next_child(&mut self, index: NodeIndex, symbol_sets: SymbolSets<'grammar>) -> bool282 fn push_next_child(&mut self, index: NodeIndex, symbol_sets: SymbolSets<'grammar>) -> bool { 283 match self.graph.graph[index] { 284 TraceGraphNode::Item(_) => { 285 // If we reached an item like 286 // 287 // X = ...p (*) ...s 288 // 289 // then we are done, but we still need to push on the 290 // symbols `...p`. 291 let edges = self.incoming_edges(index); 292 self.stack.push(EnumeratorState { 293 index: index, 294 symbol_sets: symbol_sets, 295 edges: edges, 296 }); 297 return true; 298 } 299 TraceGraphNode::Nonterminal(_) => { 300 // If this node already appears on the stack, do not 301 // visit its children. 302 if !self.stack.iter().any(|state| state.index == index) { 303 let edges = self.incoming_edges(index); 304 self.stack.push(EnumeratorState { 305 index: index, 306 symbol_sets: symbol_sets, 307 edges: edges, 308 }); 309 } 310 self.find_next_trace() 311 } 312 } 313 } 314 found_trace(&self) -> bool315 pub fn found_trace(&self) -> bool { 316 !self.stack.is_empty() 317 } 318 319 /// Returns the 1-context for the current trace. In other words, 320 /// the set of tokens that may appear next in the input. If this 321 /// trace was derived from a shiftable item, this will always be 322 /// the terminal that was to be shifted; if derived from a reduce 323 /// item, this constitutes the set of lookaheads that will trigger 324 /// a reduce. first0(&self, first_sets: &FirstSets) -> TokenSet325 pub fn first0(&self, first_sets: &FirstSets) -> TokenSet { 326 assert!(self.found_trace()); 327 first_sets.first0( 328 self.stack[1] 329 .symbol_sets 330 .cursor 331 .into_iter() 332 .chain(self.stack.iter().flat_map(|s| s.symbol_sets.suffix)), 333 ) 334 } 335 example(&self) -> Example336 pub fn example(&self) -> Example { 337 assert!(self.found_trace()); 338 339 let mut symbols = vec![]; 340 341 symbols.extend( 342 self.stack 343 .iter() 344 .rev() 345 .flat_map(|s| s.symbol_sets.prefix) 346 .cloned() 347 .map(ExampleSymbol::Symbol), 348 ); 349 350 let cursor = symbols.len(); 351 352 match self.stack[1].symbol_sets.cursor { 353 Some(s) => symbols.push(ExampleSymbol::Symbol(s.clone())), 354 None => { 355 if self.stack[1].symbol_sets.prefix.is_empty() { 356 symbols.push(ExampleSymbol::Epsilon) 357 } else { 358 } 359 } 360 } 361 362 symbols.extend( 363 self.stack 364 .iter() 365 .flat_map(|s| s.symbol_sets.suffix) 366 .cloned() 367 .map(ExampleSymbol::Symbol), 368 ); 369 370 let mut cursors = (0, symbols.len()); 371 372 let mut reductions: Vec<_> = self.stack[1..] 373 .iter() 374 .rev() 375 .map(|state| { 376 let nonterminal = match self.graph.graph[state.index] { 377 TraceGraphNode::Nonterminal(ref nonterminal) => nonterminal.clone(), 378 TraceGraphNode::Item(ref item) => item.production.nonterminal.clone(), 379 }; 380 let reduction = Reduction { 381 start: cursors.0, 382 end: cursors.1, 383 nonterminal: nonterminal, 384 }; 385 cursors.0 += state.symbol_sets.prefix.len(); 386 cursors.1 -= state.symbol_sets.suffix.len(); 387 reduction 388 }) 389 .collect(); 390 reductions.reverse(); 391 392 Example { 393 symbols: symbols, 394 cursor: cursor, 395 reductions: reductions, 396 } 397 } 398 } 399 400 impl<'graph, 'grammar> Iterator for PathEnumerator<'graph, 'grammar> { 401 type Item = Example; 402 next(&mut self) -> Option<Example>403 fn next(&mut self) -> Option<Example> { 404 if self.found_trace() { 405 let example = self.example(); 406 self.advance(); 407 Some(example) 408 } else { 409 None 410 } 411 } 412 } 413 414 /////////////////////////////////////////////////////////////////////////// 415 // FilteredPathEnumerator 416 // 417 // Like the path enumerator, but tests for examples with some specific 418 // lookahead 419 420 pub struct FilteredPathEnumerator<'graph, 'grammar: 'graph> { 421 base: PathEnumerator<'graph, 'grammar>, 422 first_sets: &'graph FirstSets, 423 lookahead: TokenSet, 424 } 425 426 impl<'graph, 'grammar> FilteredPathEnumerator<'graph, 'grammar> { new( first_sets: &'graph FirstSets, graph: &'graph TraceGraph<'grammar>, lr0_item: LR0Item<'grammar>, lookahead: TokenSet, ) -> Self427 fn new( 428 first_sets: &'graph FirstSets, 429 graph: &'graph TraceGraph<'grammar>, 430 lr0_item: LR0Item<'grammar>, 431 lookahead: TokenSet, 432 ) -> Self { 433 FilteredPathEnumerator { 434 base: PathEnumerator::new(graph, lr0_item), 435 first_sets: first_sets, 436 lookahead: lookahead, 437 } 438 } 439 } 440 441 impl<'graph, 'grammar> Iterator for FilteredPathEnumerator<'graph, 'grammar> { 442 type Item = Example; 443 next(&mut self) -> Option<Example>444 fn next(&mut self) -> Option<Example> { 445 while self.base.found_trace() { 446 let firsts = self.base.first0(self.first_sets); 447 if firsts.is_intersecting(&self.lookahead) { 448 let example = self.base.example(); 449 self.base.advance(); 450 return Some(example); 451 } 452 self.base.advance(); 453 } 454 None 455 } 456 } 457