1from __future__ import annotations
2# mypy: disallow-untyped-defs, disallow-incomplete-defs, disallow-untyped-calls
3
4import collections
5import hashlib
6import os
7import pickle
8import typing
9import itertools
10
11from . import types
12from .utils import consume, keep_until, split, default_id_dict, default_fwd_dict
13from .ordered import OrderedSet, OrderedFrozenSet
14from .actions import Action, Replay, Reduce, FilterStates, Seq
15from .grammar import End, ErrorSymbol, InitNt, Nt
16from .rewrites import CanonicalGrammar
17from .lr0 import LR0Generator, Term
18from .aps import APS, Edge, Path
19
20# StateAndTransitions objects are indexed using a StateId which is an integer.
21StateId = int
22
23# Action or ordered sequence of action which have to be performed.
24DelayedAction = typing.Union[Action, typing.Tuple[Action, ...]]
25
26
27class StateAndTransitions:
28    """This is one state of the parse table, which has transitions based on
29    terminals (text), non-terminals (grammar rules) and epsilon (reduce).
30
31    In this model epsilon transitions are used to represent code to be executed
32    such as reduce actions and any others actions.
33    """
34
35    __slots__ = ["index", "locations", "terminals", "nonterminals", "errors",
36                 "epsilon", "delayed_actions", "arguments", "backedges", "_hash",
37                 "stable_hash"]
38
39    # Numerical index of this state.
40    index: StateId
41
42    # The stable_str of each LRItem we could be parsing in this state: the
43    # places in grammar productions that tell what we've already parsed,
44    # i.e. how we got to this state.
45    locations: OrderedFrozenSet[str]
46
47    # Ordered set of Actions which are pushed to the next state after a
48    # conflict.
49    delayed_actions: OrderedFrozenSet[DelayedAction]
50
51    # Number of argument of an action state.
52    #
53    # Instead of having action states with a non-empty replay list of terms, we
54    # have a non-empty list of argument which size is described by this
55    # variable.
56    arguments: int
57
58    # Outgoing edges taken when shifting terminals.
59    terminals: typing.Dict[str, StateId]
60
61    # Outgoing edges taken when shifting nonterminals after reducing.
62    nonterminals: typing.Dict[Nt, StateId]
63
64    # Error symbol transitions.
65    errors: typing.Dict[ErrorSymbol, StateId]
66
67    # List of epsilon transitions with associated actions.
68    epsilon: typing.List[typing.Tuple[Action, StateId]]
69
70    # Set of edges that lead to this state.
71    backedges: OrderedSet[Edge]
72
73    # Cached hash code. This class implements __hash__ and __eq__ in order to
74    # help detect equivalent states (which must be merged, for correctness).
75    _hash: int
76
77    # A hash code computed the same way as _hash, but used only for
78    # human-readable output. The stability is useful for debugging, to match
79    # states across multiple runs of the parser generator.
80    stable_hash: str
81
82    def __init__(
83            self,
84            index: StateId,
85            locations: OrderedFrozenSet[str],
86            delayed_actions: OrderedFrozenSet[DelayedAction] = OrderedFrozenSet(),
87            arguments: int = 0
88    ) -> None:
89        assert isinstance(locations, OrderedFrozenSet)
90        assert isinstance(delayed_actions, OrderedFrozenSet)
91        self.index = index
92        self.terminals = {}
93        self.nonterminals = {}
94        self.errors = {}
95        self.epsilon = []
96        self.locations = locations
97        self.delayed_actions = delayed_actions
98        self.arguments = arguments
99        self.backedges = OrderedSet()
100
101        # NOTE: The hash of a state depends on its location in the LR0
102        # parse-table, as well as the actions which have not yet been executed.
103        def hashed_content() -> typing.Iterator[object]:
104            for item in sorted(self.locations):
105                yield item
106                yield "\n"
107            yield "delayed_actions"
108            for action in self.delayed_actions:
109                yield hash(action)
110            yield "arguments"
111            yield arguments
112
113        self._hash = hash(tuple(hashed_content()))
114        h = hashlib.md5()
115        h.update("".join(map(str, hashed_content())).encode())
116        self.stable_hash = h.hexdigest()[:6]
117
118    def is_inconsistent(self) -> bool:
119        "Returns True if the state transitions are inconsistent."
120        # TODO: We could easily allow having a state with non-terminal
121        # transition and other epsilon transitions, as the non-terminal shift
122        # transitions are a form of condition based on the fact that a
123        # non-terminal, produced by a reduce action is consumed by the
124        # automaton.
125        if len(self.terminals) + len(self.nonterminals) + len(self.errors) > 0 and len(self.epsilon) > 0:
126            return True
127        elif len(self.epsilon) == 1:
128            if any(k.is_inconsistent() for k, s in self.epsilon):
129                return True
130        elif len(self.epsilon) > 1:
131            if any(k.is_inconsistent() for k, s in self.epsilon):
132                return True
133            # NOTE: We can accept multiple conditions as epsilon transitions
134            # iff they are checking the same variable with non-overlapping
135            # values. This implies that we can implement these conditions as a
136            # deterministic switch statement in the code emitter.
137            if any(not k.is_condition() for k, s in self.epsilon):
138                return True
139            iterator = iter(self.epsilon)
140            first, _ = next(iterator)
141            if any(not first.check_same_variable(k) for k, s in iterator):
142                return True
143            # "type: ignore" because mypy does not see that the preceding if-statement
144            # means all k.condition() actions are FilterFlags.
145            pairs = itertools.combinations((k for k, s in self.epsilon), 2)
146            if any(not k1.check_different_values(k2) for k1, k2 in pairs):
147                return True
148        else:
149            try:
150                self.get_error_symbol()
151            except ValueError:
152                return True
153        return False
154
155    def shifted_edges(self) -> typing.Iterator[
156            typing.Tuple[typing.Union[str, Nt, ErrorSymbol], StateId]
157    ]:
158        k: Term
159        s: StateId
160        for k, s in self.terminals.items():
161            yield (k, s)
162        for k, s in self.nonterminals.items():
163            yield (k, s)
164        for k, s in self.errors.items():
165            yield (k, s)
166
167    def edges(self) -> typing.Iterator[typing.Tuple[Term, StateId]]:
168        k: Term
169        s: StateId
170        for k, s in self.terminals.items():
171            yield (k, s)
172        for k, s in self.nonterminals.items():
173            yield (k, s)
174        for k, s in self.errors.items():
175            yield (k, s)
176        for k, s in self.epsilon:
177            yield (k, s)
178
179    def rewrite_state_indexes(
180            self,
181            state_map: typing.Dict[StateId, StateId]
182    ) -> None:
183        def apply_on_term(term: typing.Union[Term, None]) -> Term:
184            assert term is not None
185            if isinstance(term, Action):
186                return term.rewrite_state_indexes(state_map)
187            return term
188
189        self.index = state_map[self.index]
190        self.terminals = {
191            k: state_map[s] for k, s in self.terminals.items()
192        }
193        self.nonterminals = {
194            k: state_map[s] for k, s in self.nonterminals.items()
195        }
196        self.errors = {
197            k: state_map[s] for k, s in self.errors.items()
198        }
199        self.epsilon = [
200            (k.rewrite_state_indexes(state_map), state_map[s])
201            for k, s in self.epsilon
202        ]
203        # We cannot have multiple identical actions jumping to different locations.
204        assert len(self.epsilon) == len(set(k for k, _ in self.epsilon))
205        self.backedges = OrderedSet(
206            Edge(state_map[edge.src], apply_on_term(edge.term))
207            for edge in self.backedges
208        )
209
210    def get_error_symbol(self) -> typing.Optional[ErrorSymbol]:
211        if len(self.errors) > 1:
212            raise ValueError("More than one error symbol on the same state.")
213        else:
214            return next(iter(self.errors), None)
215
216    def __contains__(self, term: object) -> bool:
217        if isinstance(term, Action):
218            for t, s in self.epsilon:
219                if t == term:
220                    return True
221            return False
222        elif isinstance(term, Nt):
223            return term in self.nonterminals
224        elif isinstance(term, ErrorSymbol):
225            return term in self.errors
226        else:
227            return term in self.terminals
228
229    def __getitem__(self, term: Term) -> StateId:
230        if isinstance(term, Action):
231            for t, s in self.epsilon:
232                if t == term:
233                    return s
234            raise KeyError(term)
235        elif isinstance(term, Nt):
236            return self.nonterminals[term]
237        if isinstance(term, ErrorSymbol):
238            return self.errors[term]
239        else:
240            return self.terminals[term]
241
242    def get(self, term: Term, default: object) -> object:
243        try:
244            return self.__getitem__(term)
245        except KeyError:
246            return default
247
248    def stable_str(self, states: typing.List[StateAndTransitions]) -> str:
249        conflict = ""
250        if self.is_inconsistent():
251            conflict = " (inconsistent)"
252        return "{}{}:\n{}".format(self.stable_hash, conflict, "\n".join([
253            "\t{} --> {}".format(k, states[s].stable_hash) for k, s in self.edges()]))
254
255    def __str__(self) -> str:
256        conflict = ""
257        if self.is_inconsistent():
258            conflict = " (inconsistent)"
259        return "{}{}:\n{}".format(self.index, conflict, "\n".join([
260            "\t{} --> {}".format(k, s) for k, s in self.edges()]))
261
262    def __eq__(self, other: object) -> bool:
263        return (isinstance(other, StateAndTransitions)
264                and sorted(self.locations) == sorted(other.locations)
265                and sorted(self.delayed_actions) == sorted(other.delayed_actions)
266                and self.arguments == other.arguments)
267
268    def __hash__(self) -> int:
269        return self._hash
270
271
272DebugInfo = typing.Dict[StateId, int]
273
274
275class ParseTable:
276    """The parser can be represented as a matrix of state transitions where on one
277    side we have the current state, and on the other we have the expected
278    terminal, non-terminal or epsilon transition.
279
280            a   b   c   A   B   C   #1   #2   #3
281          +---+---+---+---+---+---+----+----+----+
282      s1  |   |   |   |   |   |   |    |    |    |
283      s2  |   |   |   |   |   |   |    |    |    |
284      s3  |   |   |   |   |   |   |    |    |    |
285       .  |   |   |   |   |   |   |    |    |    |
286       .  |   |   |   |   |   |   |    |    |    |
287       .  |   |   |   |   |   |   |    |    |    |
288      s67 |   |   |   |   |   |   |    |    |    |
289      s68 |   |   |   |   |   |   |    |    |    |
290      s69 |   |   |   |   |   |   |    |    |    |
291          +---+---+---+---+---+---+----+----+----+
292
293    The terminals `a` are the token which are read from the input. The
294    non-terminals `A` are the token which are pushed by the reduce actions of
295    the epsilon transitions. The epsilon transitions `#1` are the actions which
296    have to be executed as code by the parser.
297
298    A parse table is inconsistent if there is any state which has an epsilon
299    transitions and terminals/non-terminals transitions (shift-reduce
300    conflict), or a state with more than one epsilon transitions (reduce-reduce
301    conflict). This is equivalent to having a non deterministic state machine.
302
303    """
304
305    __slots__ = [
306        "actions", "states", "state_cache", "named_goals", "terminals",
307        "nonterminals", "debug_info", "exec_modes", "assume_inconsistent"
308    ]
309
310    # Map of actions identifier to the corresponding object.
311    actions: typing.List[Action]
312
313    # Map of state identifier to the corresponding object.
314    states: typing.List[StateAndTransitions]
315
316    # Hash table of state objects, ensuring we never have two equal states.
317    state_cache: typing.Dict[StateAndTransitions, StateAndTransitions]
318
319    # List of (Nt, states) tuples which are the entry point of the state
320    # machine.
321    named_goals: typing.List[typing.Tuple[Nt, StateId]]
322
323    # Set of all terminals.
324    terminals: OrderedFrozenSet[typing.Union[str, End]]
325
326    # List of non-terminals.
327    nonterminals: typing.List[Nt]
328
329    # Carry the info to be used when generating debug_context. If False,
330    # then no debug_context is ever produced.
331    debug_info: typing.Union[bool, DebugInfo]
332
333    # Execution modes are used by the code generator to decide which
334    # function is executed when. This is a dictionary of OrderedSet, where
335    # the keys are the various parsing modes, and the mapped set contains
336    # the list of traits which have to be implemented, and consequently
337    # which functions would be encoded.
338    exec_modes: typing.Optional[typing.DefaultDict[str, OrderedSet[types.Type]]]
339
340    # True if the parse table might be inconsistent. When this is False, we add
341    # extra assertions when computing the reduce path.
342    assume_inconsistent: bool
343
344    def __init__(
345            self,
346            grammar: CanonicalGrammar,
347            verbose: bool = False,
348            progress: bool = False,
349            debug: bool = False
350    ) -> None:
351        self.actions = []
352        self.states = []
353        self.state_cache = {}
354        self.named_goals = []
355        self.terminals = grammar.grammar.terminals
356        self.nonterminals = typing.cast(
357            typing.List[Nt],
358            list(grammar.grammar.nonterminals.keys()))
359
360        # typing.cast() doesn't actually check at run time, so let's do that:
361        assert all(isinstance(nt, Nt) for nt in self.nonterminals)
362
363        self.debug_info = debug
364        self.exec_modes = grammar.grammar.exec_modes
365        self.assume_inconsistent = True
366        self.create_lr0_table(grammar, verbose, progress)
367        self.fix_inconsistent_table(verbose, progress)
368        # TODO: Optimize chains of actions into sequences.
369        # Optimize by removing unused states.
370        self.remove_all_unreachable_state(verbose, progress)
371        # TODO: Statically compute replayed terms. (maybe?)
372        # Replace reduce actions by programmatic stack manipulation.
373        self.lower_reduce_actions(verbose, progress)
374        # Fold Replay followed by Unwind instruction.
375        self.fold_replay_unwind(verbose, progress)
376        # Fold paths which have the same ending.
377        self.fold_identical_endings(verbose, progress)
378        # Group state with similar non-terminal edges close-by, to improve the
379        # generated Rust code by grouping matched state numbers.
380        self.group_nonterminal_states(verbose, progress)
381        # Split shift states from epsilon states.
382        # self.group_epsilon_states(verbose, progress)
383
384    def save(self, filename: os.PathLike) -> None:
385        with open(filename, 'wb') as f:
386            pickle.dump(self, f)
387
388    @classmethod
389    def load(cls, filename: os.PathLike) -> ParseTable:
390        with open(filename, 'rb') as f:
391            obj = pickle.load(f)
392            if len(f.read()) != 0:
393                raise ValueError("file has unexpected extra bytes at end")
394        if not isinstance(obj, cls):
395            raise TypeError("file contains wrong kind of object: expected {}, got {}"
396                            .format(cls.__name__, obj.__class__.__name__))
397        return obj
398
399    def is_inconsistent(self) -> bool:
400        "Returns True if the grammar contains any inconsistent state."
401        for s in self.states:
402            if s is not None and s.is_inconsistent():
403                return True
404        return False
405
406    def rewrite_state_indexes(self, state_map: typing.Dict[StateId, StateId]) -> None:
407        for s in self.states:
408            if s is not None:
409                s.rewrite_state_indexes(state_map)
410        self.named_goals = [
411            (nt, state_map[s]) for nt, s in self.named_goals
412        ]
413
414        # After a rewrite, multiple actions (conditions) might jump to the same
415        # target, attempt to fold these conditions based on having the same
416        # target. If we can merge them, then remove previous edges (updating
417        # the backedges of successor states) and replace them by the newly
418        # created edges.
419        for s in self.states:
420            if s is not None and len(s.epsilon) != 0:
421                epsilon_by_dest = collections.defaultdict(list)
422                for k, d in s.epsilon:
423                    epsilon_by_dest[d].append(k)
424                for d, ks in epsilon_by_dest.items():
425                    if len(ks) == 1:
426                        continue
427                    new_ks = ks[0].fold_by_destination(ks)
428                    if new_ks == ks:
429                        continue
430                    # This collection is required by `remove_edge`, but in this
431                    # particular case we know for sure that at least one edge
432                    # would be added back. Therefore no need to use the content
433                    # of the set.
434                    maybe_unreachable_set: OrderedSet[StateId] = OrderedSet()
435                    assert len(new_ks) > 0
436                    for k in ks:
437                        self.remove_edge(s, k, maybe_unreachable_set)
438                    for k in new_ks:
439                        self.add_edge(s, k, d)
440
441        self.assert_table_invariants()
442
443    def rewrite_reordered_state_indexes(self) -> None:
444        state_map = {
445            s.index: i
446            for i, s in enumerate(self.states)
447            if s is not None
448        }
449        self.rewrite_state_indexes(state_map)
450
451    def new_state(
452            self,
453            locations: OrderedFrozenSet[str],
454            delayed_actions: OrderedFrozenSet[DelayedAction] = OrderedFrozenSet(),
455            arguments: int = 0
456    ) -> typing.Tuple[bool, StateAndTransitions]:
457        """Get or create state with an LR0 location and delayed actions. Returns a tuple
458        where the first element is whether the element is newly created, and
459        the second element is the State object."""
460        index = len(self.states)
461        state = StateAndTransitions(index, locations, delayed_actions, arguments)
462        try:
463            return False, self.state_cache[state]
464        except KeyError:
465            self.state_cache[state] = state
466            self.states.append(state)
467            return True, state
468
469    def get_state(
470            self,
471            locations: OrderedFrozenSet[str],
472            delayed_actions: OrderedFrozenSet[DelayedAction] = OrderedFrozenSet(),
473            arguments: int = 0
474    ) -> StateAndTransitions:
475        """Like new_state(), but only returns the state without returning whether it is
476        newly created or not."""
477        _, state = self.new_state(locations, delayed_actions, arguments)
478        return state
479
480    def remove_state(self, s: StateId, maybe_unreachable_set: OrderedSet[StateId]) -> None:
481        state = self.states[s]
482        self.clear_edges(state, maybe_unreachable_set)
483        del self.state_cache[state]
484
485        # "type: ignore" because the type annotation on `states` doesn't allow
486        # entries to be `None`.
487        self.states[s] = None  # type: ignore
488
489    def add_edge(
490            self,
491            src: StateAndTransitions,
492            term: Term,
493            dest: StateId
494    ) -> None:
495        assert term not in src
496        assert dest < len(self.states)
497        if isinstance(term, Action):
498            src.epsilon.append((term, dest))
499        elif isinstance(term, Nt):
500            src.nonterminals[term] = dest
501        elif isinstance(term, ErrorSymbol):
502            src.errors[term] = dest
503        else:
504            src.terminals[term] = dest
505        self.states[dest].backedges.add(Edge(src.index, term))
506
507    def remove_backedge(
508            self,
509            src: StateAndTransitions,
510            term: Term,
511            dest: StateId,
512            maybe_unreachable_set: OrderedSet[StateId]
513    ) -> None:
514        self.states[dest].backedges.remove(Edge(src.index, term))
515        maybe_unreachable_set.add(dest)
516
517    def replace_edge(
518            self,
519            src: StateAndTransitions,
520            term: Term,
521            dest: StateId,
522            maybe_unreachable_set: OrderedSet[StateId]
523    ) -> None:
524        assert isinstance(dest, int) and dest < len(self.states)
525
526        edge_existed = term in src
527        if edge_existed:
528            old_dest = src[term]
529            self.remove_backedge(src, term, old_dest, maybe_unreachable_set)
530
531        if isinstance(term, Action):
532            src.epsilon = [(t, d) for t, d in src.epsilon if t != term]
533            src.epsilon.append((term, dest))
534        elif isinstance(term, Nt):
535            src.nonterminals[term] = dest
536        elif isinstance(term, ErrorSymbol):
537            src.errors[term] = dest
538        else:
539            src.terminals[term] = dest
540        self.states[dest].backedges.add(Edge(src.index, term))
541
542        self.assert_state_invariants(src)
543        self.assert_state_invariants(dest)
544        if edge_existed:
545            self.assert_state_invariants(old_dest)
546
547    def remove_edge(
548            self,
549            src: StateAndTransitions,
550            term: Term,
551            maybe_unreachable_set: OrderedSet[StateId]
552    ) -> None:
553        edge_existed = term in src
554        if edge_existed:
555            old_dest = src[term]
556            self.remove_backedge(src, term, old_dest, maybe_unreachable_set)
557        if isinstance(term, Action):
558            src.epsilon = [(t, d) for t, d in src.epsilon if t != term]
559        elif isinstance(term, Nt):
560            del src.nonterminals[term]
561        elif isinstance(term, ErrorSymbol):
562            del src.errors[term]
563        else:
564            del src.terminals[term]
565
566        self.assert_state_invariants(src)
567        if edge_existed:
568            self.assert_state_invariants(old_dest)
569
570    def clear_edges(
571            self,
572            src: StateAndTransitions,
573            maybe_unreachable_set: OrderedSet[StateId]
574    ) -> None:
575        """Remove all existing edges, in order to replace them by new one. This is used
576        when resolving shift-reduce conflicts."""
577        assert isinstance(src, StateAndTransitions)
578        old_dest = []
579        for term, dest in src.edges():
580            self.remove_backedge(src, term, dest, maybe_unreachable_set)
581            old_dest.append(dest)
582        src.terminals = {}
583        src.nonterminals = {}
584        src.errors = {}
585        src.epsilon = []
586        self.assert_state_invariants(src)
587        for dest in old_dest:
588            self.assert_state_invariants(dest)
589
590    def assert_table_invariants(self) -> None:
591        for s in self.states:
592            if s is not None:
593                self.assert_state_invariants(s)
594
595    def assert_state_invariants(self, src: typing.Union[StateId, StateAndTransitions]) -> None:
596        if not self.debug_info:
597            return
598        if isinstance(src, int):
599            src = self.states[src]
600        assert isinstance(src, StateAndTransitions)
601        try:
602            for term, dest in src.edges():
603                assert Edge(src.index, term) in self.states[dest].backedges
604            for e in src.backedges:
605                assert e.term is not None
606                assert self.states[e.src][e.term] == src.index
607            if not self.assume_inconsistent:
608                assert not src.is_inconsistent()
609        except AssertionError as exc:
610            print("assert_state_inveriants for {}\n".format(src))
611            for e in src.backedges:
612                print("backedge {} from {}\n".format(e, self.states[e.src]))
613            raise exc
614
615    def remove_unreachable_states(
616            self,
617            maybe_unreachable_set: OrderedSet[StateId]
618    ) -> None:
619        # TODO: This function is incomplete in case of loops, some cycle might
620        # remain isolated while not being reachable from the init states. We
621        # should maintain a notion of depth per-state, such that we can
622        # identify loops by noticing the all backedges have a larger depth than
623        # the current state.
624        init: OrderedSet[StateId]
625        init = OrderedSet(goal for name, goal in self.named_goals)
626        while maybe_unreachable_set:
627            next_set: OrderedSet[StateId] = OrderedSet()
628            for s in maybe_unreachable_set:
629                # Check if the state is reachable, if not remove the state and
630                # fill the next_set with all outgoing edges.
631                if len(self.states[s].backedges) == 0 and s not in init:
632                    self.remove_state(s, next_set)
633            maybe_unreachable_set = next_set
634
635    def is_reachable_state(self, s: StateId) -> bool:
636        """Check whether the current state is reachable or not."""
637        if self.states[s] is None:
638            return False
639        reachable_back: OrderedSet[StateId] = OrderedSet()
640        todo = [s]
641        while todo:
642            s = todo.pop()
643            reachable_back.add(s)
644            for edge in self.states[s].backedges:
645                if edge.src not in reachable_back:
646                    todo.append(edge.src)
647        for _, s in self.named_goals:
648            if s in reachable_back:
649                return True
650        return False
651
652    def debug_dump(self) -> None:
653        # Sort the grammar by state hash, such that it can be compared
654        # before/after grammar modifications.
655        temp = [s for s in self.states if s is not None]
656        temp = sorted(temp, key=lambda s: s.stable_hash)
657        for s in temp:
658            print(s.stable_str(self.states))
659
660    def create_lr0_table(
661            self,
662            grammar: CanonicalGrammar,
663            verbose: bool,
664            progress: bool
665    ) -> None:
666        if verbose or progress:
667            print("Create LR(0) parse table.")
668
669        goals = grammar.grammar.goals()
670        self.named_goals = []
671
672        # Temporary work queue.
673        todo: typing.Deque[typing.Tuple[LR0Generator, StateAndTransitions]]
674        todo = collections.deque()
675
676        # Record the starting goals in the todo list.
677        for nt in goals:
678            init_nt = Nt(InitNt(nt), ())
679            it = LR0Generator.start(grammar, init_nt)
680            s = self.get_state(it.stable_locations())
681            todo.append((it, s))
682            self.named_goals.append((nt, s.index))
683
684        # Iterate the grammar with sets of LR Items abstracted by the
685        # LR0Generator, and create new states in the parse table as long as new
686        # sets of LR Items are discovered.
687        def visit_grammar() -> typing.Iterator[None]:
688            while todo:
689                yield  # progress bar.
690                # TODO: Compare stack / queue, for the traversal of the states.
691                s_it, s = todo.popleft()
692                if verbose:
693                    print("\nMapping state {} to LR0:\n{}".format(s.stable_hash, s_it))
694                for k, sk_it in s_it.transitions().items():
695                    locations = sk_it.stable_locations()
696                    if not self.term_is_shifted(k):
697                        locations = OrderedFrozenSet()
698                    is_new, sk = self.new_state(locations)
699                    if is_new:
700                        todo.append((sk_it, sk))
701
702                    # Add the edge from s to sk with k.
703                    self.add_edge(s, k, sk.index)
704
705        consume(visit_grammar(), progress)
706
707        if verbose:
708            print("Create LR(0) Table Result:")
709            self.debug_dump()
710
711    def term_is_shifted(self, term: typing.Optional[Term]) -> bool:
712        return not isinstance(term, Action) or term.follow_edge()
713
714    def is_valid_path(
715            self,
716            path: typing.Sequence[Edge],
717            state: typing.Optional[StateId] = None
718    ) -> bool:
719        """This function is used to check a list of edges and returns whether it
720        corresponds to a valid path within the parse table. This is useful when
721        merging sequences of edges from various locations."""
722        if not state and path != []:
723            state = path[0].src
724        while path:
725            edge = path[0]
726            path = path[1:]
727            if state != edge.src:
728                return False
729            assert isinstance(state, StateId)
730
731            term = edge.term
732            if term is None and len(path) == 0:
733                return True
734
735            row = self.states[state]
736            if term not in row:
737                return False
738            assert term is not None
739            state = row[term]
740        return True
741
742    def term_is_stacked(self, term: typing.Optional[Term]) -> bool:
743        # The `term` argument is annotated as Optional because `Edge.term` is a
744        # common argument. If it's ever None in practice, the caller has a bug.
745        assert term is not None
746
747        return not isinstance(term, Action)
748
749    def aps_visitor(self, aps: APS, visit: typing.Callable[[APS], bool]) -> None:
750        """Visit all the states of the parse table, as-if we were running a
751        Generalized LR parser.
752
753        However, instead parsing content, we use this algorithm to generate
754        both the content which remains to be parsed as well as the context
755        which might lead us to be in the state which from which we started.
756
757        This algorithm takes an APS (Abstract Parser State) and a callback, and
758        consider all edges of the parse table, unless restricted by one of the
759        previously encountered actions. These restrictions, such as replayed
760        lookahead or the path which might be reduced are used for filtering out
761        states which are not handled by this parse table.
762
763        For each edge, this functions calls the visit functions to know whether
764        to stop or continue. The visit function might capture APS given as
765        argument to be used for other analysis.
766
767        """
768        todo = [aps]
769        while todo:
770            aps = todo.pop()
771            cont = visit(aps)
772            if not cont:
773                continue
774            todo.extend(aps.shift_next(self))
775
776    def context_lanes(self, state: StateId) -> typing.Tuple[bool, typing.List[APS]]:
777        """Compute lanes, such that each reduce action can have set of unique stack to
778        reach the given state. The stacks are assumed to be loop-free by
779        reducing edges at most once.
780
781        In order to avoid attempting to eagerly solve everything using context
782        information, we break this loop as soon as we have one token of
783        lookahead in a case which does not have enough context.
784
785        The return value is a tuple where the first element is a boolean which
786        is True if we should fallback on solving this issue with more
787        lookahead, and the second is the list of APS lanes which are providing
788        enough context to disambiguate the inconsistency of the given state."""
789
790        def not_interesting(aps: APS) -> bool:
791            reduce_list = [e for e in aps.history if self.term_is_shifted(e.term)]
792            has_reduce_loop = len(reduce_list) != len(set(reduce_list))
793            return has_reduce_loop
794
795        # The context is a dictionary which maps all stack suffixes from an APS
796        # stack. It is mapped to a list of tuples, where the each tuple is the
797        # index with the APS stack and the APS action used to follow this path.
798        context: typing.DefaultDict[typing.Tuple[Edge, ...], typing.List[Edge]]
799        context = collections.defaultdict(lambda: [])
800
801        def has_enough_context(aps: APS) -> bool:
802            try:
803                assert aps.history[0] in context[tuple(aps.stack)]
804                # Check the number of different actions which can reach this
805                # location. If there is more than 1, then we do not have enough
806                # context.
807                return len(set(context[tuple(aps.stack)])) <= 1
808            except IndexError:
809                return False
810
811        collect = [APS.start(state)]
812        enough_context = False
813        while not enough_context:
814            # print("collect.len = {}".format(len(collect)))
815            # Fill the context dictionary with all the sub-stack which might be
816            # encountered by other APS.
817            recurse = []
818            context = collections.defaultdict(lambda: [])
819            while collect:
820                aps = collect.pop()
821                recurse.append(aps)
822                if aps.history == []:
823                    continue
824                for i in range(len(aps.stack)):
825                    context[tuple(aps.stack[i:])].append(aps.history[0])
826            assert collect == []
827
828            # print("recurse.len = {}".format(len(recurse)))
829            # Iterate over APS which do not yet have enough context information
830            # to uniquely identify a single action.
831            enough_context = True
832            while recurse:
833                aps = recurse.pop()
834                if not_interesting(aps):
835                    # print("discard uninteresting context lane:")
836                    # print(aps.string("\tcontext"))
837                    continue
838                if has_enough_context(aps):
839                    collect.append(aps)
840                    continue
841                # If we have not enough context but some lookahead is
842                # available, attempt to first solve this issue using more
843                # lookahead before attempting to use context information.
844                if len(aps.lookahead) >= 1:
845                    # print("discard context_lanes due to lookahead:")
846                    # for aps in itertools.chain(collect, recurse, [aps]):
847                    #     print(aps.string("\tcontext"))
848                    return True, []
849                enough_context = False
850                # print("extend starting at:\n{}".format(aps.string("\tcontext")))
851                collect.extend(aps.shift_next(self))
852            assert recurse == []
853
854        # print("context_lanes:")
855        # for aps in collect:
856        #     print(aps.string("\tcontext"))
857
858        return False, collect
859
860    def lookahead_lanes(self, state: StateId) -> typing.List[APS]:
861        """Compute lanes to collect all lookahead symbols available. After each reduce
862        action, there is no need to consider the same non-terminal multiple
863        times, we are only interested in lookahead token and not in the context
864        information provided by reducing action."""
865
866        record = []
867        # After the first reduce action, we do not want to spend too much
868        # resource visiting edges which would give us the same information.
869        # Therefore, if we already reduce an action to a given state, then we
870        # skip looking for lookahead that we already visited.
871        #
872        # Set of (first-reduce-edge, reducing-base, last-reduce-edge)
873        seen_edge_after_reduce: typing.Set[typing.Tuple[Edge, StateId, typing.Optional[Term]]]
874        seen_edge_after_reduce = set()
875
876        def find_first_reduce(
877                edges: Path
878        ) -> typing.Tuple[int, typing.Optional[Edge]]:
879            for i, edge in enumerate(edges):
880                if not self.term_is_shifted(edge.term):
881                    return i, edge
882            return 0, None
883
884        def find_last_reduce(
885                edges: Path
886        ) -> typing.Tuple[int, typing.Optional[Edge]]:
887            for i, edge in zip(reversed(range(len(edges))), reversed(edges)):
888                if not self.term_is_shifted(edge.term):
889                    return i, edge
890            return 0, None
891
892        def visit(aps: APS) -> bool:
893            # Note, this suppose that we are not considering flags when
894            # computing, as flag might prevent some lookahead investigations.
895            reduce_key = None
896            first_index, first_reduce = find_first_reduce(aps.history)
897            last_index, last_reduce = find_last_reduce(aps.history)
898            if first_index != last_index and first_reduce and last_reduce:
899                if not isinstance(aps.history[-1].term, Action):
900                    reduce_key = (first_reduce, aps.shift[0].src, last_reduce.term)
901            has_seen_edge_after_reduce = reduce_key and reduce_key in seen_edge_after_reduce
902            has_lookahead = len(aps.lookahead) >= 1
903            stop = has_seen_edge_after_reduce or has_lookahead
904            # print("stop: {}, size lookahead: {}, seen_edge_after_reduce: {}".format(
905            #     stop, len(aps.lookahead), repr(reduce_key)
906            # ))
907            # print(aps.string("\tvisitor"))
908            if stop:
909                if has_lookahead:
910                    record.append(aps)
911            if reduce_key:
912                seen_edge_after_reduce.add(reduce_key)
913            return not stop
914
915        self.aps_visitor(APS.start(state), visit)
916        return record
917
918    def fix_with_context(self, s: StateId, aps_lanes: typing.List[APS]) -> None:
919        raise ValueError("fix_with_context: Not Implemented")
920    #     # This strategy is about using context information. By using chains of
921    #     # reduce actions, we are able to increase the knowledge of the stack
922    #     # content. The stack content is the context which can be used to
923    #     # determine how to consider a reduction. The stack content is also
924    #     # called a lane, as defined in the Lane Table algorithm.
925    #     #
926    #     # To add context information to the current graph, we add flags
927    #     # manipulation actions.
928    #     #
929    #     # Consider each edge as having an implicit function which can map one
930    #     # flag value to another. The following implements a unification
931    #     # algorithm which is attempting to solve the question of what is the
932    #     # flag value, and where it should be changed.
933    #     #
934    #     # NOTE: (nbp) I would not be surprised if there is a more specialized
935    #     # algorithm, but I failed to find one so far, and this problem
936    #     # definitely looks like a unification problem.
937    #     Id = collections.namedtuple("Id", "edge")
938    #     Eq = collections.namedtuple("Eq", "flag_in edge flag_out")
939    #     Var = collections.namedtuple("Var", "n")
940    #     SubSt = collections.namedtuple("SubSt", "var by")
941    #
942    #     # Unify expression, and return one substitution if both expressions
943    #     # can be unified.
944    #     def unify_expr(expr1, expr2, swapped=False):
945    #         if isinstance(expr1, Eq) and isinstance(expr2, Id):
946    #             if expr1.edge != expr2.edge:
947    #                 # Different edges are ok, but produce no substituions.
948    #                 return True
949    #             if isinstance(expr1.flag_in, Var):
950    #                 return SubSt(expr1.flag_in, expr1.flag_out)
951    #             if isinstance(expr1.flag_out, Var):
952    #                 return SubSt(expr1.flag_out, expr1.flag_in)
953    #             # We are unifying with a relation which consider the current
954    #             # function as an identity function. Having different values as
955    #             # input and output fails the unification rule.
956    #             return expr1.flag_out == expr1.flag_in
957    #         if isinstance(expr1, Eq) and isinstance(expr2, Eq):
958    #             if expr1.edge != expr2.edge:
959    #                 # Different edges are ok, but produce no substituions.
960    #                 return True
961    #             if expr1.flag_in is None and isinstance(expr2.flag_in, Var):
962    #                 return SubSt(expr2.flag_in, None)
963    #             if expr1.flag_out is None and isinstance(expr2.flag_out, Var):
964    #                 return SubSt(expr2.flag_out, None)
965    #             if expr1.flag_in == expr2.flag_in:
966    #                 if isinstance(expr1.flag_out, Var):
967    #                     return SubSt(expr1.flag_out, expr2.flag_out)
968    #                 elif isinstance(expr2.flag_out, Var):
969    #                     return SubSt(expr2.flag_out, expr1.flag_out)
970    #                 # Reject solutions which are not deterministic. We do not
971    #                 # want the same input flag to have multiple outputs.
972    #                 return expr1.flag_out == expr2.flag_out
973    #             if expr1.flag_out == expr2.flag_out:
974    #                 if isinstance(expr1.flag_in, Var):
975    #                     return SubSt(expr1.flag_in, expr2.flag_in)
976    #                 elif isinstance(expr2.flag_in, Var):
977    #                     return SubSt(expr2.flag_in, expr1.flag_in)
978    #                 return True
979    #         if not swapped:
980    #             return unify_expr(expr2, expr1, True)
981    #         return True
982    #
983    #     # Apply substituion rule to an expression.
984    #     def subst_expr(subst, expr):
985    #         if expr == subst.var:
986    #             return True, subst.by
987    #         if isinstance(expr, Eq):
988    #             subst1, flag_in = subst_expr(subst, expr.flag_in)
989    #             subst2, flag_out = subst_expr(subst, expr.flag_out)
990    #             return subst1 or subst2, Eq(flag_in, expr.edge, flag_out)
991    #         return False, expr
992    #
993    #     # Add an expression to an existing knowledge based which is relying on
994    #     # a set of free variables.
995    #     def unify_with(expr, knowledge, free_vars):
996    #         old_knowledge = knowledge
997    #         old_free_Vars = free_vars
998    #         while True:
999    #             subst = None
1000    #             for rel in knowledge:
1001    #                 subst = unify_expr(rel, expr)
1002    #                 if subst is False:
1003    #                     raise Error("Failed to find a coherent solution")
1004    #                 if subst is True:
1005    #                     continue
1006    #                 break
1007    #             else:
1008    #                 return knowledge + [expr], free_vars
1009    #             free_vars = [fv for fv in free_vars if fv != subst.var]
1010    #             # Substitue variables, and re-add rules which have substituted
1011    #             # vars to check the changes to other rules, in case 2 rules are
1012    #             # now in conflict or in case we can propagate more variable
1013    #             # changes.
1014    #             subst_rules = [subst_expr(subst, k) for k in knowledge]
1015    #             knowledge = [rule for changed, rule in subst_rule if not changed]
1016    #             for changed, rule in subst_rule:
1017    #                 if not changed:
1018    #                     continue
1019    #                 knowledge, free_vars = unify_with(rule, knowledge, free_vars)
1020    #
1021    #     # Register boundary conditions as part of the knowledge based, i-e that
1022    #     # reduce actions are expecting to see the flag value matching the
1023    #     # reduced non-terminal, and that we have no flag value at the start of
1024    #     # every lane head.
1025    #     #
1026    #     # TODO: Catch exceptions from the unify function in case we do not yet
1027    #     # have enough context to disambiguate.
1028    #     rules = []
1029    #     free_vars = []
1030    #     last_free = 0
1031    #     maybe_id_edges = set()
1032    #     nts = set()
1033    #     for aps in aps_lanes:
1034    #         assert len(aps.stack) >= 1
1035    #         flag_in = None
1036    #         for edge in aps.stack[-1]:
1037    #             i = last_free
1038    #             last_free += 1
1039    #             free_vars.append(Var(i))
1040    #             rule = Eq(flag_in, edge, Var(i))
1041    #             rules, free_vars = unify_with(rule, rules, free_vars)
1042    #             flag_in = Var(i)
1043    #             if flag_in is not None:
1044    #                 maybe_id_edges.add(Id(edge))
1045    #         edge = aps.stack[-1]
1046    #         nt = edge.term.update_stack_with().nt
1047    #         rule = Eq(nt, edge, None)
1048    #         rules, free_vars = unify_with(rule, rules, free_vars)
1049    #         nts.add(nt)
1050    #
1051    #     # We want to produce a parse table where most of the node are ignoring
1052    #     # the content of the flag which is being added. Thus we want to find a
1053    #     # solution where most edges are the identical function.
1054    #     def fill_with_id_functions(rules, free_vars, maybe_id_edges):
1055    #         min_rules, min_vars = rules, free_vars
1056    #         for num_id_edges in reversed(range(len(maybe_id_edges))):
1057    #             for id_edges in itertools.combinations(edges, num_id_edges):
1058    #                 for edge in id_edges:
1059    #                     new_rules, new_free_vars = unify_with(rule, rules, free_vars)
1060    #                     if new_free_vars == []:
1061    #                         return new_rules, new_free_vars
1062    #                     if len(new_free_vars) < len(min_free_vars):
1063    #                         min_vars = new_free_vars
1064    #                         min_rules = new_rules
1065    #         return rules, free_vars
1066    #
1067    #     rules, free_vars = fill_with_id_functions(rules, free_vars, maybe_id_edges)
1068    #     if free_vars != []:
1069    #         raise Error("Hum … maybe we can try to iterate over the remaining free-variable.")
1070    #     print("debug: Great we found a solution for a reduce-reduce conflict")
1071    #
1072    #     # The set of rules describe the function that each edge is expected to
1073    #     # support. If there is an Id(edge), then we know that we do not have to
1074    #     # change the graph for the given edge. If the rule is Eq(A, edge, B),
1075    #     # then we have to (filter A & pop) and push B, except if A or B is
1076    #     # None.
1077    #     #
1078    #     # For each edge, collect the set of rules concerning the edge to
1079    #     # determine which edges have to be transformed to add the filter&pop
1080    #     # and push actions.
1081    #     edge_rules = collections.defaultdict(lambda: [])
1082    #     for rule in rules:
1083    #         if isinstance(rule, Id):
1084    #             edge_rules[rule.edge] = None
1085    #         elif isinstance(rule, Eq):
1086    #             if edge_rules[rule.edge] is not None:
1087    #                 edge_rules[rule.edge].append(rule)
1088    #
1089    #     maybe_unreachable_set = set()
1090    #     flag_name = self.get_flag_for(nts)
1091    #     for edge, rules in edge_rules.items():
1092    #         # If the edge is an identity function, then skip doing any
1093    #         # modifications on it.
1094    #         if rules is None:
1095    #             continue
1096    #         # Otherwise, create a new state and transition for each mapping.
1097    #         src = self.states[edge.src]
1098    #         dest = src[edge.term]
1099    #         dest_state = self.states[dest]
1100    #         # TODO: Add some information to avoid having identical hashes as
1101    #         # the destination.
1102    #         actions = []
1103    #         for rule in OrderedFrozenSet(rules):
1104    #             assert isinstance(rule, Eq)
1105    #             seq = []
1106    #             if rule.flag_in is not None:
1107    #                 seq.append(FilterFlag(flag_name, True))
1108    #                 if rule.flag_in != rule.flag_out:
1109    #                     seq.append(PopFlag(flag_name))
1110    #             if rule.flag_out is not None and rule.flag_in != rule.flag_out:
1111    #                 seq.append(PushFlag(flag_name, rule.flag_out))
1112    #             actions.append(Seq(seq))
1113    #         # Assert that we do not map flag_in more than once.
1114    #         assert len(set(eq.flag_in for eq in rules)) < len(rules)
1115    #         # Create the new state and add edges.
1116    #         is_new, switch = self.new_state(dest.locations, OrderedFrozenSet(actions))
1117    #         assert is_new
1118    #         for seq in actions:
1119    #             self.add_edge(switch, seq, dest)
1120    #
1121    #         # Replace the edge from src to dest, by an edge from src to the
1122    #         # newly created switch state, which then decide which flag to set
1123    #         # before going to the destination target.
1124    #         self.replace_edge(src, edge.term, switch, maybe_unreachable_set)
1125    #
1126    #     self.remove_unreachable_states(maybe_unreachable_set)
1127    #     pass
1128
1129    def fix_with_lookahead(self, s: StateId, aps_lanes: typing.List[APS]) -> None:
1130        # Find the list of terminals following each actions (even reduce
1131        # actions).
1132        assert all(len(aps.lookahead) >= 1 for aps in aps_lanes)
1133        if self.debug_info:
1134            for aps in aps_lanes:
1135                print(str(aps))
1136        maybe_unreachable_set: OrderedSet[StateId] = OrderedSet()
1137
1138        # For each shifted term, associate a set of state and actions which
1139        # would have to be executed.
1140        shift_map: typing.DefaultDict[
1141            Term,
1142            typing.List[typing.Tuple[StateAndTransitions, typing.List[Edge]]]
1143        ]
1144        shift_map = collections.defaultdict(lambda: [])
1145        for aps in aps_lanes:
1146            actions = aps.history
1147            assert isinstance(actions[-1], Edge)
1148            src = actions[-1].src
1149            term = actions[-1].term
1150            assert term == aps.lookahead[0]
1151            assert isinstance(term, (str, End, ErrorSymbol, Nt))
1152
1153            # No need to consider any action beyind the first reduced action
1154            # since the reduced action is in charge of replaying the lookahead
1155            # terms.
1156            actions = list(keep_until(actions[:-1], lambda edge: not self.term_is_shifted(edge.term)))
1157            assert all(isinstance(edge.term, Action) for edge in actions)
1158
1159            # Change the order of the shifted term, shift all actions by 1 with
1160            # the given lookahead term, in order to match the newly generated
1161            # state machine.
1162            #
1163            # Shifting actions with the list of shifted terms is used to record
1164            # the number of terms to be replayed, as well as verifying whether
1165            # Lookahead filter actions should accept or reject this lane.
1166            new_actions = []
1167            accept = True
1168            for edge in actions:
1169                edge_term = edge.term
1170                assert isinstance(edge_term, Action)
1171                new_term = edge_term.shifted_action(term)
1172                if isinstance(new_term, bool):
1173                    if new_term is False:
1174                        accept = False
1175                        break
1176                    else:
1177                        continue
1178                new_actions.append(Edge(edge.src, new_term))
1179            if accept:
1180                target_id = self.states[src][term]
1181                target = self.states[target_id]
1182                shift_map[term].append((target, new_actions))
1183
1184        # Restore the new state machine based on a given state to use as a base
1185        # and the shift_map corresponding to edges.
1186        def restore_edges(
1187                state: StateAndTransitions,
1188                shift_map: typing.DefaultDict[
1189                    Term,
1190                    typing.List[typing.Tuple[StateAndTransitions, typing.List[Edge]]]
1191                ],
1192                depth: str
1193        ) -> None:
1194            # print("{}starting with {}\n".format(depth, state))
1195            edges = {}
1196            for term, actions_list in shift_map.items():
1197                # print("{}term: {}, lists: {}\n".format(depth, repr(term), repr(actions_list)))
1198                # Collect all the states reachable after shifting the term.
1199                # Compute the unique name, based on the locations and actions
1200                # which are delayed.
1201                locations: OrderedSet[str] = OrderedSet()
1202                delayed: OrderedSet[DelayedAction] = OrderedSet()
1203                new_shift_map: typing.DefaultDict[
1204                    Term,
1205                    typing.List[typing.Tuple[StateAndTransitions, typing.List[Edge]]]
1206                ]
1207                new_shift_map = collections.defaultdict(lambda: [])
1208                recurse = False
1209                if not self.term_is_shifted(term):
1210                    # There is no more target after a reduce action.
1211                    actions_list = []
1212                for target, actions in actions_list:
1213                    assert isinstance(target, StateAndTransitions)
1214                    locations |= target.locations
1215                    delayed |= target.delayed_actions
1216                    if actions != []:
1217                        # Pull edges, with delayed actions.
1218                        edge = actions[0]
1219                        assert isinstance(edge, Edge)
1220                        for action in actions:
1221                            action_term = action.term
1222                            assert isinstance(action_term, Action)
1223                            delayed.add(action_term)
1224                        edge_term = edge.term
1225                        assert edge_term is not None
1226                        new_shift_map[edge_term].append((target, actions[1:]))
1227                        recurse = True
1228                    else:
1229                        # Pull edges, as a copy of existing edges.
1230                        for next_term, next_dest_id in target.edges():
1231                            next_dest = self.states[next_dest_id]
1232                            new_shift_map[next_term].append((next_dest, []))
1233
1234                is_new, new_target = self.new_state(
1235                    OrderedFrozenSet(locations), OrderedFrozenSet(delayed))
1236                edges[term] = new_target.index
1237                if self.debug_info:
1238                    print("{}is_new = {}, index = {}".format(depth, is_new, new_target.index))
1239                    print("{}Add: {} -- {} --> {}".format(depth, state.index, str(term), new_target.index))
1240                    print("{}continue: (is_new: {}) or (recurse: {})".format(depth, is_new, recurse))
1241                if is_new or recurse:
1242                    restore_edges(new_target, new_shift_map, depth + "  ")
1243
1244            self.clear_edges(state, maybe_unreachable_set)
1245            for term, target_id in edges.items():
1246                self.add_edge(state, term, target_id)
1247            if self.debug_info:
1248                print("{}replaced by {}\n".format(depth, state))
1249
1250        state = self.states[s]
1251        restore_edges(state, shift_map, "")
1252        self.remove_unreachable_states(maybe_unreachable_set)
1253
1254    def fix_inconsistent_state(self, s: StateId, verbose: bool) -> bool:
1255        # Fix inconsistent states works one state at a time. The goal is to
1256        # achieve the same method as the Lane Tracer, but instead of building a
1257        # table to then mutate the parse state, we mutate the parse state
1258        # directly.
1259        #
1260        # This strategy is simpler, and should be able to reproduce the same
1261        # graph mutations as seen with Lane Table algorithm. One of the problem
1262        # with the Lane Table algorithm is that it assume reduce operations,
1263        # and as such it does not apply simply to epsilon transitions which are
1264        # used as conditions on the parse table.
1265        #
1266        # By using push-flag and filter-flag actions, we are capable to
1267        # decompose the Lane Table transformation of the parse table into
1268        # multiple steps which are working one step at a time, and with less
1269        # table state duplication.
1270
1271        state = self.states[s]
1272        if state is None or not state.is_inconsistent():
1273            return False
1274
1275        all_reduce = all(a.update_stack() for a, _ in state.epsilon)
1276        any_shift = (len(state.terminals) + len(state.nonterminals) + len(state.errors)) > 0
1277        try_with_context = all_reduce and not any_shift
1278        try_with_lookahead = not try_with_context
1279        # if verbose:
1280        #     print(aps_lanes_str(aps_lanes, "fix_inconsistent_state:", "\taps"))
1281        if try_with_context:
1282            if verbose:
1283                print("\tFix with context.")
1284            try_with_lookahead, aps_lanes = self.context_lanes(s)
1285            if not try_with_lookahead:
1286                assert aps_lanes != []
1287                self.fix_with_context(s, aps_lanes)
1288            elif verbose:
1289                print("\tFallback on fixing with lookahead.")
1290        if try_with_lookahead:
1291            if verbose:
1292                print("\tFix with lookahead.")
1293            aps_lanes = self.lookahead_lanes(s)
1294            assert aps_lanes != []
1295            self.fix_with_lookahead(s, aps_lanes)
1296        return True
1297
1298    def fix_inconsistent_table(self, verbose: bool, progress: bool) -> None:
1299        """The parse table might be inconsistent. We fix the parse table by looking
1300        around the inconsistent states for more context. Either by looking at the
1301        potential stack state which might lead to the inconsistent state, or by
1302        increasing the lookahead."""
1303        self.assume_inconsistent = True
1304        if verbose or progress:
1305            print("Fix parse table inconsistencies.")
1306
1307        todo: typing.Deque[StateId] = collections.deque()
1308        for state in self.states:
1309            if state.is_inconsistent():
1310                todo.append(state.index)
1311
1312        if verbose and todo:
1313            print("\n".join([
1314                "\nGrammar is inconsistent.",
1315                "\tNumber of States = {}",
1316                "\tNumber of inconsistencies found = {}"]).format(
1317                    len(self.states), len(todo)))
1318
1319        count = 0
1320
1321        def visit_table() -> typing.Iterator[None]:
1322            nonlocal count
1323            unreachable = []
1324            while todo:
1325                while todo:
1326                    yield  # progress bar.
1327                    # TODO: Compare stack / queue, for the traversal of the states.
1328                    s = todo.popleft()
1329                    if not self.is_reachable_state(s):
1330                        # NOTE: We do not fix unreachable states, as we might
1331                        # not be able to compute the reduce actions. However,
1332                        # we should not clean edges not backedges as the state
1333                        # might become reachable later on, since states are
1334                        # shared if they have the same locations.
1335                        unreachable.append(s)
1336                        continue
1337                    assert self.states[s].is_inconsistent()
1338                    start_len = len(self.states)
1339                    if verbose:
1340                        count = count + 1
1341                        print("Fixing state {}\n".format(self.states[s].stable_str(self.states)))
1342                    try:
1343                        self.fix_inconsistent_state(s, verbose)
1344                    except Exception as exc:
1345                        self.debug_info = True
1346                        raise ValueError(
1347                            "Error while fixing conflict in state {}\n\n"
1348                            "In the following grammar productions:\n{}"
1349                            .format(self.states[s].stable_str(self.states),
1350                                    self.debug_context(s, "\n", "\t"))
1351                        ) from exc
1352                    new_inconsistent_states = [
1353                        s.index for s in self.states[start_len:]
1354                        if s.is_inconsistent()
1355                    ]
1356                    if verbose:
1357                        print("\tAdding {} states".format(len(self.states[start_len:])))
1358                        print("\tWith {} inconsistent states".format(len(new_inconsistent_states)))
1359                    todo.extend(new_inconsistent_states)
1360
1361                # Check whether none of the previously inconsistent and
1362                # unreahable state became reachable. If so add it back to the
1363                # todo list.
1364                still_unreachable = []
1365                for s in unreachable:
1366                    if self.is_reachable_state(s):
1367                        todo.append(s)
1368                    else:
1369                        still_unreachable.append(s)
1370                unreachable = still_unreachable
1371
1372        consume(visit_table(), progress)
1373        if verbose:
1374            print("\n".join([
1375                "\nGrammar is now consistent.",
1376                "\tNumber of States = {}",
1377                "\tNumber of inconsistencies solved = {}"]).format(
1378                    len(self.states), count))
1379        assert not self.is_inconsistent()
1380        self.assume_inconsistent = False
1381
1382        if verbose:
1383            print("Fix Inconsistent Table Result:")
1384            self.debug_dump()
1385
1386    def remove_all_unreachable_state(self, verbose: bool, progress: bool) -> None:
1387        self.states = [s for s in self.states if s is not None]
1388        self.rewrite_reordered_state_indexes()
1389
1390    def lower_reduce_actions(self, verbose: bool, progress: bool) -> None:
1391        # Remove Reduce actions and replace them by the programmatic
1392        # equivalent.
1393        #
1394        # This transformation preserves the stack manipulations of the parse
1395        # table. It only changes it from being implicitly executed by the LR
1396        # parser, to being explicitly executed with actions.
1397        #
1398        # This transformation converts the hard-to-predict load of the shift
1399        # table into a branch prediction which is potentially easier to
1400        # predict.
1401        #
1402        # A side-effect of this transformation is that it removes the need for
1403        # replaying non-terminals, thus the backends could safely ignore the
1404        # ability of the shift function from handling non-terminals.
1405        if verbose or progress:
1406            print("Lower Reduce actions.")
1407
1408        maybe_unreachable_set: OrderedSet[StateId] = OrderedSet()
1409
1410        def transform() -> typing.Iterator[None]:
1411            for s in self.states:
1412                term, _ = next(iter(s.epsilon), (None, None))
1413                if self.term_is_shifted(term):
1414                    continue
1415                assert len(s.epsilon) == 1
1416                yield  # progress bar.
1417                reduce_state = s
1418                if verbose:
1419                    print("Inlining shift-operation for state {}".format(str(reduce_state)))
1420
1421                # The reduced_aps should contain all reduced path of the single
1422                # Reduce action which is present on this state. However, as
1423                # state of the graph are shared, some reduced paths might follow
1424                # the same path and reach the same state.
1425                #
1426                # This code collect for each replayed path, the tops of the
1427                # stack on top of which these states are replayed.
1428                aps = APS.start(s.index)
1429                states_by_replay_term = collections.defaultdict(list)
1430                # print("Start:\n{}".format(aps.string(name="\titer_aps")))
1431                # print(s.stable_str(self.states))
1432                for reduced_aps in aps.shift_next(self):
1433                    # As long as we have elements to replay, we should only
1434                    # have a single path for each reduced path. If the next
1435                    # state contains an action, then we stop here.
1436                    iter_aps = reduced_aps
1437                    next_is_action = self.states[iter_aps.state].epsilon != []
1438                    has_replay = iter_aps.replay != []
1439                    assert next_is_action is False and has_replay is True
1440                    while (not next_is_action) and has_replay:
1441                        # print("Step {}:\n{}".format(len(iter_aps.history),
1442                        #                             iter_aps.string(name="\titer_aps")))
1443                        next_aps = list(iter_aps.shift_next(self))
1444                        if len(next_aps) == 0:
1445                            # Note, this might happen as we are adding
1446                            # lookahead tokens from any successor, we might not
1447                            # always have a way to replay all tokens, in such
1448                            # case an error should be produced, but in the mean
1449                            # time, let's use the shift function as usual.
1450                            break
1451                        assert len(next_aps) == 1
1452                        iter_aps = next_aps[0]
1453                        next_is_action = self.states[iter_aps.state].epsilon != []
1454                        has_replay = iter_aps.replay != []
1455                    # print("End at {}:\n{}".format(len(iter_aps.history),
1456                    #                               iter_aps.string(name="\titer_aps")))
1457                    replay_list = [e.src for e in iter_aps.shift]
1458                    assert len(replay_list) >= 2
1459                    replay_term = Replay(replay_list[1:])
1460                    states_by_replay_term[replay_term].append(replay_list[0])
1461
1462                # Create FilterStates actions.
1463                filter_by_replay_term = {
1464                    replay_term: FilterStates(states)
1465                    for replay_term, states in states_by_replay_term.items()
1466                }
1467
1468                # Convert the Reduce action to an Unwind action.
1469                reduce_term, _ = next(iter(s.epsilon))
1470                if isinstance(reduce_term, Reduce):
1471                    unwind_term: Action = reduce_term.unwind
1472                else:
1473                    assert isinstance(reduce_term, Seq)
1474                    assert isinstance(reduce_term.actions[-1], Reduce)
1475                    unwind_term = Seq(list(reduce_term.actions[:-1]) + [reduce_term.actions[-1].unwind])
1476
1477                # Remove the old Reduce edge if still present.
1478                # print("Before:\n{}".format(reduce_state.stable_str(self.states)))
1479                self.remove_edge(reduce_state, reduce_term, maybe_unreachable_set)
1480
1481                # Add Unwind action.
1482                # print("After:\n")
1483                locations = reduce_state.locations
1484                delayed: OrderedFrozenSet[DelayedAction] = OrderedFrozenSet(filter_by_replay_term.items())
1485                replay_size = 1  # Replay the unwound non-terminal
1486                is_new, filter_state = self.new_state(locations, delayed, replay_size)
1487                self.add_edge(reduce_state, unwind_term, filter_state.index)
1488                if not is_new:
1489                    # The destination state already exists. Assert that all
1490                    # outgoing edges are matching what we would have generated.
1491                    if len(filter_by_replay_term) == 1:
1492                        # There is only one predecessor, no need for a
1493                        # FilterState condition.
1494                        replay_term = next(iter(filter_by_replay_term))
1495                        assert replay_term in filter_state
1496                        continue
1497
1498                    for replay_term, filter_term in filter_by_replay_term.items():
1499                        assert filter_term in filter_state
1500                        replay_state = self.states[filter_state[filter_term]]
1501                        assert replay_term in replay_state
1502                    continue
1503
1504                if len(filter_by_replay_term) == 1:
1505                    replay_term = next(iter(filter_by_replay_term))
1506                    dest_idx = replay_term.replay_steps[-1]
1507                    # Do not add the FilterStates action, as there is only one.
1508                    # Add Replay actions from the filter_state to the destination.
1509                    self.add_edge(filter_state, replay_term, dest_idx)
1510                else:
1511                    for replay_term, filter_term in filter_by_replay_term.items():
1512                        dest_idx = replay_term.replay_steps[-1]
1513                        dest = self.states[dest_idx]
1514
1515                        # Add FilterStates action from the filter_state to the replay_state.
1516                        locations = dest.locations
1517                        delayed = OrderedFrozenSet(itertools.chain(dest.delayed_actions, [replay_term]))
1518                        is_new, replay_state = self.new_state(locations, delayed, replay_size)
1519                        self.add_edge(filter_state, filter_term, replay_state.index)
1520                        assert (not is_new) == (replay_term in replay_state)
1521
1522                        # Add Replay actions from the replay_state to the destination.
1523                        if is_new:
1524                            dest_idx = replay_term.replay_steps[-1]
1525                            self.add_edge(replay_state, replay_term, dest_idx)
1526                        # print(replay_state.stable_str(self.states))
1527                        assert not replay_state.is_inconsistent()
1528
1529                # print(filter_state.stable_str(self.states))
1530                # print(reduce_state.stable_str(self.states))
1531                assert not reduce_state.is_inconsistent()
1532                assert not filter_state.is_inconsistent()
1533
1534        consume(transform(), progress)
1535
1536    def fold_replay_unwind(self, verbose: bool, progress: bool) -> None:
1537        """Convert Replay action falling into Unwind action to an Unwind action which
1538        replay less terms."""
1539        if verbose or progress:
1540            print("Fold Replay followed by Unwind actions.")
1541
1542        maybe_unreachable_set: OrderedSet[StateId] = OrderedSet()
1543
1544        def try_transform(s: StateAndTransitions) -> bool:
1545            if len(s.epsilon) != 1:
1546                return False
1547            replay_term, replay_dest_idx = next(iter(s.epsilon))
1548            if not isinstance(replay_term, Replay):
1549                return False
1550            replay_dest = self.states[replay_dest_idx]
1551            if len(replay_dest.epsilon) != 1:
1552                return False
1553            unwind_term, unwind_dest_idx = next(iter(replay_dest.epsilon))
1554            if not unwind_term.update_stack():
1555                return False
1556            stack_diff = unwind_term.update_stack_with()
1557            if not stack_diff.reduce_stack():
1558                return False
1559            if stack_diff.pop + stack_diff.replay <= 0:
1560                return False
1561
1562            # Remove replayed terms from the Unwind action.
1563            replayed = replay_term.replay_steps
1564            unshifted = min(stack_diff.replay + min(s.arguments, stack_diff.pop), len(replayed))
1565            if unshifted < len(replayed):
1566                # We do not have all replayed terms as arguments, thus do not
1567                # consume arguments
1568                unshifted = min(stack_diff.replay, len(replayed))
1569            if unshifted == 0:
1570                return False
1571            new_unwind_term = unwind_term.unshift_action(unshifted)
1572            new_replay = new_unwind_term.update_stack_with().replay
1573
1574            # Replace the replay_term and unwind_term by terms which are
1575            # avoiding extra replay actions.
1576            self.remove_edge(s, replay_term, maybe_unreachable_set)
1577            if len(replayed) == unshifted:
1578                # The Unwind action replay more terms than what we originally
1579                # had. The replay term is replaced by an Unwind edge instead.
1580                assert s.arguments >= -new_replay
1581                self.add_edge(s, new_unwind_term, unwind_dest_idx)
1582            else:
1583                # The Unwind action replay and pop less terms than what we
1584                # originally had. Thus the replay action is shortened and a new
1585                # state is created to accomodate the new Unwind action.
1586                assert unshifted >= 1
1587                new_replay_term = Replay(replayed[:-unshifted])
1588                implicit_replay_term = Replay(replayed[-unshifted:])
1589                locations = replay_dest.locations
1590                delayed: OrderedFrozenSet[DelayedAction]
1591                delayed = OrderedFrozenSet(
1592                    itertools.chain(replay_dest.delayed_actions, [implicit_replay_term]))
1593                is_new, unwind_state = self.new_state(locations, delayed)
1594                assert (not is_new) == (new_unwind_term in unwind_state)
1595
1596                # Add new Replay and new Unwind actions.
1597                self.add_edge(s, new_replay_term, unwind_state.index)
1598                if is_new:
1599                    assert unwind_state.arguments >= -new_replay
1600                    self.add_edge(unwind_state, new_unwind_term, unwind_dest_idx)
1601                assert not unwind_state.is_inconsistent()
1602            assert not s.is_inconsistent()
1603            return True
1604
1605        def transform() -> typing.Iterator[None]:
1606            for s in self.states:
1607                if try_transform(s):
1608                    yield  # progress bar
1609
1610        consume(transform(), progress)
1611        self.remove_unreachable_states(maybe_unreachable_set)
1612
1613    def fold_identical_endings(self, verbose: bool, progress: bool) -> None:
1614        # If 2 states have the same outgoing edges, then we can merge the 2
1615        # states into a single state, and rewrite all the backedges leading to
1616        # these states to be replaced by edges going to the reference state.
1617        if verbose or progress:
1618            print("Fold identical endings.")
1619
1620        def rewrite_backedges(state_list: typing.List[StateAndTransitions],
1621                              state_map: typing.Dict[StateId, StateId],
1622                              backrefs: typing.Dict[StateId,
1623                                                    typing.List[typing.Tuple[StateId, Action, StateId]]],
1624                              maybe_unreachable: OrderedSet[StateId]) -> bool:
1625            all_backrefs = []
1626            new_backrefs = set()
1627            for s in state_list:
1628                all_backrefs.extend(backrefs[s.index])
1629            # All states have the same outgoing edges. Thus we replace all of
1630            # them by a single state. We do that by replacing edges of which
1631            # are targeting the state in the state_list by edges targetting the
1632            # ref state.
1633            ref = state_list.pop()
1634            tmp_state_map = default_fwd_dict(state_map)
1635            for s in state_list:
1636                tmp_state_map[s.index] = ref.index
1637
1638            for ref_s, ref_t, _d in all_backrefs:
1639                new_backrefs.add((ref_s, ref_t.rewrite_state_indexes(tmp_state_map)))
1640            if len(all_backrefs) != len(new_backrefs):
1641                # Skip this rewrite if when rewritting we are going to cause
1642                # some aliasing to happen between actions which are going to
1643                # different states.
1644                return False
1645
1646            replace_edges = [e for s in state_list for e in s.backedges]
1647            hit = False
1648            for edge in replace_edges:
1649                edge_term = edge.term
1650                assert edge_term is not None
1651                src = self.states[edge.src]
1652                old_dest = src[edge_term]
1653                # print("replace {} -- {} --> {}, by {} -- {} --> {}"
1654                #       .format(src.index, edge_term, src[edge_term], src.index, edge_term, ref.index))
1655                self.replace_edge(src, edge_term, ref.index, maybe_unreachable)
1656                state_map[old_dest] = ref.index
1657                hit = True
1658            return hit
1659
1660        def rewrite_if_same_outedges(state_list: typing.List[StateAndTransitions]) -> bool:
1661            maybe_unreachable: OrderedSet[StateId] = OrderedSet()
1662            backrefs = collections.defaultdict(list)
1663            outedges = collections.defaultdict(list)
1664            for s in state_list:
1665                # Iterate first over actions, then over ordinary states.
1666                self.assert_state_invariants(s)
1667                outedges[tuple(s.edges())].append(s)
1668                if s.epsilon == []:
1669                    continue
1670                for t, d in s.edges():
1671                    if not isinstance(t, Action):
1672                        continue
1673                    for r in t.state_refs():
1674                        backrefs[r].append((s.index, t, d))
1675            hit = False
1676            state_map: typing.Dict[StateId, StateId] = default_id_dict()
1677            for same in outedges.values():
1678                if len(same) > 1:
1679                    hit = rewrite_backedges(same, state_map, backrefs, maybe_unreachable) or hit
1680            if hit:
1681                self.remove_unreachable_states(maybe_unreachable)
1682                self.rewrite_state_indexes(state_map)
1683                self.remove_all_unreachable_state(verbose, progress)
1684            return hit
1685
1686        def visit_table() -> typing.Iterator[None]:
1687            hit = True
1688            while hit:
1689                yield  # progress bar.
1690                hit = rewrite_if_same_outedges(self.states)
1691
1692        consume(visit_table(), progress)
1693
1694    def group_epsilon_states(self, verbose: bool, progress: bool) -> None:
1695        def all_action_inedges(s: StateAndTransitions) -> bool:
1696            return all(isinstance(e.term, Action) for e in s.backedges)
1697        shift_states, action_states = split(self.states, lambda s: len(s.epsilon) == 0)
1698        from_act_action_states, from_shf_action_states = split(action_states, all_action_inedges)
1699        self.states = []
1700        self.states.extend(shift_states)
1701        self.states.extend(from_shf_action_states)
1702        self.states.extend(from_act_action_states)
1703        self.rewrite_reordered_state_indexes()
1704
1705    def group_nonterminal_states(self, verbose: bool, progress: bool) -> None:
1706        # This function is used to reduce the range of FilterStates values,
1707        # such that the Rust compiler can compile FilterStates match statements
1708        # to a table-switch.
1709        freq_count = collections.Counter(nt for s in self.states for nt in s.nonterminals)
1710        freq_nt, _ = zip(*freq_count.most_common())
1711
1712        def state_value(s: StateAndTransitions) -> float:
1713            value = 0.0
1714            if len(s.epsilon) != 0:
1715                return 4.0
1716            if len(s.nonterminals) == 0:
1717                return 2.0
1718            i = 1.0
1719            for nt in freq_nt:
1720                if nt in s:
1721                    value += i
1722                i /= 2.0
1723            return -value
1724        self.states.sort(key=state_value)
1725        self.rewrite_reordered_state_indexes()
1726
1727    def count_shift_states(self) -> int:
1728        return sum(1 for s in self.states if s is not None and len(s.epsilon) == 0)
1729
1730    def count_action_states(self) -> int:
1731        return sum(1 for s in self.states if s is not None and len(s.epsilon) > 0)
1732
1733    def count_action_from_shift_states(self) -> int:
1734        def from_shift_states(s: StateAndTransitions) -> bool:
1735            return any(not isinstance(e.term, Action) for e in s.backedges)
1736
1737        return sum(1 for s in self.states if len(s.epsilon) > 0 and from_shift_states(s))
1738
1739    def prepare_debug_context(self) -> DebugInfo:
1740        """To better filter out the traversal of the grammar in debug context, we
1741        pre-compute for each state the maximal depth of each state within a
1742        production. Therefore, if visiting a state no increases the reducing
1743        depth beyind the ability to shrink the shift list to 0, then we can
1744        stop going deeper, as we entered a different production. """
1745        depths = collections.defaultdict(lambda: [])
1746        for s in self.states:
1747            if s is None or not s.epsilon:
1748                continue
1749            aps = APS.start(s.index)
1750            for aps_next in aps.shift_next(self):
1751                if not aps_next.reducing:
1752                    continue
1753                for i, edge in enumerate(aps_next.stack):
1754                    depths[edge.src].append(i + 1)
1755        return {s: max(ds) for s, ds in depths.items()}
1756
1757    def debug_context(
1758            self,
1759            state: StateId,
1760            split_txt: str = "; ",
1761            prefix: str = ""
1762    ) -> str:
1763        """Reconstruct the grammar production by traversing the parse table."""
1764        if self.debug_info is False:
1765            return ""
1766        if self.debug_info is True:
1767            self.debug_info = self.prepare_debug_context()
1768        debug_info = typing.cast(typing.Dict[StateId, int], self.debug_info)
1769
1770        record = []
1771
1772        def visit(aps: APS) -> bool:
1773            # Stop after reducing once.
1774            if aps.history == []:
1775                return True
1776            last = aps.history[-1].term
1777            is_unwind = isinstance(last, Action) and last.update_stack()
1778            has_shift_loop = len(aps.shift) != 1 + len(set(zip(aps.shift, aps.shift[1:])))
1779            can_reduce_later = True
1780            try:
1781                can_reduce_later = debug_info[aps.shift[-1].src] >= len(aps.shift)
1782            except KeyError:
1783                can_reduce_later = False
1784            stop = is_unwind or has_shift_loop or not can_reduce_later
1785            # Record state which are reducing at most all the shifted states.
1786            save = stop and len(aps.shift) == 1
1787            save = save and is_unwind
1788            if save:
1789                assert isinstance(last, Action)
1790                save = last.update_stack_with().nt in self.states[aps.shift[0].src]
1791            if save:
1792                record.append(aps)
1793            return not stop
1794
1795        self.aps_visitor(APS.start(state), visit)
1796
1797        context: OrderedSet[str] = OrderedSet()
1798        for aps in record:
1799            assert aps.history != []
1800            action = aps.history[-1].term
1801            assert isinstance(action, Action)
1802            assert action.update_stack()
1803            stack_diff = action.update_stack_with()
1804            replay = stack_diff.replay
1805            before = [repr(e.term) for e in aps.stack[:-1]]
1806            after = [repr(e.term) for e in aps.history[:-1]]
1807            prod = before + ["\N{MIDDLE DOT}"] + after
1808            if replay < len(after) and replay > 0:
1809                del prod[-replay:]
1810                replay = 0
1811            if replay > len(after):
1812                replay += 1
1813            if replay > 0:
1814                prod = prod[:-replay] + ["[lookahead:"] + prod[-replay:] + ["]"]
1815            txt = "{}{} ::= {}".format(prefix, repr(stack_diff.nt), " ".join(prod))
1816            context.add(txt)
1817
1818        if split_txt is None:
1819            return context
1820        return split_txt.join(txt for txt in sorted(context))
1821