1# -*- coding: utf-8 -*-
2# Copyright 2012, Peter A. Bigot
3#
4# Licensed under the Apache License, Version 2.0 (the "License"); you may
5# not use this file except in compliance with the License. You may obtain a
6# copy of the License at:
7#
8#            http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13# License for the specific language governing permissions and limitations
14# under the License.
15
16"""This module provides Finite Automata with Counters.
17
18FACs are type of state machine where a transition may include a
19constraint and a modification to a set of counters.  They are used to
20implement regular expressions with numerical constraints, as are found
21in POSIX regexp, Perl, and XML schema.
22
23The implementation here derives from U{Regular Expressions with
24Numerical Constraints and Automata with Counters
25<https://bora.uib.no/bitstream/1956/3628/3/Hovland_LNCS%205684.pdf>},
26Dag Hovland, Lecture Notes in Computer Science, 2009, Volume 5684,
27Theoretical Aspects of Computing - ICTAC 2009, Pages 231-245.  In what
28follows, this reference will be denoted B{HOV09}.
29
30A regular expression is directly translated into a term tree, where
31nodes are operators such as sequence, choice, and counter
32restrictions, and the leaf nodes denote symbols in the language of the
33regular expression.
34
35In the case of XML content models, the symbols include L{element
36declarations <pyxb.xmlschema.structures.ElementDeclaration>} and
37L{wildcard elements <pyxb.xmlschema.structures.Wildcard>}.  A
38numerical constraint node corresponds to an L{XML particle
39<pyxb.xmlschema.structures.Particle>}, and choice and sequence nodes
40derive from L{model groups <pyxb.xmlschema.structures.ModelGroup>} of
41types B{choice} and B{sequence}.  As suggested in U{The Membership
42Problem for Regular Expressions with Unordered Concatenation and
43Numerical Constraints <http://www.ii.uib.no/~dagh/presLATA2012.pdf>}
44the B{all} content model can be translated into state machine using
45choice and sequence at the cost of a quadratic size explosion.  Since
46some XML content models might have a hundred terms in an unordered
47catenation, this is not acceptable, and the implementation here
48optimizes this construct by creating a leaf node in the automaton
49which in turn contains sub-automata for each term, and permits an exit
50transition only when all the terms that are required have been
51completed.
52
53@note: In XSD 1.1 the restriction that terms in an B{all} model group
54occur at most once has been removed.  Since the current implementation
55removes a completed term from the set of available terms, this will
56not work: instead the subconfiguration with its counter values must be
57retained between matches.
58"""
59
60import operator
61import functools
62import logging
63from pyxb.utils import six
64from pyxb.utils.six.moves import xrange
65
66log_ = logging.getLogger(__name__)
67
68class FACError (Exception):
69    pass
70
71class InvalidTermTreeError (FACError):
72    """Exception raised when a FAC term tree is not a tree.
73
74    For example, a L{Symbol} node appears multiple times, or a cycle is detected."""
75
76    parent = None
77    """The L{MultiTermNode} containing the term that proves invalidity"""
78
79    term = None
80    """The L{Node} that proves invalidity"""
81
82    def __init__ (self, *args):
83        (self.parent, self.term) = args
84        super(InvalidTermTreeError, self).__init__(*args)
85
86class UpdateApplicationError (FACError):
87    """Exception raised when an unsatisfied update instruction is executed.
88
89    This indicates an internal error in the implementation."""
90
91    update_instruction = None
92    """The L{UpdateInstruction} with an unsatisfied L{CounterCondition}"""
93
94    values = None
95    """The unsatisfying value map from L{CounterCondition} instances to integers"""
96
97    def __init__ (self, *args):
98        (self.update_instruction, self.values) = args
99        super(UpdateApplicationError, self).__init__(*args)
100
101class AutomatonStepError (Exception):
102    """Symbol rejected by L{Configuration_ABC.step}.
103
104    The exception indicates that the proposed symbol either failed to
105    produce a transition (L{UnrecognizedSymbolError}) or produced
106    multiple equally valid transitions
107    (L{NondeterministicSymbolError})."""
108
109    configuration = None
110    """The instance of L{Configuration_ABC} that raised the exception.
111    From L{Configuration_ABC.acceptableSymbols} you can determine what
112    alternatives might be present."""
113
114    symbol = None
115    """The symbol that was not accepted."""
116
117    def __get_acceptable (self):
118        """A list of symbols that the configuration would accept in its current state."""
119        return self.configuration.acceptableSymbols()
120    acceptable = property(__get_acceptable)
121
122    def __init__ (self, *args):
123        (self.configuration, self.symbol) = args
124        super(AutomatonStepError, self).__init__(*args)
125
126class UnrecognizedSymbolError (AutomatonStepError):
127    """L{Configuration.step} failed to find a valid transition."""
128    pass
129
130class NondeterministicSymbolError (AutomatonStepError):
131    """L{Configuration.step} found multiple transitions."""
132    pass
133
134class SymbolMatch_mixin (object):
135    """Mix-in used by symbols to provide a custom match implementation.
136
137    If a L{State.symbol} value is an instance of this mix-in, then it
138    will be used to validate a candidate symbol for a match."""
139
140    def match (self, symbol):
141        raise NotImplementedError('%s.match' % (type(self).__name__,))
142
143class State (object):
144    """A thin wrapper around an object reference.
145
146    The state of the automaton corresponds to a position, or marked
147    symbol, in the term tree.  Because the same symbol may appear at
148    multiple locations in the tree, and the distinction between these
149    positions is critical, a L{State} wrapper is provided to maintain
150    distinct values."""
151
152    def __init__ (self, symbol, is_initial, final_update=None, is_unordered_catenation=False):
153        """Create a FAC state.
154
155        @param symbol: The symbol associated with the state.
156        Normally initialized from the L{Symbol.metadata} value.  The
157        state may be entered if, among other conditions, the L{match}
158        routine accepts the proposed input as being consistent with
159        this value.
160
161        @param is_initial: C{True} iff this state may serve as the
162        first state of the automaton.
163
164        @param final_update: C{None} if this state is not an
165        accepting state of the automaton; otherwise a set of
166        L{UpdateInstruction} values that must be satisfied by the
167        counter values in a configuration as a further restriction of
168        acceptance.
169
170        @param is_unordered_catenation: C{True} if this state has
171        subautomata that must be matched to execute the unordered
172        catenation of an L{All} node; C{False} if this is a regular
173        symbol."""
174        self.__symbol = symbol
175        self.__isInitial = not not is_initial
176        self.__finalUpdate = final_update
177        self.__isUnorderedCatenation = is_unordered_catenation
178
179    __automaton = None
180    def __get_automaton (self):
181        """Link to the L{Automaton} to which the state belongs."""
182        return self.__automaton
183    def _set_automaton (self, automaton):
184        """Method invoked during automaton construction to set state owner."""
185        assert self.__automaton is None
186        self.__automaton = automaton
187        return self
188    automaton = property(__get_automaton)
189
190    __symbol = None
191    def __get_symbol (self):
192        """Application-specific metadata identifying the symbol.
193
194        See also L{match}."""
195        return self.__symbol
196    symbol = property(__get_symbol)
197
198    __isUnorderedCatenation = None
199    def __get_isUnorderedCatenation (self):
200        """Indicate whether the state has subautomata for unordered
201        catenation.
202
203        To reduce state explosion due to non-determinism, such a state
204        executes internal transitions in subautomata until all terms
205        have matched or a failure is discovered."""
206        return self.__isUnorderedCatenation
207    isUnorderedCatenation = property(__get_isUnorderedCatenation)
208
209    __subAutomata = None
210    def __get_subAutomata (self):
211        """A sequence of sub-automata supporting internal state transitions.
212
213        This will return C{None} unless L{isUnorderedCatenation} is C{True}."""
214        return self.__subAutomata
215    def _set_subAutomata (self, *automata):
216        assert self.__subAutomata is None
217        assert self.__isUnorderedCatenation
218        self.__subAutomata = automata
219    subAutomata = property(__get_subAutomata)
220
221    __isInitial = None
222    def __get_isInitial (self):
223        """C{True} iff this state may be the first state the automaton enters."""
224        return self.__isInitial
225    isInitial = property(__get_isInitial)
226
227    __automatonEntryTransitions = None
228    def __get_automatonEntryTransitions (self):
229        """Return the set of initial transitions allowing entry to the automata through this state.
230
231        These are structurally-permitted transitions only, and must be
232        filtered based on the symbol that might trigger the
233        transition.  The results are not filtered based on counter
234        value, since this value is used to determine how the
235        containing automaton might be entered.  Consequently the
236        return value is the empty set unless this is an initial state.
237
238        The returned set is closed under entry to sub-automata,
239        i.e. it is guaranteed that each transition includes a
240        consuming state even if it requires a multi-element chain of
241        transitions into subautomata to reach one."""
242        if self.__automatonEntryTransitions is None:
243            transitions = []
244            if self.__isInitial:
245                xit = Transition(self, set())
246                if self.__subAutomata is None:
247                    transitions.append(xit)
248                else:
249                    for sa in self.__subAutomata:
250                        for saxit in sa.initialTransitions:
251                            transitions.append(xit.chainTo(saxit.makeEnterAutomatonTransition()))
252            self.__automatonEntryTransitions = transitions
253        return self.__automatonEntryTransitions
254    automatonEntryTransitions = property(__get_automatonEntryTransitions)
255
256    __finalUpdate = None
257    def __get_finalUpdate (self):
258        """Return the update instructions that must be satisfied for this to be a final state."""
259        return self.__finalUpdate
260    finalUpdate = property(__get_finalUpdate)
261
262    def subAutomataInitialTransitions (self, sub_automata=None):
263        """Return the set of candidate transitions to enter a sub-automaton of this state.
264
265        @param sub_automata: A subset of the sub-automata of this
266        state which should contribute to the result.  If C{None}, all
267        sub-automata are used.
268
269        @return: A pair C{(nullable, transitions)} where C{nullable}
270        is C{True} iff there is at least one sub-automaton that is in
271        an accepting state on entry, and C{transitions} is a list of
272        L{Transition} instances describing how to reach some state in
273        a sub-automaton via a consumed symbol.
274        """
275        assert self.__subAutomata is not None
276        is_nullable = True
277        transitions = []
278        if sub_automata is None:
279            sub_automata = self.__subAutomata
280        for sa in sub_automata:
281            if not sa.nullable:
282                is_nullable = False
283            transitions.extend(sa.initialTransitions)
284        return (is_nullable, transitions)
285
286    def isAccepting (self, counter_values):
287        """C{True} iff this state is an accepting state for the automaton.
288
289        @param counter_values: Counter values that further validate
290        whether the requirements of the automaton have been met.
291
292        @return: C{True} if this is an accepting state and the
293        counter values relevant at it are satisfied."""
294        if self.__finalUpdate is None:
295            return False
296        return UpdateInstruction.Satisfies(counter_values, self.__finalUpdate)
297
298    __transitionSet = None
299    def __get_transitionSet (self):
300        """Definitions of viable transitions from this state.
301
302        The transition set of a state is a set of L{Transition} nodes
303        identifying a state reachable in a single step from this
304        state, and a set of counter updates that must apply if the
305        transition is taken.
306
307        These transitions may not in themselves consume a symbol.  For
308        example, if the destination state represents a match of an
309        L{unordered catenation of terms<All>}, then secondary
310        processing must be done to traverse into the automata for
311        those terms and identify transitions that include a symbol
312        consumption.
313
314        @note: Although conceptually the viable transitions are a set,
315        this implementation maintains them in a list so that order is
316        preserved when automata processing becomes non-deterministic.
317        PyXB is careful to build the transition list so that the
318        states are attempted in the order in which they appear in the
319        schema that define the automata.
320        """
321        return self.__transitionSet
322    transitionSet = property(__get_transitionSet)
323
324    def _set_transitionSet (self, transition_set):
325        """Method invoked during automaton construction to set the
326        legal transitions from the state.
327
328        The set of transitions cannot be defined until all states that
329        appear in it are available, so the creation of the automaton
330        requires that the association of the transition set be
331        delayed.  (Though described as a set, the transitions are a
332        list where order reflects priority.)
333
334        @param transition_set: a list of pairs where the first
335        member is the destination L{State} and the second member is the
336        set of L{UpdateInstruction}s that apply when the automaton
337        transitions to the destination state."""
338
339        self.__transitionSet = []
340        seen = set()
341        for xit in transition_set:
342            if not (xit in seen):
343                seen.add(xit)
344                self.__transitionSet.append(xit)
345
346    def match (self, symbol):
347        """Return C{True} iff the symbol matches for this state.
348
349        This may be overridden by subclasses when matching by
350        equivalence does not work.  Alternatively, if the symbol
351        stored in this node is a subclass of L{SymbolMatch_mixin}, then
352        its match method will be used.  Otherwise C{symbol} matches
353        only if it is equal to the L{symbol} of this state.
354
355        @param symbol: A candidate symbol corresponding to the
356        expression symbol for this state.
357
358        @return: C{True} iff C{symbol} is a match for this state.
359        """
360        if isinstance(self.__symbol, SymbolMatch_mixin):
361            return self.__symbol.match(symbol)
362        return self.__symbol == symbol
363
364    def __str__ (self):
365        return 'S.%x' % (id(self),)
366
367    def _facText (self):
368        rv = []
369        rv.extend(map(str, self.__transitionSet))
370        if self.__finalUpdate is not None:
371            if 0 == len(self.__finalUpdate):
372                rv.append('Final (no conditions)')
373            else:
374                rv.append('Final if %s' % (' '.join(map(lambda _ui: str(_ui.counterCondition), self.__finalUpdate))))
375        return '\n'.join(rv)
376
377class CounterCondition (object):
378    """A counter condition is a range limit on valid counter values.
379
380    Instances of this class serve as keys for the counters that
381    represent the configuration of a FAC.  The instance also maintains
382    a pointer to application-specific L{metadata}."""
383
384    __min = None
385    def __get_min (self):
386        """The minimum legal value for the counter.
387
388        This is a non-negative integer."""
389        return self.__min
390    min = property(__get_min)
391
392    __max = None
393    def __get_max (self):
394        """The maximum legal value for the counter.
395
396        This is a positive integer, or C{None} to indicate that the
397        counter is unbounded."""
398        return self.__max
399    max = property(__get_max)
400
401    __metadata = None
402    def __get_metadata (self):
403        """A pointer to application metadata provided when the condition was created."""
404        return self.__metadata
405    metadata = property(__get_metadata)
406
407    def __init__ (self, min, max, metadata=None):
408        """Create a counter condition.
409
410        @param min: The value for L{min}
411        @param max: The value for L{max}
412        @param metadata: The value for L{metadata}
413        """
414        self.__min = min
415        self.__max = max
416        self.__metadata = metadata
417
418    def __hash__ (self):
419        return hash(self.__min) ^ hash(self.__max) ^ hash(self.__metadata)
420
421    def __eq__ (self, other):
422        return (other is not None) \
423            and (self.__min == other.__min) \
424            and (self.__max == other.__max) \
425            and (self.__metadata == other.__metadata)
426
427    def __ne__ (self, other):
428        return not self.__eq__(other)
429
430    def __str__ (self):
431        return 'C.%x{%s,%s}' % (id(self), self.min, self.max is not None and self.max or '')
432
433class UpdateInstruction:
434    """An update instruction pairs a counter with a mutation of that
435    counter.
436
437    The instruction is executed during a transition from one state to
438    another, and causes the corresponding counter to be incremented or
439    reset.  The instruction may only be applied if doing so does not
440    violate the conditions of the counter it affects."""
441
442    __counterCondition = None
443    def __get_counterCondition (self):
444        """A reference to the L{CounterCondition} identifying the
445        counter to be updated.
446
447        The counter condition instance is used as a key to the
448        dictionary maintaining current counter values."""
449        return self.__counterCondition
450    counterCondition = property(__get_counterCondition)
451
452    __doIncrement = None
453    def __get_doIncrement (self):
454        """C{True} if the counter is to be incremented; C{False} if it is to be reset."""
455        return self.__doIncrement
456    doIncrement = property(__get_doIncrement)
457
458    # Cached values extracted from counter condition
459    __min = None
460    __max = None
461
462    def __init__ (self, counter_condition, do_increment):
463        """Create an update instruction.
464
465        @param counter_condition: A L{CounterCondition} identifying a
466        minimum and maximum value for a counter, and serving as a map
467        key for the value of the corresponding counter.
468
469        @param do_increment: C{True} if the update is to increment
470        the value of the counter; C{False} if the update is to reset
471        the counter.
472        """
473        self.__counterCondition = counter_condition
474        self.__doIncrement = not not do_increment
475        self.__min = counter_condition.min
476        self.__max = counter_condition.max
477
478    def satisfiedBy (self, counter_values):
479        """Implement a component of definition 5 from B{HOV09}.
480
481        The update instruction is satisfied by the counter values if
482        its action may be legitimately applied to the value of its
483        associated counter.
484
485        @param counter_values: A map from  L{CounterCondition}s to
486        non-negative integers
487
488        @return:  C{True} or C{False}
489        """
490        value = counter_values[self.__counterCondition]
491        if self.__doIncrement \
492                and (self.__max is not None) \
493                and (value >= self.__max):
494            return False
495        if (not self.__doIncrement) \
496                and (value < self.__min):
497            return False
498        return True
499
500    @classmethod
501    def Satisfies (cls, counter_values, update_instructions):
502        """Return C{True} iff the counter values satisfy the update
503        instructions.
504
505        @param counter_values: A map from L{CounterCondition} to
506        integer counter values
507
508        @param update_instructions: A set of L{UpdateInstruction}
509        instances
510
511        @return: C{True} iff all instructions are satisfied by the
512        values and limits."""
513        for psi in update_instructions:
514            if not psi.satisfiedBy(counter_values):
515                return False
516        return True
517
518    def apply (self, counter_values):
519        """Apply the update instruction to the provided counter values.
520
521        @param counter_values: A map from L{CounterCondition} to
522        integer counter values.  This map is updated in-place."""
523        if not self.satisfiedBy(counter_values):
524            raise UpdateApplicationError(self, counter_values)
525        value = counter_values[self.__counterCondition]
526        if self.__doIncrement:
527            value += 1
528        else:
529            value = 1
530        counter_values[self.__counterCondition] = value
531
532    @classmethod
533    def Apply (cls, update_instructions, counter_values):
534        """Apply the update instructions to the counter values.
535
536        @param update_instructions: A set of L{UpdateInstruction}
537        instances.
538
539        @param counter_values: A map from L{CounterCondition}
540        instances to non-negative integers.  This map is updated
541        in-place by applying each instruction in
542        C{update_instructions}."""
543        for psi in update_instructions:
544            psi.apply(counter_values)
545
546    def __hash__ (self):
547        return hash(self.__counterCondition) ^ hash(self.__doIncrement)
548
549    def __eq__ (self, other):
550        return (other is not None) \
551            and (self.__doIncrement == other.__doIncrement) \
552            and (self.__counterCondition == other.__counterCondition)
553
554    def __ne__ (self, other):
555        return not self.__eq__(other)
556
557    def __str__ (self):
558        return '%s %s' % (self.__doIncrement and 'inc' or 'reset', self.__counterCondition)
559
560class Transition (object):
561    """Representation of a FAC state transition."""
562
563    __destination = None
564    def __get_destination (self):
565        """The transition destination state."""
566        return self.__destination
567    destination = property(__get_destination)
568
569    __updateInstructions = None
570    def __get_updateInstructions (self):
571        """The set of counter updates that are applied when the transition is taken."""
572        return self.__updateInstructions
573    updateInstructions = property(__get_updateInstructions)
574
575    __nextTransition = None
576    def __get_nextTransition (self):
577        """The next transition to apply in this chain.
578
579        C{None} if this is the last transition in the chain."""
580        return self.__nextTransition
581    nextTransition = property(__get_nextTransition)
582
583    __layerLink = None
584    def __get_layerLink (self):
585        """A directive relating to changing automaton layer on transition.
586
587        C{None} indicates this transition is from one state to another
588        within a single automaton.
589
590        An instance of L{Configuration} is a transition on completion
591        of a subautomaton back to the configuration in the parent
592        automaton.  The L{destination} is the state in the parent automaton.
593
594        An instance of L{Automaton} requires creation of a
595        sub-configuration and initial entry into the automaton.  The
596        L{destination} is the state in the sub-automaton.
597        """
598        return self.__layerLink
599    layerLink = property(__get_layerLink)
600
601    def __init__ (self, destination, update_instructions, layer_link=None):
602        """Create a transition to a state.
603
604        @param destination: the state into which the transition is
605        made
606
607        @param update_instructions: A iterable of L{UpdateInstruction}s
608        denoting the changes that must be made to counters as a
609        consequence of taking the transition.
610
611        @keyword layer_link: The value for L{layerLink}."""
612        self.__destination = destination
613        if not isinstance(update_instructions, list):
614            update_instructions = list(update_instructions)
615        self.__updateInstructions = update_instructions
616        self.__layerLink = layer_link
617
618    def consumingState (self):
619        """Return the state in this transition chain that must match a symbol."""
620
621        # Transitions to a state with subautomata never consume anything
622        if self.__destination.subAutomata is not None:
623            if not self.__nextTransition:
624                return None
625            return self.__nextTransition.consumingState()
626        # I don't think there should be subsequent transitions
627        assert self.__nextTransition is None
628        return self.__destination
629
630    def consumedSymbol (self):
631        """Return the L{symbol<State.symbol>} of the L{consumingState}."""
632        return self.consumingState().symbol
633
634    def satisfiedBy (self, configuration):
635        """Check the transition update instructions against
636        configuration counter values.
637
638        This implementation follows layer changes, updating the
639        configuration used as counter value source as necessary.
640
641        @param configuration: A L{Configuration} instance containing
642        counter data against which update instruction satisfaction is
643        checked.
644
645        @return: C{True} iff all update instructions along the
646        transition chain are satisfied by their relevant
647        configuration."""
648        # If we're entering an automaton, we know no subsequent
649        # transitions have update instructions
650        if isinstance(self.__layerLink, Automaton):
651            return True
652        # If we're leaving an automaton, switch to the configuration
653        # that is relevant to the destination of the transition.
654        if isinstance(self.__layerLink, Configuration):
655            configuration = self.__layerLink
656        assert self.destination.automaton == configuration.automaton
657        # Blow chunks if the configuration doesn't satisfy the transition
658        if not configuration.satisfies(self):
659            return False
660        # Otherwise try the next transition, or succeed if there isn't one
661        if self.__nextTransition:
662            return self.__nextTransition.satisfiedBy(configuration)
663        return True
664
665    def apply (self, configuration, clone_map=None):
666        """Apply the transitition to a configuration.
667
668        This updates the configuration counter values based on the
669        update instructions, and sets the new configuration state.
670
671        @note: If the transition involves leaving a sub-automaton or
672        creating a new sub-automaton, the returned configuration
673        structure will be different from the one passed in.  You
674        should invoke this as::
675
676          cfg = transition.apply(cfg)
677
678        @param configuration: A L{Configuration} of an executing automaton
679
680        @param clone_map: A map from L{Configuration} to
681        L{Configuration} reflecting the replacements made when the
682        configuration for which the transition was calculated was
683        subsequently cloned into the C{configuration} passed into this
684        method.  This is only necessary when the transition includes
685        layer transitions.
686
687        @return: The resulting configuration
688        """
689        layer_link = self.__layerLink
690        if isinstance(layer_link, Configuration):
691            if clone_map is not None:
692                layer_link = clone_map[layer_link]
693            configuration = layer_link.leaveAutomaton(configuration)
694        elif isinstance(layer_link, Automaton):
695            configuration = configuration.enterAutomaton(layer_link)
696        UpdateInstruction.Apply(self.updateInstructions, configuration._get_counterValues())
697        configuration._set_state(self.destination, layer_link is None)
698        if self.__nextTransition is None:
699            return configuration
700        return self.__nextTransition.apply(configuration, clone_map)
701
702    def chainTo (self, next_transition):
703        """Duplicate the state and chain the duplicate to a successor
704        transition.
705
706        This returns a new transition which applies the operation for
707        this transition, then proceeds to apply the next transition in
708        the chain.
709
710        @note: The node that is invoking this must not have successor
711        transitions.
712
713        @param next_transition: A L{Transition} node describing a
714        subsequent transition.
715
716        @return: a clone of this node, augmented with a link to
717        C{next_transition}."""
718        assert not self.__nextTransition
719        head = type(self)(self.__destination, self.__updateInstructions, layer_link=self.__layerLink)
720        head.__nextTransition = next_transition
721        return head
722
723    def makeEnterAutomatonTransition (self):
724        """Replicate the transition as a layer link into its automaton.
725
726        This is used on initial transitions into sub-automata where a
727        sub-configuration must be created and recorded."""
728        assert self.__layerLink is None
729        assert self.__nextTransition is None
730        head = type(self)(self.__destination, self.__updateInstructions)
731        head.__layerLink = self.__destination.automaton
732        return head
733
734    def __hash__ (self):
735        rv = hash(self.__destination)
736        for ui in self.__updateInstructions:
737            rv ^= hash(ui)
738        return rv ^ hash(self.__nextTransition) ^ hash(self.__layerLink)
739
740    def __eq__ (self, other):
741        return (other is not None) \
742            and (self.__destination == other.__destination) \
743            and (self.__updateInstructions == other.__updateInstructions) \
744            and (self.__nextTransition == other.__nextTransition) \
745            and (self.__layerLink == other.__layerLink)
746
747    def __ne__ (self, other):
748        return not self.__eq__(other)
749
750    def __str__ (self):
751        rv = []
752        if isinstance(self.__layerLink, Configuration):
753            rv.append('from A%x ' % (id(self.__layerLink.automaton),))
754        elif isinstance(self.__layerLink, Automaton):
755            rv.append('in A%x ' % (id(self.__layerLink)))
756        rv.append('enter %s ' % (self.destination,))
757        if (self.consumingState() == self.destination):
758            rv.append('via %s ' % (self.destination.symbol,))
759        rv.append('with %s' % (' ; '.join(map(str, self.updateInstructions)),))
760        if self.__nextTransition:
761            rv.append("\n\tthen ")
762            rv.append(str(self.__nextTransition))
763        return ''.join(rv)
764
765class Configuration_ABC (object):
766    """Base class for something that represents an L{Automaton} in
767    execution.
768
769    For deterministic automata, this is generally a L{Configuration}
770    which records the current automaton state along with its counter
771    values.
772
773    For non-deterministic automata, this is a L{MultiConfiguration}
774    which records a set of L{Configuration}s."""
775
776    def acceptableSymbols (self):
777        """Return the acceptable L{Symbol}s given the current
778        configuration.
779
780        This method extracts the symbol from all candidate transitions
781        that are permitted based on the current counter values.
782        Because transitions are presented in a preferred order, the
783        symbols are as well."""
784        raise NotImplementedError('%s.acceptableSymbols' % (type(self).__name__,))
785
786    def step (self, symbol):
787        """Execute an automaton transition using the given symbol.
788
789        @param symbol: A symbol from the alphabet of the automaton's
790        language.  This is a Python value that should be accepted by
791        the L{SymbolMatch_mixin.match} method of a L{State.symbol}.
792        It is not a L{Symbol} instance.
793
794        @return: The new configuration resulting from the step.
795
796        @raises AutomatonStepError: L{UnrecognizedSymbolError}
797        when no transition compatible with C{symbol} is available, and
798        L{NondeterministicSymbolError} if C{symbol} admits multiple
799        transitions and the subclass does not support
800        non-deterministic steps (see L{MultiConfiguration}).
801
802        @warning: If the step entered or left a sub-automaton the
803        return value will not be the configuration that was used to
804        execute the step.  The proper pattern for using this method
805        is::
806
807           cfg = cfg.step(sym)
808
809        """
810        raise NotImplementedError('%s.step' % (type(self).__name__,))
811
812class Configuration (Configuration_ABC):
813    """The state of an L{Automaton} in execution.
814
815    This combines a state node of the automaton with a set of counter
816    values."""
817
818    __state = None
819    def __get_state (self):
820        """The state of the configuration.
821
822        This is C{None} to indicate an initial state, or one of the underlying automaton's states."""
823        return self.__state
824    def _set_state (self, state, is_layer_change):
825        """Internal state transition interface.
826
827        @param state: the new destination state
828
829        @param is_layer_change: C{True} iff the transition inducing
830        the state change involves a layer change.
831        """
832
833        # If the new state and old state are the same, the layer
834        # change has no effect (we're probably leaving a
835        # subconfiguration, and we want to keep the current set of
836        # sub-automata.)
837        if state == self.__state:
838            return
839
840        # Otherwise, discard any unprocessed automata in the former
841        # state, set the state, and if the new state has subautomata
842        # create a set holding them so they can be processed.
843        if is_layer_change:
844            self.__subConfiguration = None
845            self.__subAutomata = None
846        self.__state = state
847        if is_layer_change and (state.subAutomata is not None):
848            assert self.__subAutomata is None
849            self.__subAutomata = list(state.subAutomata)
850    state = property(__get_state)
851
852    __counterValues = None
853    """The values of the counters.
854
855    This is a map from the CounterCondition instances of the
856    underlying automaton to integer values."""
857    def _get_counterValues (self):
858        return self.__counterValues
859
860    __automaton = None
861    def __get_automaton (self):
862        return self.__automaton
863    automaton = property(__get_automaton)
864
865    __subConfiguration = None
866    def __get_subConfiguration (self):
867        """Reference to configuration being executed in a sub-automaton.
868
869        C{None} if no sub-automaton is active, else a reference to a
870        configuration that is being executed in a sub-automaton.
871
872        Sub-configurations are used to match sub-terms in an
873        L{unordered catenation<All>} term.  A configuration may have
874        at most one sub-configuration at a time, and the configuration
875        will be removed and possibly replaced when the term being
876        processed completes."""
877        return self.__subConfiguration
878    subConfiguration = property(__get_subConfiguration)
879
880    __superConfiguration = None
881    def __get_superConfiguration (self):
882        """Reference to the configuration for which this is a
883        sub-configuration.
884
885        C{None} if no super-automaton is active, else a reference to a
886        configuration that is being executed in a super-automaton.
887
888        The super-configuration relation persists for the lifetime of
889        the configuration."""
890        return self.__superConfiguration
891    superConfiguration = property(__get_superConfiguration)
892
893    __subAutomata = None
894    def __get_subAutomata (self):
895        """A set of automata that must be satisfied before the current state can complete.
896
897        This is used in unordered catenation.  Each sub-automaton
898        represents a term in the catenation.  When the configuration
899        enters a state with sub-automata, a set containing references
900        to those automata is assigned to this attribute.
901        Subsequently, until all automata in the state are satisfied,
902        transitions can only occur within an active sub-automaton, out
903        of the active sub-automaton if it is in an accepting state,
904        and into a new sub-automaton if no sub-automaton is active.
905        """
906        return self.__subAutomata
907    def _set_subAutomata (self, automata):
908        self.__subAutomata = list(automata)
909    subAutomata = property(__get_subAutomata)
910
911    def makeLeaveAutomatonTransition (self):
912        """Create a transition back to the containing configuration.
913
914        This is done when a configuration is in an accepting state and
915        there are candidate transitions to other states that must be
916        considered.  The transition does not consume a symbol."""
917        assert self.__superConfiguration is not None
918        return Transition(self.__superConfiguration.__state, set(), layer_link=self.__superConfiguration)
919
920    def leaveAutomaton (self, sub_configuration):
921        """Execute steps to leave a sub-automaton.
922
923        @param sub_configuration: The configuration associated with
924        the automata that has completed.
925
926        @return: C{self}"""
927        assert sub_configuration.__superConfiguration == self
928        self.__subConfiguration = None
929        return self
930
931    def enterAutomaton (self, automaton):
932        """Execute steps to enter a new automaton.
933
934        The new automaton is removed from the set of remaining
935        automata for the current state, and a new configuration
936        created.  No transition is made in that new configuration.
937
938        @param automaton: The automaton to be entered
939
940        @return: The configuration that executes the new automaton as
941        a sub-configuration of C{self}."""
942        assert self.__subConfiguration is None
943        assert self.__subAutomata is not None
944        self.__subAutomata.remove(automaton)
945        self.__subConfiguration = Configuration(automaton)
946        self.__subConfiguration.__superConfiguration = self
947        return self.__subConfiguration
948
949    def satisfies (self, transition):
950        return UpdateInstruction.Satisfies(self.__counterValues, transition.updateInstructions)
951
952    def reset (self):
953        fac = self.__automaton
954        self.__state = None
955        self.__counterValues = dict(zip(fac.counterConditions, len(fac.counterConditions) * (1,)))
956        self.__subConfiguration = None
957        self.__subAutomata = None
958
959    def candidateTransitions (self, symbol=None):
960        """Return list of viable transitions on C{symbol}
961
962        The transitions that are structurally permitted from this
963        state, in order, filtering out those transitions where the
964        update instruction is not satisfied by the configuration
965        counter values and optionally those for which the symbol does
966        not match.
967
968        @param symbol: A symbol through which a transition from this
969        state is intended.  A value of C{None} indicates that the set
970        of transitions should ignore the symbol; candidates are still
971        filtered based on the counter state of the configuration.
972
973        @return: A list of L{Transition} instances permitted from the
974        current configuration.  If C{symbol} is not C{None},
975        transitions that would not accept the symbol are excluded.
976        Any transition that would require an unsatisfied counter
977        update is also excluded.  Non-deterministic automata may
978        result in a lits with multiple members. """
979
980        fac = self.__automaton
981        transitions = []
982        if symbol is None:
983            match_filter = lambda _xit: True
984        else:
985            match_filter = lambda _xit: _xit.consumingState().match(symbol)
986        update_filter = lambda _xit: _xit.satisfiedBy(self)
987
988        if self.__state is None:
989            # Special-case the initial entry to the topmost configuration
990            transitions.extend(fac.initialTransitions)
991        elif (self.__subConfiguration is not None) and not self.__subConfiguration.isAccepting():
992            # If there's an active subconfiguration that is not in an
993            # accepting state, we can't do anything at this level.
994            pass
995        else:
996            # Normally include transitions at this level, but in some
997            # cases they are not permitted.
998            include_local = True
999            if self.__subAutomata:
1000                # Disallow transitions in this level if there are
1001                # subautomata that require symbols before a transition
1002                # out of this node is allowed.
1003                (include_local, sub_initial) = self.__state.subAutomataInitialTransitions(self.__subAutomata)
1004                transitions.extend(map(lambda _xit: _xit.makeEnterAutomatonTransition(), sub_initial))
1005            if include_local:
1006                # Transitions within this layer
1007                for xit in filter(update_filter, self.__state.transitionSet):
1008                    if xit.consumingState() is not None:
1009                        transitions.append(xit)
1010                    else:
1011                        # The transition did not consume a symbol, so we have to find
1012                        # one that does, from among the subautomata of the destination.
1013                        # We do not care if the destination is nullable; alternatives
1014                        # to it are already being handled with different transitions.
1015                        (_, sub_initial) = xit.destination.subAutomataInitialTransitions()
1016                        transitions.extend(map(lambda _xit: xit.chainTo(_xit.makeEnterAutomatonTransition()), sub_initial))
1017                if (self.__superConfiguration is not None) and self.isAccepting():
1018                    # Transitions that leave this automaton
1019                    lxit = self.makeLeaveAutomatonTransition()
1020                    supxit = self.__superConfiguration.candidateTransitions(symbol)
1021                    transitions.extend(map(lambda _sx: lxit.chainTo(_sx), supxit))
1022        assert len(frozenset(transitions)) == len(transitions)
1023        return list(filter(update_filter, filter(match_filter, transitions)))
1024
1025    def acceptableSymbols (self):
1026        return [ _xit.consumedSymbol() for _xit in self.candidateTransitions()]
1027
1028    def step (self, symbol):
1029        transitions = self.candidateTransitions(symbol)
1030        if 0 == len(transitions):
1031            raise UnrecognizedSymbolError(self, symbol)
1032        if 1 < len(transitions):
1033            raise NondeterministicSymbolError(self, symbol)
1034        return transitions[0].apply(self)
1035
1036    def isInitial (self):
1037        """Return C{True} iff no transitions have ever been made."""
1038        return self.__state is None
1039
1040    def isAccepting (self):
1041        """Return C{True} iff the automaton is in an accepting state."""
1042        if self.__state is not None:
1043            # Any active sub-configuration must be accepting
1044            if (self.__subConfiguration is not None) and not self.__subConfiguration.isAccepting():
1045                return False
1046            # Any unprocessed sub-automata must be nullable
1047            if self.__subAutomata is not None:
1048                if not functools.reduce(operator.and_, map(lambda _sa: _sa.nullable, self.__subAutomata), True):
1049                    return False
1050            # This state must be accepting
1051            return self.__state.isAccepting(self.__counterValues)
1052        # Accepting without any action requires nullable automaton
1053        return self.__automaton.nullable
1054
1055    def __init__ (self, automaton, super_configuration=None):
1056        self.__automaton = automaton
1057        self.__superConfiguration = super_configuration
1058        self.reset()
1059
1060    def clone (self, clone_map=None):
1061        """Clone a configuration and its descendents.
1062
1063        This is used for parallel execution where a configuration has
1064        multiple candidate transitions and must follow all of them.
1065        It clones the entire chain of configurations through
1066        multiple layers.
1067
1068        @param clone_map: Optional map into which the translation from
1069        the original configuration object to the corresponding cloned
1070        configuration object can be reconstructed, e.g. when applying
1071        a transition that includes automata exits referencing
1072        superconfigurations from the original configuration.
1073        """
1074        if clone_map is None:
1075            clone_map = {}
1076        root = self
1077        while root.__superConfiguration is not None:
1078            root = root.__superConfiguration
1079        root = root._clone(clone_map, None)
1080        return clone_map.get(self)
1081
1082    def _clone (self, clone_map, super_configuration):
1083        assert not self in clone_map
1084        other = type(self)(self.__automaton)
1085        clone_map[self] = other
1086        other.__state = self.__state
1087        other.__counterValues = self.__counterValues.copy()
1088        other.__superConfiguration = super_configuration
1089        if self.__subAutomata is not None:
1090            other.__subAutomata = self.__subAutomata[:]
1091            if self.__subConfiguration:
1092                other.__subConfiguration = self.__subConfiguration._clone(clone_map, other)
1093        return other
1094
1095    def __str__ (self):
1096        return '%s: %s' % (self.__state, ' ; '.join([ '%s=%u' % (_c,_v) for (_c,_v) in six.iteritems(self.__counterValues)]))
1097
1098class MultiConfiguration (Configuration_ABC):
1099    """Support parallel execution of state machine.
1100
1101    This holds a set of configurations, and executes each transition
1102    on each one.  Configurations which fail to accept a step are
1103    silently dropped; only if this results in no remaining
1104    configurations will L{UnrecognizedSymbolError} be raised.  If a
1105    step admits multiple valid transitions, a configuration is added
1106    for each one.
1107
1108    See L{pyxb.binding.content.AutomatonConfiguration} for an
1109    alternative solution which holds actions associated with the
1110    transition until the non-determinism is resolved."""
1111
1112    __configurations = None
1113
1114    def __init__ (self, configuration):
1115        self.__configurations = [ configuration]
1116
1117    def acceptableSymbols (self):
1118        acceptable = []
1119        for cfg in self.__configurations:
1120            acceptable.extend(cfg.acceptableSymbols())
1121        return acceptable
1122
1123    def step (self, symbol):
1124        next_configs = []
1125        for cfg in self.__configurations:
1126            transitions = cfg.candidateTransitions(symbol)
1127            if 0 == len(transitions):
1128                pass
1129            elif 1 == len(transitions):
1130                next_configs.append(transitions[0].apply(cfg))
1131            else:
1132                for transition in transitions:
1133                    clone_map = {}
1134                    ccfg = cfg.clone(clone_map)
1135                    next_configs.append(transition.apply(ccfg, clone_map))
1136        if 0 == len(next_configs):
1137            raise UnrecognizedSymbolError(self, symbol)
1138        assert len(frozenset(next_configs)) == len(next_configs)
1139        self.__configurations = next_configs
1140        return self
1141
1142    def acceptingConfigurations (self):
1143        """Return the set of configurations that are in an accepting state.
1144
1145        Note that some of the configurations may be within a
1146        sub-automaton; their presence in the return value is because
1147        the root configuration is also accepting."""
1148        accepting = []
1149        for cfg in self.__configurations:
1150            rcfg = cfg
1151            # Rule out configurations that are accepting within their
1152            # automaton, but not in the containing automaton.
1153            while rcfg.superConfiguration is not None:
1154                rcfg = rcfg.superConfiguration
1155            if rcfg.isAccepting():
1156                accepting.append(cfg)
1157        return accepting
1158
1159class Automaton (object):
1160    """Representation of a Finite Automaton with Counters.
1161
1162    This has all the standard FAC elements, plus links to other
1163    states/automata as required to support the nested automata
1164    construct used for matching unordered catenation terms."""
1165    __states = None
1166    def __get_states (self):
1167        """The set of L{State}s in the automaton.
1168
1169        These correspond essentially to marked symbols in the original
1170        regular expression, or L{element
1171        declarations<pyxb.xmlschema.structures.ElementDeclaration>} in
1172        an XML schema.
1173
1174        @note: These are conceptually a set and are stored that way.
1175        When an L{Automaton} is constructed the incoming states should
1176        be passed as a list so the calculated initial transitions are
1177        executed in a deterministic order."""
1178        return self.__states
1179    states = property(__get_states)
1180
1181    __counterConditions = None
1182    def __get_counterConditions (self):
1183        """The set of L{CounterCondition}s in the automaton.
1184
1185        These are marked positions in the regular expression, or
1186        L{particles<pyxb.xmlschema.structures.Particle>} in an XML
1187        schema, paired with their occurrence constraints."""
1188        return self.__counterConditions
1189    counterConditions = property(__get_counterConditions)
1190
1191    __nullable = None
1192    def __get_nullable (self):
1193        """C{True} iff the automaton accepts the empty string."""
1194        return self.__nullable
1195    nullable = property(__get_nullable)
1196
1197    __initialTransitions = None
1198    def __get_initialTransitions (self):
1199        """The set of transitions that may be made to enter the automaton.
1200
1201        These are full transitions, including chains into subautomata
1202        if an initial state represents a node with sub-automata.
1203
1204        @note: As with L{State.transitionSet}, the set is represented
1205        as a list to preserve priority when resolving
1206        non-deterministic matches."""
1207        return self.__initialTransitions
1208    initialTransitions = property(__get_initialTransitions)
1209
1210    __containingState = None
1211    def __get_containingState (self):
1212        """The L{State} instance for which this is a sub-automaton.
1213
1214        C{None} if this is not a sub-automaton."""
1215        return self.__containingState
1216    containingState = property(__get_containingState)
1217
1218    __finalStates = None
1219    def __get_finalStates (self):
1220        """The set of L{State} members which can terminate a match."""
1221        return self.__finalStates
1222    finalStates = property(__get_finalStates)
1223
1224    def __init__ (self, states, counter_conditions, nullable, containing_state=None):
1225        self.__states = frozenset(states)
1226        for st in self.__states:
1227            st._set_automaton(self)
1228        self.__counterConditions = frozenset(counter_conditions)
1229        self.__nullable = nullable
1230        self.__containingState = containing_state
1231        xit = []
1232        fnl = set()
1233        # Iterate over states, not self.__states, in case the input was a list.
1234        # This way we preserve the priority for initial transitions.
1235        for s in states:
1236            if s.isInitial:
1237                xit.extend(s.automatonEntryTransitions)
1238            if s.finalUpdate is not None:
1239                fnl.add(s)
1240        self.__initialTransitions = xit
1241        self.__finalStates = frozenset(fnl)
1242
1243    def newConfiguration (self):
1244        """Return a new L{Configuration} instance for this automaton."""
1245        return Configuration(self)
1246
1247    def __str__ (self):
1248        rv = []
1249        rv.append('sigma = %s' % (' '.join(map(lambda _s: str(_s.symbol), self.__states))))
1250        rv.append('states = %s' % (' '.join(map(str, self.__states))))
1251        for s in self.__states:
1252            if s.subAutomata is not None:
1253                for i in xrange(len(s.subAutomata)):
1254                    rv.append('SA %s.%u is %x:\n  ' % (str(s), i, id(s.subAutomata[i])) + '\n  '.join(str(s.subAutomata[i]).split('\n')))
1255        rv.append('counters = %s' % (' '.join(map(str, self.__counterConditions))))
1256        rv.append('initial = %s' % (' ; '.join([ '%s on %s' % (_s, _s.symbol) for _s in filter(lambda _s: _s.isInitial, self.__states)])))
1257        rv.append('initial transitions:\n%s' % ('\n'.join(map(str, self.initialTransitions))))
1258        rv.append('States:')
1259        for s in self.__states:
1260            rv.append('%s: %s' % (s, s._facText()))
1261        return '\n'.join(rv)
1262
1263class Node (object):
1264    """Abstract class for any node in the term tree.
1265
1266    In its original form a B{position} (C{pos}) is a tuple of
1267    non-negative integers comprising a path from a node in the term
1268    tree.  It identifies a node in the tree.  After the FAC has been
1269    constructed, only positions that are leaf nodes in the term tree
1270    remain, and the corresponding symbol value (Python instance) is
1271    used as the position.
1272
1273    An B{update instruction} (C{psi}) is a map from positions to
1274    either L{Node.RESET} or L{Node.INCREMENT}.  It identifies actions
1275    to be taken on the counter states corresponding to the positions
1276    in its domain.
1277
1278    A B{transition} is a pair containing a position and an update
1279    instruction.  It identifies a potential next node in the state and
1280    the updates that are to be performed if the transition is taken.
1281
1282    A B{follow value} is a map from a position to a set of transitions
1283    that may originate from the pos.  This set is represented as a
1284    Python list since update instructions are dicts and cannot be
1285    hashed.
1286    """
1287
1288    _Precedence = None
1289    """An integral value used for parenthesizing expressions.
1290
1291    A subterm that has a precedence less than that of its containing
1292    term must be enclosed in parentheses when forming a text
1293    expression representing the containing term."""
1294
1295    RESET = False
1296    """An arbitrary value representing reset of a counter."""
1297
1298    INCREMENT = True
1299    """An arbitrary value representing increment of a counter."""
1300
1301    def __init__ (self, **kw):
1302        """Create a FAC term-tree node.
1303
1304        @keyword metadata: Any application-specific metadata retained in
1305        the term tree for transfer to the resulting automaton."""
1306        self.__metadata = kw.get('metadata')
1307
1308    def clone (self, *args, **kw):
1309        """Create a deep copy of the node.
1310
1311        All term-tree--related attributes and properties are replaced
1312        with deep clones.  Other attributes are preserved.
1313
1314        @param args: A tuple of arguments to be passed to the instance
1315        constructor.
1316
1317        @param kw: A dict of keywords to be passed to the instance
1318        constructor.
1319
1320        @note: Subclasses should pre-extend this method to augment the
1321        C{args} and C{kw} parameters as necessary to match the
1322        expectations of the C{__init__} method of the class being
1323        cloned."""
1324        kw.setdefault('metadata', self.metadata)
1325        return type(self)(*args, **kw)
1326
1327    __metadata = None
1328    def __get_metadata (self):
1329        """Application-specific metadata provided during construction."""
1330        return self.__metadata
1331    metadata = property(__get_metadata)
1332
1333    __first = None
1334    def __get_first (self):
1335        """The I{first} set for the node.
1336
1337        This is the set of positions leading to symbols that can
1338        appear first in a string matched by an execution starting at
1339        the node."""
1340        if self.__first is None:
1341            self.__first = frozenset(self._first())
1342        return self.__first
1343    first = property(__get_first)
1344
1345    def _first (self):
1346        """Abstract method that defines L{first} for the subclass.
1347
1348        The return value should be an iterable of tuples of integers
1349        denoting paths from this node through the term tree to a
1350        symbol."""
1351        raise NotImplementedError('%s.first' % (type(self).__name__,))
1352
1353    __last = None
1354    def __get_last (self):
1355        """The I{last} set for the node.
1356
1357        This is the set of positions leading to symbols that can
1358        appear last in a string matched by an execution starting at
1359        the node."""
1360        if self.__last is None:
1361            self.__last = frozenset(self._last())
1362        return self.__last
1363    last = property(__get_last)
1364
1365    def _last (self):
1366        """Abstract method that defines L{last} for the subclass.
1367
1368        The return value should be an iterable of tuples of integers
1369        denoting paths from this node through the term tree to a
1370        symbol."""
1371        raise NotImplementedError('%s.last' % (type(self).__name__,))
1372
1373    __nullable = None
1374    def __get_nullable (self):
1375        """C{True} iff the empty string is accepted by this node."""
1376        if self.__nullable is None:
1377            self.__nullable = self._nullable()
1378        return self.__nullable
1379    nullable = property(__get_nullable)
1380
1381    def _nullable (self):
1382        """Abstract method that defines L{nullable} for the subclass.
1383
1384        The return value should be C{True} or C{False}."""
1385        raise NotImplementedError('%s.nullable' % (type(self).__name__,))
1386
1387    __follow = None
1388    def __get_follow (self):
1389        """The I{follow} map for the node."""
1390        if self.__follow is None:
1391            self.__follow = self._follow()
1392        return self.__follow
1393    follow = property(__get_follow)
1394
1395    def _follow (self):
1396        """Abstract method that defines L{follow} for the subclass.
1397
1398        The return value should be a map from tuples of integers (positions)
1399        to a list of transitions, where a transition is a position and
1400        an update instruction."""
1401        raise NotImplementedError('%s.follow' % (type(self).__name__,))
1402
1403    def reset (self):
1404        """Reset any term-tree state associated with the node.
1405
1406        Any change to the structure of the term tree in which the node
1407        appears invalidates memoized first/follow sets and related
1408        information.  This method clears all that data so it can be
1409        recalculated.  It does not clear the L{metadata} link, or any
1410        existing structural data."""
1411        self.__first = None
1412        self.__last = None
1413        self.__nullable = None
1414        self.__follow = None
1415        self.__counterPositions = None
1416
1417    def walkTermTree (self, pre, post, arg):
1418        """Utility function for term tree processing.
1419
1420        @param pre: a callable that, unless C{None}, is invoked at
1421        each node C{n} with parameters C{n}, C{pos}, and C{arg}, where
1422        C{pos} is the tuple of integers identifying the path from the
1423        node at on which this method was invoked to the node being
1424        processed.  The invocation occurs before processing any
1425        subordinate nodes.
1426
1427        @param post: as with C{pre} but invocation occurs after
1428        processing any subordinate nodes.
1429
1430        @param arg: a value passed to invocations of C{pre} and
1431        C{post}."""
1432        self._walkTermTree((), pre, post, arg)
1433
1434    def _walkTermTree (self, position, pre, post, arg):
1435        """Abstract method implementing L{walkTermTree} for the subclass."""
1436        raise NotImplementedError('%s.walkTermTree' % (type(self).__name__,))
1437
1438    __posNodeMap = None
1439    def __get_posNodeMap (self):
1440        """A map from positions to nodes in the term tree."""
1441        if self.__posNodeMap is None:
1442            pnm = { }
1443            self.walkTermTree(lambda _n,_p,_a: _a.setdefault(_p, _n), None, pnm)
1444            self.__posNodeMap = pnm
1445        return self.__posNodeMap
1446    posNodeMap = property(__get_posNodeMap)
1447
1448    __nodePosMap = None
1449    def __get_nodePosMap (self):
1450        """A map from nodes to their position in the term tree."""
1451        if self.__nodePosMap is None:
1452            npm = {}
1453            for (p,n) in six.iteritems(self.posNodeMap):
1454                npm[n] = p
1455            self.__nodePosMap = npm
1456        return self.__nodePosMap
1457    nodePosMap = property(__get_nodePosMap)
1458
1459    @classmethod
1460    def _PosConcatPosSet (cls, pos, pos_set):
1461        """Implement definition 11.1 in B{HOV09}."""
1462        return frozenset([ pos + _mp for _mp in pos_set ])
1463
1464    @classmethod
1465    def _PosConcatUpdateInstruction (cls, pos, psi):
1466        """Implement definition 11.2 in B{HOV09}"""
1467        rv = {}
1468        for (q, v) in six.iteritems(psi):
1469            rv[pos + q] = v
1470        return rv
1471
1472    @classmethod
1473    def _PosConcatTransitionSet (cls, pos, transition_set):
1474        """Implement definition 11.3 in B{HOV09}"""
1475        ts = []
1476        for (q, psi) in transition_set:
1477            ts.append((pos + q, cls._PosConcatUpdateInstruction(pos, psi) ))
1478        return ts
1479
1480    def __resetAndValidate (self, node, pos, visited_nodes):
1481        if node in visited_nodes:
1482            raise InvalidTermTreeError(self, node)
1483        node.reset()
1484        visited_nodes.add(node)
1485
1486    def buildAutomaton (self, state_ctor=State, ctr_cond_ctor=CounterCondition, containing_state=None):
1487        # Validate that the term tree is in fact a tree.  A DAG does
1488        # not work.  If the tree had cycles, the automaton build
1489        # wouldn't even return.
1490        self.walkTermTree(self.__resetAndValidate, None, set())
1491
1492        counter_map = { }
1493        for pos in self.counterPositions:
1494            nci = self.posNodeMap.get(pos)
1495            assert isinstance(nci, NumericalConstraint)
1496            assert nci not in counter_map
1497            counter_map[pos] = ctr_cond_ctor(nci.min, nci.max, nci.metadata)
1498        counters = list(six.itervalues(counter_map))
1499
1500        state_map = { }
1501        for pos in six.iterkeys(self.follow):
1502            sym = self.posNodeMap.get(pos)
1503            assert isinstance(sym, LeafNode)
1504            assert sym not in state_map
1505
1506            # The state may be an initial state if it is in the first
1507            # set for the root of the term tree.
1508            is_initial = pos in self.first
1509
1510            # The state may be a final state if it is nullable or is
1511            # in the last set of the term tree.
1512            final_update = None
1513            if (() == pos and sym.nullable) or (pos in self.last):
1514                # Acceptance is further constrained by the counter
1515                # values satisfying an update rule that would reset
1516                # all counters that are relevant at the state.
1517                final_update = set()
1518                for nci in map(counter_map.get, self.counterSubPositions(pos)):
1519                    final_update.add(UpdateInstruction(nci, False))
1520            state_map[pos] = state_ctor(sym.metadata, is_initial=is_initial, final_update=final_update, is_unordered_catenation=isinstance(sym, All))
1521            if isinstance(sym, All):
1522                state_map[pos]._set_subAutomata(*map(lambda _s: _s.buildAutomaton(state_ctor, ctr_cond_ctor, containing_state=state_map[pos]), sym.terms))
1523        states = list(six.itervalues(state_map))
1524
1525        for (spos, transition_set) in six.iteritems(self.follow):
1526            src = state_map[spos]
1527            phi = []
1528            for (dpos, psi) in transition_set:
1529                dst = state_map[dpos]
1530                uiset = set()
1531                for (counter, action) in six.iteritems(psi):
1532                    uiset.add(UpdateInstruction(counter_map[counter], self.INCREMENT == action))
1533                phi.append(Transition(dst, uiset))
1534            src._set_transitionSet(phi)
1535
1536        return Automaton(states, counters, self.nullable, containing_state=containing_state)
1537
1538    __counterPositions = None
1539    def __get_counterPositions (self):
1540        """Implement definition 13.1 from B{HOV09}.
1541
1542        The return value is the set of all positions leading to
1543        L{NumericalConstraint} nodes for which either the minimum
1544        value is not 1 or the maximum value is not unbounded."""
1545        if self.__counterPositions is None:
1546            cpos = []
1547            self.walkTermTree(lambda _n,_p,_a: \
1548                                  isinstance(_n, NumericalConstraint) \
1549                                  and ((1 != _n.min) \
1550                                       or (_n.max is not None)) \
1551                                  and _a.append(_p),
1552                              None, cpos)
1553            self.__counterPositions = frozenset(cpos)
1554        return self.__counterPositions
1555    counterPositions = property(__get_counterPositions)
1556
1557    def counterSubPositions (self, pos):
1558        """Implement definition 13.2 from B{HOV09}.
1559
1560        This is the subset of L{counterPositions} that occur along the
1561        path to C{pos}."""
1562        rv = set()
1563        for cpos in self.counterPositions:
1564            if cpos == pos[:len(cpos)]:
1565                rv.add(cpos)
1566        return frozenset(rv)
1567
1568    def _facToString (self):
1569        """Obtain a description of the FAC in text format.
1570
1571        This is a diagnostic tool, returning first, last, and follow
1572        maps using positions."""
1573        rv = []
1574        rv.append('r\t= %s' % (str(self),))
1575        states = list(six.iterkeys(self.follow))
1576        rv.append('sym(r)\t= %s' % (' '.join(map(str, map(self.posNodeMap.get, states)))))
1577        rv.append('first(r)\t= %s' % (' '.join(map(str, self.first))))
1578        rv.append('last(r)\t= %s' % (' '.join(map(str, self.last))))
1579        rv.append('C\t= %s' % (' '.join(map(str, self.counterPositions))))
1580        for pos in self.first:
1581            rv.append('qI(%s) -> %s' % (self.posNodeMap[pos].metadata, str(pos)))
1582        for spos in states:
1583            for (dpos, transition_set) in self.follow[spos]:
1584                dst = self.posNodeMap[dpos]
1585                uv = []
1586                for (c, u) in six.iteritems(transition_set):
1587                    uv.append('%s %s' % (u == self.INCREMENT and "inc" or "rst", str(c)))
1588                rv.append('%s -%s-> %s ; %s' % (str(spos), dst.metadata, str(dpos), ' ; '.join(uv)))
1589        return '\n'.join(rv)
1590
1591class MultiTermNode (Node):
1592    """Intermediary for nodes that have multiple child nodes."""
1593
1594    __terms = None
1595    def __get_terms (self):
1596        """The set of subordinate terms of the current node."""
1597        return self.__terms
1598    terms = property(__get_terms)
1599
1600    def __init__ (self, *terms, **kw):
1601        """Term that collects an ordered sequence of terms.
1602
1603        The terms are provided as arguments.  All must be instances of
1604        a subclass of L{Node}."""
1605        super(MultiTermNode, self).__init__(**kw)
1606        self.__terms = terms
1607
1608    def clone (self):
1609        cterms = map(lambda _s: _s.clone(), self.__terms)
1610        return super(MultiTermNode, self).clone(*cterms)
1611
1612    def _walkTermTree (self, position, pre, post, arg):
1613        if pre is not None:
1614            pre(self, position, arg)
1615        for c in xrange(len(self.__terms)):
1616            self.__terms[c]._walkTermTree(position + (c,), pre, post, arg)
1617        if post is not None:
1618            post(self, position, arg)
1619
1620class LeafNode (Node):
1621    """Intermediary for nodes that have no child nodes."""
1622    def _first (self):
1623        return [()]
1624    def _last (self):
1625        return [()]
1626    def _nullable (self):
1627        return False
1628    def _follow (self):
1629        return { (): frozenset() }
1630
1631    def _walkTermTree (self, position, pre, post, arg):
1632        if pre is not None:
1633            pre(self, position, arg)
1634        if post is not None:
1635            post(self, position, arg)
1636
1637class NumericalConstraint (Node):
1638    """A term with a numeric range constraint.
1639
1640    This corresponds to a "particle" in the XML Schema content model."""
1641
1642    _Precedence = -1
1643
1644    __min = None
1645    def __get_min (self):
1646        return self.__min
1647    min = property(__get_min)
1648
1649    __max = None
1650    def __get_max (self):
1651        return self.__max
1652    max = property(__get_max)
1653
1654    __term = None
1655    def __get_term (self):
1656        return self.__term
1657    term = property(__get_term)
1658
1659    def __init__ (self, term, min=0, max=1, **kw):
1660        """Term with a numerical constraint.
1661
1662        @param term: A term, the number of appearances of which is
1663        constrained in this term.
1664        @type term: L{Node}
1665
1666        @keyword min: The minimum number of occurrences of C{term}.
1667        The value must be non-negative.
1668
1669        @keyword max: The maximum number of occurrences of C{term}.
1670        The value must be positive (in which case it must also be no
1671        smaller than C{min}), or C{None} to indicate an unbounded
1672        number of occurrences."""
1673        super(NumericalConstraint, self).__init__(**kw)
1674        self.__term = term
1675        self.__min = min
1676        self.__max = max
1677
1678    def clone (self):
1679        return super(NumericalConstraint, self).clone(self.__term, self.__min, self.__max)
1680
1681    def _first (self):
1682        return [ (0,) + _fc for _fc in self.__term.first ]
1683
1684    def _last (self):
1685        return [ (0,) + _lc for _lc in self.__term.last ]
1686
1687    def _nullable (self):
1688        return (0 == self.__min) or self.__term.nullable
1689
1690    def _follow (self):
1691        rv = {}
1692        pp = (0,)
1693        last_r1 = set(self.__term.last)
1694        for (q, transition_set) in six.iteritems(self.__term.follow):
1695            rv[pp+q] = self._PosConcatTransitionSet(pp, transition_set)
1696            if q in last_r1:
1697                last_r1.remove(q)
1698                for sq1 in self.__term.first:
1699                    q1 = pp+sq1
1700                    psi = {}
1701                    for p1 in self.__term.counterSubPositions(q):
1702                        psi[pp+p1] = self.RESET
1703                    if (1 != self.min) or (self.max is not None):
1704                        psi[()] = self.INCREMENT
1705                    rv[pp+q].append((q1, psi))
1706        assert not last_r1
1707        return rv
1708
1709    def _walkTermTree (self, position, pre, post, arg):
1710        if pre is not None:
1711            pre(self, position, arg)
1712        self.__term._walkTermTree(position + (0,), pre, post, arg)
1713        if post is not None:
1714            post(self, position, arg)
1715
1716    def __str__ (self):
1717        rv = str(self.__term)
1718        if self.__term._Precedence < self._Precedence:
1719            rv = '(' + rv + ')'
1720        rv += '^(%u,' % (self.__min,)
1721        if self.__max is not None:
1722            rv += '%u' % (self.__max)
1723        return rv + ')'
1724
1725class Choice (MultiTermNode):
1726    """A term that may be any one of a set of terms.
1727
1728    This term matches if any one of its contained terms matches."""
1729
1730    _Precedence = -3
1731
1732    def __init__ (self, *terms, **kw):
1733        """Term that selects one of a set of terms.
1734
1735        The terms are provided as arguments.  All must be instances of
1736        a subclass of L{Node}."""
1737        super(Choice, self).__init__(*terms, **kw)
1738
1739    def _first (self):
1740        rv = set()
1741        for c in xrange(len(self.terms)):
1742            rv.update([ (c,) + _fc for _fc in self.terms[c].first])
1743        return rv
1744
1745    def _last (self):
1746        rv = set()
1747        for c in xrange(len(self.terms)):
1748            rv.update([ (c,) + _lc for _lc in self.terms[c].last])
1749        return rv
1750
1751    def _nullable (self):
1752        for t in self.terms:
1753            if t.nullable:
1754                return True
1755        return False
1756
1757    def _follow (self):
1758        rv = {}
1759        for c in xrange(len(self.terms)):
1760            for (q, transition_set) in six.iteritems(self.terms[c].follow):
1761                pp = (c,)
1762                rv[pp + q] = self._PosConcatTransitionSet(pp, transition_set)
1763        return rv
1764
1765    def __str__ (self):
1766        elts = []
1767        for t in self.terms:
1768            if t._Precedence < self._Precedence:
1769                elts.append('(' + str(t) + ')')
1770            else:
1771                elts.append(str(t))
1772        return '+'.join(elts)
1773
1774class Sequence (MultiTermNode):
1775    """A term that is an ordered sequence of terms."""
1776
1777    _Precedence = -2
1778
1779    def __init__ (self, *terms, **kw):
1780        """Term that collects an ordered sequence of terms.
1781
1782        The terms are provided as arguments.  All must be instances of
1783        a subclass of L{Node}."""
1784        super(Sequence, self).__init__(*terms, **kw)
1785
1786    def _first (self):
1787        rv = set()
1788        c = 0
1789        while c < len(self.terms):
1790            t = self.terms[c]
1791            rv.update([ (c,) + _fc for _fc in t.first])
1792            if not t.nullable:
1793                break
1794            c += 1
1795        return rv
1796
1797    def _last (self):
1798        rv = set()
1799        c = len(self.terms) - 1
1800        while 0 <= c:
1801            t = self.terms[c]
1802            rv.update([ (c,) + _lc for _lc in t.last])
1803            if not t.nullable:
1804                break
1805            c -= 1
1806        return rv
1807
1808    def _nullable (self):
1809        for t in self.terms:
1810            if not t.nullable:
1811                return False
1812        return True
1813
1814    def _follow (self):
1815        rv = {}
1816        for c in xrange(len(self.terms)):
1817            pp = (c,)
1818            for (q, transition_set) in six.iteritems(self.terms[c].follow):
1819                rv[pp + q] = self._PosConcatTransitionSet(pp, transition_set)
1820        for c in xrange(len(self.terms)-1):
1821            t = self.terms[c]
1822            pp = (c,)
1823            # Link from the last of one term to the first of the next term.
1824            # Repeat while the destination term is nullable and there are
1825            # successor terms.
1826            for q in t.last:
1827                psi = {}
1828                for p1 in t.counterSubPositions(q):
1829                    psi[pp + p1] = self.RESET
1830                nc = c
1831                while nc+1 < len(self.terms):
1832                    nc += 1
1833                    nt = self.terms[nc]
1834                    for sq1 in nt.first:
1835                        q1 = (nc,) + sq1
1836                        rv[pp+q].append((q1, psi))
1837                    if not nt.nullable:
1838                        break
1839        return rv
1840
1841    def __str__ (self):
1842        elts = []
1843        for t in self.terms:
1844            if t._Precedence < self._Precedence:
1845                elts.append('(' + str(t) + ')')
1846            else:
1847                elts.append(str(t))
1848        return '.'.join(elts)
1849
1850class All (MultiTermNode, LeafNode):
1851    """A term that is an unordered sequence of terms.
1852
1853    Note that the inheritance structure for this node is unusual.  It
1854    has multiple children when it is treated as a term tree, but is
1855    considered a leaf node when constructing an automaton.
1856    """
1857
1858    _Precedence = 0
1859
1860    def __init__ (self, *terms, **kw):
1861        """Term that collects an unordered sequence of terms.
1862
1863        The terms are provided as arguments.  All must be instances of
1864        a subclass of L{Node}."""
1865        super(All, self).__init__(*terms, **kw)
1866
1867    def _nullable (self):
1868        for t in self.terms:
1869            if not t.nullable:
1870                return False
1871        return True
1872
1873    @classmethod
1874    def CreateTermTree (cls, *terms):
1875        """Create a term tree that implements unordered catenation of
1876        the terms.
1877
1878        This expansion results in a standard choice/sequence term
1879        tree, at the cost of quadratic state expansion because terms
1880        are L{cloned<Node.clone>} as required to satisfy the tree
1881        requirements of the term tree.
1882
1883        @param terms: The tuple of terms that are elements of an
1884        accepted sequence.
1885
1886        @return: A term tree comprising a choice between sequences
1887        that connect each term to the unordered catenation of the
1888        remaining terms."""
1889        if 1 == len(terms):
1890            return terms[0]
1891        disjuncts = []
1892        for i in xrange(len(terms)):
1893            n = terms[i]
1894            rem = map(lambda _s: _s.clone(), terms[:i] + terms[i+1:])
1895            disjuncts.append(Sequence(n, cls.CreateTermTree(*rem)))
1896        return Choice(*disjuncts)
1897
1898    def __str__ (self):
1899        return six.u('&(') + six.u(',').join([str(_t) for _t in self.terms]) + ')'
1900
1901class Symbol (LeafNode):
1902    """A leaf term that is a symbol.
1903
1904    The symbol is represented by the L{metadata} field."""
1905
1906    _Precedence = 0
1907
1908    def __init__ (self, symbol, **kw):
1909        kw['metadata'] = symbol
1910        super(Symbol, self).__init__(**kw)
1911
1912    def clone (self):
1913        return super(Symbol, self).clone(self.metadata)
1914
1915    def __str__ (self):
1916        return str(self.metadata)
1917