1# -*- coding: utf-8 -*- 2# Copyright 2012, Peter A. Bigot 3# 4# Licensed under the Apache License, Version 2.0 (the "License"); you may 5# not use this file except in compliance with the License. You may obtain a 6# copy of the License at: 7# 8# http://www.apache.org/licenses/LICENSE-2.0 9# 10# Unless required by applicable law or agreed to in writing, software 11# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13# License for the specific language governing permissions and limitations 14# under the License. 15 16"""This module provides Finite Automata with Counters. 17 18FACs are type of state machine where a transition may include a 19constraint and a modification to a set of counters. They are used to 20implement regular expressions with numerical constraints, as are found 21in POSIX regexp, Perl, and XML schema. 22 23The implementation here derives from U{Regular Expressions with 24Numerical Constraints and Automata with Counters 25<https://bora.uib.no/bitstream/1956/3628/3/Hovland_LNCS%205684.pdf>}, 26Dag Hovland, Lecture Notes in Computer Science, 2009, Volume 5684, 27Theoretical Aspects of Computing - ICTAC 2009, Pages 231-245. In what 28follows, this reference will be denoted B{HOV09}. 29 30A regular expression is directly translated into a term tree, where 31nodes are operators such as sequence, choice, and counter 32restrictions, and the leaf nodes denote symbols in the language of the 33regular expression. 34 35In the case of XML content models, the symbols include L{element 36declarations <pyxb.xmlschema.structures.ElementDeclaration>} and 37L{wildcard elements <pyxb.xmlschema.structures.Wildcard>}. A 38numerical constraint node corresponds to an L{XML particle 39<pyxb.xmlschema.structures.Particle>}, and choice and sequence nodes 40derive from L{model groups <pyxb.xmlschema.structures.ModelGroup>} of 41types B{choice} and B{sequence}. As suggested in U{The Membership 42Problem for Regular Expressions with Unordered Concatenation and 43Numerical Constraints <http://www.ii.uib.no/~dagh/presLATA2012.pdf>} 44the B{all} content model can be translated into state machine using 45choice and sequence at the cost of a quadratic size explosion. Since 46some XML content models might have a hundred terms in an unordered 47catenation, this is not acceptable, and the implementation here 48optimizes this construct by creating a leaf node in the automaton 49which in turn contains sub-automata for each term, and permits an exit 50transition only when all the terms that are required have been 51completed. 52 53@note: In XSD 1.1 the restriction that terms in an B{all} model group 54occur at most once has been removed. Since the current implementation 55removes a completed term from the set of available terms, this will 56not work: instead the subconfiguration with its counter values must be 57retained between matches. 58""" 59 60import operator 61import functools 62import logging 63from pyxb.utils import six 64from pyxb.utils.six.moves import xrange 65 66log_ = logging.getLogger(__name__) 67 68class FACError (Exception): 69 pass 70 71class InvalidTermTreeError (FACError): 72 """Exception raised when a FAC term tree is not a tree. 73 74 For example, a L{Symbol} node appears multiple times, or a cycle is detected.""" 75 76 parent = None 77 """The L{MultiTermNode} containing the term that proves invalidity""" 78 79 term = None 80 """The L{Node} that proves invalidity""" 81 82 def __init__ (self, *args): 83 (self.parent, self.term) = args 84 super(InvalidTermTreeError, self).__init__(*args) 85 86class UpdateApplicationError (FACError): 87 """Exception raised when an unsatisfied update instruction is executed. 88 89 This indicates an internal error in the implementation.""" 90 91 update_instruction = None 92 """The L{UpdateInstruction} with an unsatisfied L{CounterCondition}""" 93 94 values = None 95 """The unsatisfying value map from L{CounterCondition} instances to integers""" 96 97 def __init__ (self, *args): 98 (self.update_instruction, self.values) = args 99 super(UpdateApplicationError, self).__init__(*args) 100 101class AutomatonStepError (Exception): 102 """Symbol rejected by L{Configuration_ABC.step}. 103 104 The exception indicates that the proposed symbol either failed to 105 produce a transition (L{UnrecognizedSymbolError}) or produced 106 multiple equally valid transitions 107 (L{NondeterministicSymbolError}).""" 108 109 configuration = None 110 """The instance of L{Configuration_ABC} that raised the exception. 111 From L{Configuration_ABC.acceptableSymbols} you can determine what 112 alternatives might be present.""" 113 114 symbol = None 115 """The symbol that was not accepted.""" 116 117 def __get_acceptable (self): 118 """A list of symbols that the configuration would accept in its current state.""" 119 return self.configuration.acceptableSymbols() 120 acceptable = property(__get_acceptable) 121 122 def __init__ (self, *args): 123 (self.configuration, self.symbol) = args 124 super(AutomatonStepError, self).__init__(*args) 125 126class UnrecognizedSymbolError (AutomatonStepError): 127 """L{Configuration.step} failed to find a valid transition.""" 128 pass 129 130class NondeterministicSymbolError (AutomatonStepError): 131 """L{Configuration.step} found multiple transitions.""" 132 pass 133 134class SymbolMatch_mixin (object): 135 """Mix-in used by symbols to provide a custom match implementation. 136 137 If a L{State.symbol} value is an instance of this mix-in, then it 138 will be used to validate a candidate symbol for a match.""" 139 140 def match (self, symbol): 141 raise NotImplementedError('%s.match' % (type(self).__name__,)) 142 143class State (object): 144 """A thin wrapper around an object reference. 145 146 The state of the automaton corresponds to a position, or marked 147 symbol, in the term tree. Because the same symbol may appear at 148 multiple locations in the tree, and the distinction between these 149 positions is critical, a L{State} wrapper is provided to maintain 150 distinct values.""" 151 152 def __init__ (self, symbol, is_initial, final_update=None, is_unordered_catenation=False): 153 """Create a FAC state. 154 155 @param symbol: The symbol associated with the state. 156 Normally initialized from the L{Symbol.metadata} value. The 157 state may be entered if, among other conditions, the L{match} 158 routine accepts the proposed input as being consistent with 159 this value. 160 161 @param is_initial: C{True} iff this state may serve as the 162 first state of the automaton. 163 164 @param final_update: C{None} if this state is not an 165 accepting state of the automaton; otherwise a set of 166 L{UpdateInstruction} values that must be satisfied by the 167 counter values in a configuration as a further restriction of 168 acceptance. 169 170 @param is_unordered_catenation: C{True} if this state has 171 subautomata that must be matched to execute the unordered 172 catenation of an L{All} node; C{False} if this is a regular 173 symbol.""" 174 self.__symbol = symbol 175 self.__isInitial = not not is_initial 176 self.__finalUpdate = final_update 177 self.__isUnorderedCatenation = is_unordered_catenation 178 179 __automaton = None 180 def __get_automaton (self): 181 """Link to the L{Automaton} to which the state belongs.""" 182 return self.__automaton 183 def _set_automaton (self, automaton): 184 """Method invoked during automaton construction to set state owner.""" 185 assert self.__automaton is None 186 self.__automaton = automaton 187 return self 188 automaton = property(__get_automaton) 189 190 __symbol = None 191 def __get_symbol (self): 192 """Application-specific metadata identifying the symbol. 193 194 See also L{match}.""" 195 return self.__symbol 196 symbol = property(__get_symbol) 197 198 __isUnorderedCatenation = None 199 def __get_isUnorderedCatenation (self): 200 """Indicate whether the state has subautomata for unordered 201 catenation. 202 203 To reduce state explosion due to non-determinism, such a state 204 executes internal transitions in subautomata until all terms 205 have matched or a failure is discovered.""" 206 return self.__isUnorderedCatenation 207 isUnorderedCatenation = property(__get_isUnorderedCatenation) 208 209 __subAutomata = None 210 def __get_subAutomata (self): 211 """A sequence of sub-automata supporting internal state transitions. 212 213 This will return C{None} unless L{isUnorderedCatenation} is C{True}.""" 214 return self.__subAutomata 215 def _set_subAutomata (self, *automata): 216 assert self.__subAutomata is None 217 assert self.__isUnorderedCatenation 218 self.__subAutomata = automata 219 subAutomata = property(__get_subAutomata) 220 221 __isInitial = None 222 def __get_isInitial (self): 223 """C{True} iff this state may be the first state the automaton enters.""" 224 return self.__isInitial 225 isInitial = property(__get_isInitial) 226 227 __automatonEntryTransitions = None 228 def __get_automatonEntryTransitions (self): 229 """Return the set of initial transitions allowing entry to the automata through this state. 230 231 These are structurally-permitted transitions only, and must be 232 filtered based on the symbol that might trigger the 233 transition. The results are not filtered based on counter 234 value, since this value is used to determine how the 235 containing automaton might be entered. Consequently the 236 return value is the empty set unless this is an initial state. 237 238 The returned set is closed under entry to sub-automata, 239 i.e. it is guaranteed that each transition includes a 240 consuming state even if it requires a multi-element chain of 241 transitions into subautomata to reach one.""" 242 if self.__automatonEntryTransitions is None: 243 transitions = [] 244 if self.__isInitial: 245 xit = Transition(self, set()) 246 if self.__subAutomata is None: 247 transitions.append(xit) 248 else: 249 for sa in self.__subAutomata: 250 for saxit in sa.initialTransitions: 251 transitions.append(xit.chainTo(saxit.makeEnterAutomatonTransition())) 252 self.__automatonEntryTransitions = transitions 253 return self.__automatonEntryTransitions 254 automatonEntryTransitions = property(__get_automatonEntryTransitions) 255 256 __finalUpdate = None 257 def __get_finalUpdate (self): 258 """Return the update instructions that must be satisfied for this to be a final state.""" 259 return self.__finalUpdate 260 finalUpdate = property(__get_finalUpdate) 261 262 def subAutomataInitialTransitions (self, sub_automata=None): 263 """Return the set of candidate transitions to enter a sub-automaton of this state. 264 265 @param sub_automata: A subset of the sub-automata of this 266 state which should contribute to the result. If C{None}, all 267 sub-automata are used. 268 269 @return: A pair C{(nullable, transitions)} where C{nullable} 270 is C{True} iff there is at least one sub-automaton that is in 271 an accepting state on entry, and C{transitions} is a list of 272 L{Transition} instances describing how to reach some state in 273 a sub-automaton via a consumed symbol. 274 """ 275 assert self.__subAutomata is not None 276 is_nullable = True 277 transitions = [] 278 if sub_automata is None: 279 sub_automata = self.__subAutomata 280 for sa in sub_automata: 281 if not sa.nullable: 282 is_nullable = False 283 transitions.extend(sa.initialTransitions) 284 return (is_nullable, transitions) 285 286 def isAccepting (self, counter_values): 287 """C{True} iff this state is an accepting state for the automaton. 288 289 @param counter_values: Counter values that further validate 290 whether the requirements of the automaton have been met. 291 292 @return: C{True} if this is an accepting state and the 293 counter values relevant at it are satisfied.""" 294 if self.__finalUpdate is None: 295 return False 296 return UpdateInstruction.Satisfies(counter_values, self.__finalUpdate) 297 298 __transitionSet = None 299 def __get_transitionSet (self): 300 """Definitions of viable transitions from this state. 301 302 The transition set of a state is a set of L{Transition} nodes 303 identifying a state reachable in a single step from this 304 state, and a set of counter updates that must apply if the 305 transition is taken. 306 307 These transitions may not in themselves consume a symbol. For 308 example, if the destination state represents a match of an 309 L{unordered catenation of terms<All>}, then secondary 310 processing must be done to traverse into the automata for 311 those terms and identify transitions that include a symbol 312 consumption. 313 314 @note: Although conceptually the viable transitions are a set, 315 this implementation maintains them in a list so that order is 316 preserved when automata processing becomes non-deterministic. 317 PyXB is careful to build the transition list so that the 318 states are attempted in the order in which they appear in the 319 schema that define the automata. 320 """ 321 return self.__transitionSet 322 transitionSet = property(__get_transitionSet) 323 324 def _set_transitionSet (self, transition_set): 325 """Method invoked during automaton construction to set the 326 legal transitions from the state. 327 328 The set of transitions cannot be defined until all states that 329 appear in it are available, so the creation of the automaton 330 requires that the association of the transition set be 331 delayed. (Though described as a set, the transitions are a 332 list where order reflects priority.) 333 334 @param transition_set: a list of pairs where the first 335 member is the destination L{State} and the second member is the 336 set of L{UpdateInstruction}s that apply when the automaton 337 transitions to the destination state.""" 338 339 self.__transitionSet = [] 340 seen = set() 341 for xit in transition_set: 342 if not (xit in seen): 343 seen.add(xit) 344 self.__transitionSet.append(xit) 345 346 def match (self, symbol): 347 """Return C{True} iff the symbol matches for this state. 348 349 This may be overridden by subclasses when matching by 350 equivalence does not work. Alternatively, if the symbol 351 stored in this node is a subclass of L{SymbolMatch_mixin}, then 352 its match method will be used. Otherwise C{symbol} matches 353 only if it is equal to the L{symbol} of this state. 354 355 @param symbol: A candidate symbol corresponding to the 356 expression symbol for this state. 357 358 @return: C{True} iff C{symbol} is a match for this state. 359 """ 360 if isinstance(self.__symbol, SymbolMatch_mixin): 361 return self.__symbol.match(symbol) 362 return self.__symbol == symbol 363 364 def __str__ (self): 365 return 'S.%x' % (id(self),) 366 367 def _facText (self): 368 rv = [] 369 rv.extend(map(str, self.__transitionSet)) 370 if self.__finalUpdate is not None: 371 if 0 == len(self.__finalUpdate): 372 rv.append('Final (no conditions)') 373 else: 374 rv.append('Final if %s' % (' '.join(map(lambda _ui: str(_ui.counterCondition), self.__finalUpdate)))) 375 return '\n'.join(rv) 376 377class CounterCondition (object): 378 """A counter condition is a range limit on valid counter values. 379 380 Instances of this class serve as keys for the counters that 381 represent the configuration of a FAC. The instance also maintains 382 a pointer to application-specific L{metadata}.""" 383 384 __min = None 385 def __get_min (self): 386 """The minimum legal value for the counter. 387 388 This is a non-negative integer.""" 389 return self.__min 390 min = property(__get_min) 391 392 __max = None 393 def __get_max (self): 394 """The maximum legal value for the counter. 395 396 This is a positive integer, or C{None} to indicate that the 397 counter is unbounded.""" 398 return self.__max 399 max = property(__get_max) 400 401 __metadata = None 402 def __get_metadata (self): 403 """A pointer to application metadata provided when the condition was created.""" 404 return self.__metadata 405 metadata = property(__get_metadata) 406 407 def __init__ (self, min, max, metadata=None): 408 """Create a counter condition. 409 410 @param min: The value for L{min} 411 @param max: The value for L{max} 412 @param metadata: The value for L{metadata} 413 """ 414 self.__min = min 415 self.__max = max 416 self.__metadata = metadata 417 418 def __hash__ (self): 419 return hash(self.__min) ^ hash(self.__max) ^ hash(self.__metadata) 420 421 def __eq__ (self, other): 422 return (other is not None) \ 423 and (self.__min == other.__min) \ 424 and (self.__max == other.__max) \ 425 and (self.__metadata == other.__metadata) 426 427 def __ne__ (self, other): 428 return not self.__eq__(other) 429 430 def __str__ (self): 431 return 'C.%x{%s,%s}' % (id(self), self.min, self.max is not None and self.max or '') 432 433class UpdateInstruction: 434 """An update instruction pairs a counter with a mutation of that 435 counter. 436 437 The instruction is executed during a transition from one state to 438 another, and causes the corresponding counter to be incremented or 439 reset. The instruction may only be applied if doing so does not 440 violate the conditions of the counter it affects.""" 441 442 __counterCondition = None 443 def __get_counterCondition (self): 444 """A reference to the L{CounterCondition} identifying the 445 counter to be updated. 446 447 The counter condition instance is used as a key to the 448 dictionary maintaining current counter values.""" 449 return self.__counterCondition 450 counterCondition = property(__get_counterCondition) 451 452 __doIncrement = None 453 def __get_doIncrement (self): 454 """C{True} if the counter is to be incremented; C{False} if it is to be reset.""" 455 return self.__doIncrement 456 doIncrement = property(__get_doIncrement) 457 458 # Cached values extracted from counter condition 459 __min = None 460 __max = None 461 462 def __init__ (self, counter_condition, do_increment): 463 """Create an update instruction. 464 465 @param counter_condition: A L{CounterCondition} identifying a 466 minimum and maximum value for a counter, and serving as a map 467 key for the value of the corresponding counter. 468 469 @param do_increment: C{True} if the update is to increment 470 the value of the counter; C{False} if the update is to reset 471 the counter. 472 """ 473 self.__counterCondition = counter_condition 474 self.__doIncrement = not not do_increment 475 self.__min = counter_condition.min 476 self.__max = counter_condition.max 477 478 def satisfiedBy (self, counter_values): 479 """Implement a component of definition 5 from B{HOV09}. 480 481 The update instruction is satisfied by the counter values if 482 its action may be legitimately applied to the value of its 483 associated counter. 484 485 @param counter_values: A map from L{CounterCondition}s to 486 non-negative integers 487 488 @return: C{True} or C{False} 489 """ 490 value = counter_values[self.__counterCondition] 491 if self.__doIncrement \ 492 and (self.__max is not None) \ 493 and (value >= self.__max): 494 return False 495 if (not self.__doIncrement) \ 496 and (value < self.__min): 497 return False 498 return True 499 500 @classmethod 501 def Satisfies (cls, counter_values, update_instructions): 502 """Return C{True} iff the counter values satisfy the update 503 instructions. 504 505 @param counter_values: A map from L{CounterCondition} to 506 integer counter values 507 508 @param update_instructions: A set of L{UpdateInstruction} 509 instances 510 511 @return: C{True} iff all instructions are satisfied by the 512 values and limits.""" 513 for psi in update_instructions: 514 if not psi.satisfiedBy(counter_values): 515 return False 516 return True 517 518 def apply (self, counter_values): 519 """Apply the update instruction to the provided counter values. 520 521 @param counter_values: A map from L{CounterCondition} to 522 integer counter values. This map is updated in-place.""" 523 if not self.satisfiedBy(counter_values): 524 raise UpdateApplicationError(self, counter_values) 525 value = counter_values[self.__counterCondition] 526 if self.__doIncrement: 527 value += 1 528 else: 529 value = 1 530 counter_values[self.__counterCondition] = value 531 532 @classmethod 533 def Apply (cls, update_instructions, counter_values): 534 """Apply the update instructions to the counter values. 535 536 @param update_instructions: A set of L{UpdateInstruction} 537 instances. 538 539 @param counter_values: A map from L{CounterCondition} 540 instances to non-negative integers. This map is updated 541 in-place by applying each instruction in 542 C{update_instructions}.""" 543 for psi in update_instructions: 544 psi.apply(counter_values) 545 546 def __hash__ (self): 547 return hash(self.__counterCondition) ^ hash(self.__doIncrement) 548 549 def __eq__ (self, other): 550 return (other is not None) \ 551 and (self.__doIncrement == other.__doIncrement) \ 552 and (self.__counterCondition == other.__counterCondition) 553 554 def __ne__ (self, other): 555 return not self.__eq__(other) 556 557 def __str__ (self): 558 return '%s %s' % (self.__doIncrement and 'inc' or 'reset', self.__counterCondition) 559 560class Transition (object): 561 """Representation of a FAC state transition.""" 562 563 __destination = None 564 def __get_destination (self): 565 """The transition destination state.""" 566 return self.__destination 567 destination = property(__get_destination) 568 569 __updateInstructions = None 570 def __get_updateInstructions (self): 571 """The set of counter updates that are applied when the transition is taken.""" 572 return self.__updateInstructions 573 updateInstructions = property(__get_updateInstructions) 574 575 __nextTransition = None 576 def __get_nextTransition (self): 577 """The next transition to apply in this chain. 578 579 C{None} if this is the last transition in the chain.""" 580 return self.__nextTransition 581 nextTransition = property(__get_nextTransition) 582 583 __layerLink = None 584 def __get_layerLink (self): 585 """A directive relating to changing automaton layer on transition. 586 587 C{None} indicates this transition is from one state to another 588 within a single automaton. 589 590 An instance of L{Configuration} is a transition on completion 591 of a subautomaton back to the configuration in the parent 592 automaton. The L{destination} is the state in the parent automaton. 593 594 An instance of L{Automaton} requires creation of a 595 sub-configuration and initial entry into the automaton. The 596 L{destination} is the state in the sub-automaton. 597 """ 598 return self.__layerLink 599 layerLink = property(__get_layerLink) 600 601 def __init__ (self, destination, update_instructions, layer_link=None): 602 """Create a transition to a state. 603 604 @param destination: the state into which the transition is 605 made 606 607 @param update_instructions: A iterable of L{UpdateInstruction}s 608 denoting the changes that must be made to counters as a 609 consequence of taking the transition. 610 611 @keyword layer_link: The value for L{layerLink}.""" 612 self.__destination = destination 613 if not isinstance(update_instructions, list): 614 update_instructions = list(update_instructions) 615 self.__updateInstructions = update_instructions 616 self.__layerLink = layer_link 617 618 def consumingState (self): 619 """Return the state in this transition chain that must match a symbol.""" 620 621 # Transitions to a state with subautomata never consume anything 622 if self.__destination.subAutomata is not None: 623 if not self.__nextTransition: 624 return None 625 return self.__nextTransition.consumingState() 626 # I don't think there should be subsequent transitions 627 assert self.__nextTransition is None 628 return self.__destination 629 630 def consumedSymbol (self): 631 """Return the L{symbol<State.symbol>} of the L{consumingState}.""" 632 return self.consumingState().symbol 633 634 def satisfiedBy (self, configuration): 635 """Check the transition update instructions against 636 configuration counter values. 637 638 This implementation follows layer changes, updating the 639 configuration used as counter value source as necessary. 640 641 @param configuration: A L{Configuration} instance containing 642 counter data against which update instruction satisfaction is 643 checked. 644 645 @return: C{True} iff all update instructions along the 646 transition chain are satisfied by their relevant 647 configuration.""" 648 # If we're entering an automaton, we know no subsequent 649 # transitions have update instructions 650 if isinstance(self.__layerLink, Automaton): 651 return True 652 # If we're leaving an automaton, switch to the configuration 653 # that is relevant to the destination of the transition. 654 if isinstance(self.__layerLink, Configuration): 655 configuration = self.__layerLink 656 assert self.destination.automaton == configuration.automaton 657 # Blow chunks if the configuration doesn't satisfy the transition 658 if not configuration.satisfies(self): 659 return False 660 # Otherwise try the next transition, or succeed if there isn't one 661 if self.__nextTransition: 662 return self.__nextTransition.satisfiedBy(configuration) 663 return True 664 665 def apply (self, configuration, clone_map=None): 666 """Apply the transitition to a configuration. 667 668 This updates the configuration counter values based on the 669 update instructions, and sets the new configuration state. 670 671 @note: If the transition involves leaving a sub-automaton or 672 creating a new sub-automaton, the returned configuration 673 structure will be different from the one passed in. You 674 should invoke this as:: 675 676 cfg = transition.apply(cfg) 677 678 @param configuration: A L{Configuration} of an executing automaton 679 680 @param clone_map: A map from L{Configuration} to 681 L{Configuration} reflecting the replacements made when the 682 configuration for which the transition was calculated was 683 subsequently cloned into the C{configuration} passed into this 684 method. This is only necessary when the transition includes 685 layer transitions. 686 687 @return: The resulting configuration 688 """ 689 layer_link = self.__layerLink 690 if isinstance(layer_link, Configuration): 691 if clone_map is not None: 692 layer_link = clone_map[layer_link] 693 configuration = layer_link.leaveAutomaton(configuration) 694 elif isinstance(layer_link, Automaton): 695 configuration = configuration.enterAutomaton(layer_link) 696 UpdateInstruction.Apply(self.updateInstructions, configuration._get_counterValues()) 697 configuration._set_state(self.destination, layer_link is None) 698 if self.__nextTransition is None: 699 return configuration 700 return self.__nextTransition.apply(configuration, clone_map) 701 702 def chainTo (self, next_transition): 703 """Duplicate the state and chain the duplicate to a successor 704 transition. 705 706 This returns a new transition which applies the operation for 707 this transition, then proceeds to apply the next transition in 708 the chain. 709 710 @note: The node that is invoking this must not have successor 711 transitions. 712 713 @param next_transition: A L{Transition} node describing a 714 subsequent transition. 715 716 @return: a clone of this node, augmented with a link to 717 C{next_transition}.""" 718 assert not self.__nextTransition 719 head = type(self)(self.__destination, self.__updateInstructions, layer_link=self.__layerLink) 720 head.__nextTransition = next_transition 721 return head 722 723 def makeEnterAutomatonTransition (self): 724 """Replicate the transition as a layer link into its automaton. 725 726 This is used on initial transitions into sub-automata where a 727 sub-configuration must be created and recorded.""" 728 assert self.__layerLink is None 729 assert self.__nextTransition is None 730 head = type(self)(self.__destination, self.__updateInstructions) 731 head.__layerLink = self.__destination.automaton 732 return head 733 734 def __hash__ (self): 735 rv = hash(self.__destination) 736 for ui in self.__updateInstructions: 737 rv ^= hash(ui) 738 return rv ^ hash(self.__nextTransition) ^ hash(self.__layerLink) 739 740 def __eq__ (self, other): 741 return (other is not None) \ 742 and (self.__destination == other.__destination) \ 743 and (self.__updateInstructions == other.__updateInstructions) \ 744 and (self.__nextTransition == other.__nextTransition) \ 745 and (self.__layerLink == other.__layerLink) 746 747 def __ne__ (self, other): 748 return not self.__eq__(other) 749 750 def __str__ (self): 751 rv = [] 752 if isinstance(self.__layerLink, Configuration): 753 rv.append('from A%x ' % (id(self.__layerLink.automaton),)) 754 elif isinstance(self.__layerLink, Automaton): 755 rv.append('in A%x ' % (id(self.__layerLink))) 756 rv.append('enter %s ' % (self.destination,)) 757 if (self.consumingState() == self.destination): 758 rv.append('via %s ' % (self.destination.symbol,)) 759 rv.append('with %s' % (' ; '.join(map(str, self.updateInstructions)),)) 760 if self.__nextTransition: 761 rv.append("\n\tthen ") 762 rv.append(str(self.__nextTransition)) 763 return ''.join(rv) 764 765class Configuration_ABC (object): 766 """Base class for something that represents an L{Automaton} in 767 execution. 768 769 For deterministic automata, this is generally a L{Configuration} 770 which records the current automaton state along with its counter 771 values. 772 773 For non-deterministic automata, this is a L{MultiConfiguration} 774 which records a set of L{Configuration}s.""" 775 776 def acceptableSymbols (self): 777 """Return the acceptable L{Symbol}s given the current 778 configuration. 779 780 This method extracts the symbol from all candidate transitions 781 that are permitted based on the current counter values. 782 Because transitions are presented in a preferred order, the 783 symbols are as well.""" 784 raise NotImplementedError('%s.acceptableSymbols' % (type(self).__name__,)) 785 786 def step (self, symbol): 787 """Execute an automaton transition using the given symbol. 788 789 @param symbol: A symbol from the alphabet of the automaton's 790 language. This is a Python value that should be accepted by 791 the L{SymbolMatch_mixin.match} method of a L{State.symbol}. 792 It is not a L{Symbol} instance. 793 794 @return: The new configuration resulting from the step. 795 796 @raises AutomatonStepError: L{UnrecognizedSymbolError} 797 when no transition compatible with C{symbol} is available, and 798 L{NondeterministicSymbolError} if C{symbol} admits multiple 799 transitions and the subclass does not support 800 non-deterministic steps (see L{MultiConfiguration}). 801 802 @warning: If the step entered or left a sub-automaton the 803 return value will not be the configuration that was used to 804 execute the step. The proper pattern for using this method 805 is:: 806 807 cfg = cfg.step(sym) 808 809 """ 810 raise NotImplementedError('%s.step' % (type(self).__name__,)) 811 812class Configuration (Configuration_ABC): 813 """The state of an L{Automaton} in execution. 814 815 This combines a state node of the automaton with a set of counter 816 values.""" 817 818 __state = None 819 def __get_state (self): 820 """The state of the configuration. 821 822 This is C{None} to indicate an initial state, or one of the underlying automaton's states.""" 823 return self.__state 824 def _set_state (self, state, is_layer_change): 825 """Internal state transition interface. 826 827 @param state: the new destination state 828 829 @param is_layer_change: C{True} iff the transition inducing 830 the state change involves a layer change. 831 """ 832 833 # If the new state and old state are the same, the layer 834 # change has no effect (we're probably leaving a 835 # subconfiguration, and we want to keep the current set of 836 # sub-automata.) 837 if state == self.__state: 838 return 839 840 # Otherwise, discard any unprocessed automata in the former 841 # state, set the state, and if the new state has subautomata 842 # create a set holding them so they can be processed. 843 if is_layer_change: 844 self.__subConfiguration = None 845 self.__subAutomata = None 846 self.__state = state 847 if is_layer_change and (state.subAutomata is not None): 848 assert self.__subAutomata is None 849 self.__subAutomata = list(state.subAutomata) 850 state = property(__get_state) 851 852 __counterValues = None 853 """The values of the counters. 854 855 This is a map from the CounterCondition instances of the 856 underlying automaton to integer values.""" 857 def _get_counterValues (self): 858 return self.__counterValues 859 860 __automaton = None 861 def __get_automaton (self): 862 return self.__automaton 863 automaton = property(__get_automaton) 864 865 __subConfiguration = None 866 def __get_subConfiguration (self): 867 """Reference to configuration being executed in a sub-automaton. 868 869 C{None} if no sub-automaton is active, else a reference to a 870 configuration that is being executed in a sub-automaton. 871 872 Sub-configurations are used to match sub-terms in an 873 L{unordered catenation<All>} term. A configuration may have 874 at most one sub-configuration at a time, and the configuration 875 will be removed and possibly replaced when the term being 876 processed completes.""" 877 return self.__subConfiguration 878 subConfiguration = property(__get_subConfiguration) 879 880 __superConfiguration = None 881 def __get_superConfiguration (self): 882 """Reference to the configuration for which this is a 883 sub-configuration. 884 885 C{None} if no super-automaton is active, else a reference to a 886 configuration that is being executed in a super-automaton. 887 888 The super-configuration relation persists for the lifetime of 889 the configuration.""" 890 return self.__superConfiguration 891 superConfiguration = property(__get_superConfiguration) 892 893 __subAutomata = None 894 def __get_subAutomata (self): 895 """A set of automata that must be satisfied before the current state can complete. 896 897 This is used in unordered catenation. Each sub-automaton 898 represents a term in the catenation. When the configuration 899 enters a state with sub-automata, a set containing references 900 to those automata is assigned to this attribute. 901 Subsequently, until all automata in the state are satisfied, 902 transitions can only occur within an active sub-automaton, out 903 of the active sub-automaton if it is in an accepting state, 904 and into a new sub-automaton if no sub-automaton is active. 905 """ 906 return self.__subAutomata 907 def _set_subAutomata (self, automata): 908 self.__subAutomata = list(automata) 909 subAutomata = property(__get_subAutomata) 910 911 def makeLeaveAutomatonTransition (self): 912 """Create a transition back to the containing configuration. 913 914 This is done when a configuration is in an accepting state and 915 there are candidate transitions to other states that must be 916 considered. The transition does not consume a symbol.""" 917 assert self.__superConfiguration is not None 918 return Transition(self.__superConfiguration.__state, set(), layer_link=self.__superConfiguration) 919 920 def leaveAutomaton (self, sub_configuration): 921 """Execute steps to leave a sub-automaton. 922 923 @param sub_configuration: The configuration associated with 924 the automata that has completed. 925 926 @return: C{self}""" 927 assert sub_configuration.__superConfiguration == self 928 self.__subConfiguration = None 929 return self 930 931 def enterAutomaton (self, automaton): 932 """Execute steps to enter a new automaton. 933 934 The new automaton is removed from the set of remaining 935 automata for the current state, and a new configuration 936 created. No transition is made in that new configuration. 937 938 @param automaton: The automaton to be entered 939 940 @return: The configuration that executes the new automaton as 941 a sub-configuration of C{self}.""" 942 assert self.__subConfiguration is None 943 assert self.__subAutomata is not None 944 self.__subAutomata.remove(automaton) 945 self.__subConfiguration = Configuration(automaton) 946 self.__subConfiguration.__superConfiguration = self 947 return self.__subConfiguration 948 949 def satisfies (self, transition): 950 return UpdateInstruction.Satisfies(self.__counterValues, transition.updateInstructions) 951 952 def reset (self): 953 fac = self.__automaton 954 self.__state = None 955 self.__counterValues = dict(zip(fac.counterConditions, len(fac.counterConditions) * (1,))) 956 self.__subConfiguration = None 957 self.__subAutomata = None 958 959 def candidateTransitions (self, symbol=None): 960 """Return list of viable transitions on C{symbol} 961 962 The transitions that are structurally permitted from this 963 state, in order, filtering out those transitions where the 964 update instruction is not satisfied by the configuration 965 counter values and optionally those for which the symbol does 966 not match. 967 968 @param symbol: A symbol through which a transition from this 969 state is intended. A value of C{None} indicates that the set 970 of transitions should ignore the symbol; candidates are still 971 filtered based on the counter state of the configuration. 972 973 @return: A list of L{Transition} instances permitted from the 974 current configuration. If C{symbol} is not C{None}, 975 transitions that would not accept the symbol are excluded. 976 Any transition that would require an unsatisfied counter 977 update is also excluded. Non-deterministic automata may 978 result in a lits with multiple members. """ 979 980 fac = self.__automaton 981 transitions = [] 982 if symbol is None: 983 match_filter = lambda _xit: True 984 else: 985 match_filter = lambda _xit: _xit.consumingState().match(symbol) 986 update_filter = lambda _xit: _xit.satisfiedBy(self) 987 988 if self.__state is None: 989 # Special-case the initial entry to the topmost configuration 990 transitions.extend(fac.initialTransitions) 991 elif (self.__subConfiguration is not None) and not self.__subConfiguration.isAccepting(): 992 # If there's an active subconfiguration that is not in an 993 # accepting state, we can't do anything at this level. 994 pass 995 else: 996 # Normally include transitions at this level, but in some 997 # cases they are not permitted. 998 include_local = True 999 if self.__subAutomata: 1000 # Disallow transitions in this level if there are 1001 # subautomata that require symbols before a transition 1002 # out of this node is allowed. 1003 (include_local, sub_initial) = self.__state.subAutomataInitialTransitions(self.__subAutomata) 1004 transitions.extend(map(lambda _xit: _xit.makeEnterAutomatonTransition(), sub_initial)) 1005 if include_local: 1006 # Transitions within this layer 1007 for xit in filter(update_filter, self.__state.transitionSet): 1008 if xit.consumingState() is not None: 1009 transitions.append(xit) 1010 else: 1011 # The transition did not consume a symbol, so we have to find 1012 # one that does, from among the subautomata of the destination. 1013 # We do not care if the destination is nullable; alternatives 1014 # to it are already being handled with different transitions. 1015 (_, sub_initial) = xit.destination.subAutomataInitialTransitions() 1016 transitions.extend(map(lambda _xit: xit.chainTo(_xit.makeEnterAutomatonTransition()), sub_initial)) 1017 if (self.__superConfiguration is not None) and self.isAccepting(): 1018 # Transitions that leave this automaton 1019 lxit = self.makeLeaveAutomatonTransition() 1020 supxit = self.__superConfiguration.candidateTransitions(symbol) 1021 transitions.extend(map(lambda _sx: lxit.chainTo(_sx), supxit)) 1022 assert len(frozenset(transitions)) == len(transitions) 1023 return list(filter(update_filter, filter(match_filter, transitions))) 1024 1025 def acceptableSymbols (self): 1026 return [ _xit.consumedSymbol() for _xit in self.candidateTransitions()] 1027 1028 def step (self, symbol): 1029 transitions = self.candidateTransitions(symbol) 1030 if 0 == len(transitions): 1031 raise UnrecognizedSymbolError(self, symbol) 1032 if 1 < len(transitions): 1033 raise NondeterministicSymbolError(self, symbol) 1034 return transitions[0].apply(self) 1035 1036 def isInitial (self): 1037 """Return C{True} iff no transitions have ever been made.""" 1038 return self.__state is None 1039 1040 def isAccepting (self): 1041 """Return C{True} iff the automaton is in an accepting state.""" 1042 if self.__state is not None: 1043 # Any active sub-configuration must be accepting 1044 if (self.__subConfiguration is not None) and not self.__subConfiguration.isAccepting(): 1045 return False 1046 # Any unprocessed sub-automata must be nullable 1047 if self.__subAutomata is not None: 1048 if not functools.reduce(operator.and_, map(lambda _sa: _sa.nullable, self.__subAutomata), True): 1049 return False 1050 # This state must be accepting 1051 return self.__state.isAccepting(self.__counterValues) 1052 # Accepting without any action requires nullable automaton 1053 return self.__automaton.nullable 1054 1055 def __init__ (self, automaton, super_configuration=None): 1056 self.__automaton = automaton 1057 self.__superConfiguration = super_configuration 1058 self.reset() 1059 1060 def clone (self, clone_map=None): 1061 """Clone a configuration and its descendents. 1062 1063 This is used for parallel execution where a configuration has 1064 multiple candidate transitions and must follow all of them. 1065 It clones the entire chain of configurations through 1066 multiple layers. 1067 1068 @param clone_map: Optional map into which the translation from 1069 the original configuration object to the corresponding cloned 1070 configuration object can be reconstructed, e.g. when applying 1071 a transition that includes automata exits referencing 1072 superconfigurations from the original configuration. 1073 """ 1074 if clone_map is None: 1075 clone_map = {} 1076 root = self 1077 while root.__superConfiguration is not None: 1078 root = root.__superConfiguration 1079 root = root._clone(clone_map, None) 1080 return clone_map.get(self) 1081 1082 def _clone (self, clone_map, super_configuration): 1083 assert not self in clone_map 1084 other = type(self)(self.__automaton) 1085 clone_map[self] = other 1086 other.__state = self.__state 1087 other.__counterValues = self.__counterValues.copy() 1088 other.__superConfiguration = super_configuration 1089 if self.__subAutomata is not None: 1090 other.__subAutomata = self.__subAutomata[:] 1091 if self.__subConfiguration: 1092 other.__subConfiguration = self.__subConfiguration._clone(clone_map, other) 1093 return other 1094 1095 def __str__ (self): 1096 return '%s: %s' % (self.__state, ' ; '.join([ '%s=%u' % (_c,_v) for (_c,_v) in six.iteritems(self.__counterValues)])) 1097 1098class MultiConfiguration (Configuration_ABC): 1099 """Support parallel execution of state machine. 1100 1101 This holds a set of configurations, and executes each transition 1102 on each one. Configurations which fail to accept a step are 1103 silently dropped; only if this results in no remaining 1104 configurations will L{UnrecognizedSymbolError} be raised. If a 1105 step admits multiple valid transitions, a configuration is added 1106 for each one. 1107 1108 See L{pyxb.binding.content.AutomatonConfiguration} for an 1109 alternative solution which holds actions associated with the 1110 transition until the non-determinism is resolved.""" 1111 1112 __configurations = None 1113 1114 def __init__ (self, configuration): 1115 self.__configurations = [ configuration] 1116 1117 def acceptableSymbols (self): 1118 acceptable = [] 1119 for cfg in self.__configurations: 1120 acceptable.extend(cfg.acceptableSymbols()) 1121 return acceptable 1122 1123 def step (self, symbol): 1124 next_configs = [] 1125 for cfg in self.__configurations: 1126 transitions = cfg.candidateTransitions(symbol) 1127 if 0 == len(transitions): 1128 pass 1129 elif 1 == len(transitions): 1130 next_configs.append(transitions[0].apply(cfg)) 1131 else: 1132 for transition in transitions: 1133 clone_map = {} 1134 ccfg = cfg.clone(clone_map) 1135 next_configs.append(transition.apply(ccfg, clone_map)) 1136 if 0 == len(next_configs): 1137 raise UnrecognizedSymbolError(self, symbol) 1138 assert len(frozenset(next_configs)) == len(next_configs) 1139 self.__configurations = next_configs 1140 return self 1141 1142 def acceptingConfigurations (self): 1143 """Return the set of configurations that are in an accepting state. 1144 1145 Note that some of the configurations may be within a 1146 sub-automaton; their presence in the return value is because 1147 the root configuration is also accepting.""" 1148 accepting = [] 1149 for cfg in self.__configurations: 1150 rcfg = cfg 1151 # Rule out configurations that are accepting within their 1152 # automaton, but not in the containing automaton. 1153 while rcfg.superConfiguration is not None: 1154 rcfg = rcfg.superConfiguration 1155 if rcfg.isAccepting(): 1156 accepting.append(cfg) 1157 return accepting 1158 1159class Automaton (object): 1160 """Representation of a Finite Automaton with Counters. 1161 1162 This has all the standard FAC elements, plus links to other 1163 states/automata as required to support the nested automata 1164 construct used for matching unordered catenation terms.""" 1165 __states = None 1166 def __get_states (self): 1167 """The set of L{State}s in the automaton. 1168 1169 These correspond essentially to marked symbols in the original 1170 regular expression, or L{element 1171 declarations<pyxb.xmlschema.structures.ElementDeclaration>} in 1172 an XML schema. 1173 1174 @note: These are conceptually a set and are stored that way. 1175 When an L{Automaton} is constructed the incoming states should 1176 be passed as a list so the calculated initial transitions are 1177 executed in a deterministic order.""" 1178 return self.__states 1179 states = property(__get_states) 1180 1181 __counterConditions = None 1182 def __get_counterConditions (self): 1183 """The set of L{CounterCondition}s in the automaton. 1184 1185 These are marked positions in the regular expression, or 1186 L{particles<pyxb.xmlschema.structures.Particle>} in an XML 1187 schema, paired with their occurrence constraints.""" 1188 return self.__counterConditions 1189 counterConditions = property(__get_counterConditions) 1190 1191 __nullable = None 1192 def __get_nullable (self): 1193 """C{True} iff the automaton accepts the empty string.""" 1194 return self.__nullable 1195 nullable = property(__get_nullable) 1196 1197 __initialTransitions = None 1198 def __get_initialTransitions (self): 1199 """The set of transitions that may be made to enter the automaton. 1200 1201 These are full transitions, including chains into subautomata 1202 if an initial state represents a node with sub-automata. 1203 1204 @note: As with L{State.transitionSet}, the set is represented 1205 as a list to preserve priority when resolving 1206 non-deterministic matches.""" 1207 return self.__initialTransitions 1208 initialTransitions = property(__get_initialTransitions) 1209 1210 __containingState = None 1211 def __get_containingState (self): 1212 """The L{State} instance for which this is a sub-automaton. 1213 1214 C{None} if this is not a sub-automaton.""" 1215 return self.__containingState 1216 containingState = property(__get_containingState) 1217 1218 __finalStates = None 1219 def __get_finalStates (self): 1220 """The set of L{State} members which can terminate a match.""" 1221 return self.__finalStates 1222 finalStates = property(__get_finalStates) 1223 1224 def __init__ (self, states, counter_conditions, nullable, containing_state=None): 1225 self.__states = frozenset(states) 1226 for st in self.__states: 1227 st._set_automaton(self) 1228 self.__counterConditions = frozenset(counter_conditions) 1229 self.__nullable = nullable 1230 self.__containingState = containing_state 1231 xit = [] 1232 fnl = set() 1233 # Iterate over states, not self.__states, in case the input was a list. 1234 # This way we preserve the priority for initial transitions. 1235 for s in states: 1236 if s.isInitial: 1237 xit.extend(s.automatonEntryTransitions) 1238 if s.finalUpdate is not None: 1239 fnl.add(s) 1240 self.__initialTransitions = xit 1241 self.__finalStates = frozenset(fnl) 1242 1243 def newConfiguration (self): 1244 """Return a new L{Configuration} instance for this automaton.""" 1245 return Configuration(self) 1246 1247 def __str__ (self): 1248 rv = [] 1249 rv.append('sigma = %s' % (' '.join(map(lambda _s: str(_s.symbol), self.__states)))) 1250 rv.append('states = %s' % (' '.join(map(str, self.__states)))) 1251 for s in self.__states: 1252 if s.subAutomata is not None: 1253 for i in xrange(len(s.subAutomata)): 1254 rv.append('SA %s.%u is %x:\n ' % (str(s), i, id(s.subAutomata[i])) + '\n '.join(str(s.subAutomata[i]).split('\n'))) 1255 rv.append('counters = %s' % (' '.join(map(str, self.__counterConditions)))) 1256 rv.append('initial = %s' % (' ; '.join([ '%s on %s' % (_s, _s.symbol) for _s in filter(lambda _s: _s.isInitial, self.__states)]))) 1257 rv.append('initial transitions:\n%s' % ('\n'.join(map(str, self.initialTransitions)))) 1258 rv.append('States:') 1259 for s in self.__states: 1260 rv.append('%s: %s' % (s, s._facText())) 1261 return '\n'.join(rv) 1262 1263class Node (object): 1264 """Abstract class for any node in the term tree. 1265 1266 In its original form a B{position} (C{pos}) is a tuple of 1267 non-negative integers comprising a path from a node in the term 1268 tree. It identifies a node in the tree. After the FAC has been 1269 constructed, only positions that are leaf nodes in the term tree 1270 remain, and the corresponding symbol value (Python instance) is 1271 used as the position. 1272 1273 An B{update instruction} (C{psi}) is a map from positions to 1274 either L{Node.RESET} or L{Node.INCREMENT}. It identifies actions 1275 to be taken on the counter states corresponding to the positions 1276 in its domain. 1277 1278 A B{transition} is a pair containing a position and an update 1279 instruction. It identifies a potential next node in the state and 1280 the updates that are to be performed if the transition is taken. 1281 1282 A B{follow value} is a map from a position to a set of transitions 1283 that may originate from the pos. This set is represented as a 1284 Python list since update instructions are dicts and cannot be 1285 hashed. 1286 """ 1287 1288 _Precedence = None 1289 """An integral value used for parenthesizing expressions. 1290 1291 A subterm that has a precedence less than that of its containing 1292 term must be enclosed in parentheses when forming a text 1293 expression representing the containing term.""" 1294 1295 RESET = False 1296 """An arbitrary value representing reset of a counter.""" 1297 1298 INCREMENT = True 1299 """An arbitrary value representing increment of a counter.""" 1300 1301 def __init__ (self, **kw): 1302 """Create a FAC term-tree node. 1303 1304 @keyword metadata: Any application-specific metadata retained in 1305 the term tree for transfer to the resulting automaton.""" 1306 self.__metadata = kw.get('metadata') 1307 1308 def clone (self, *args, **kw): 1309 """Create a deep copy of the node. 1310 1311 All term-tree--related attributes and properties are replaced 1312 with deep clones. Other attributes are preserved. 1313 1314 @param args: A tuple of arguments to be passed to the instance 1315 constructor. 1316 1317 @param kw: A dict of keywords to be passed to the instance 1318 constructor. 1319 1320 @note: Subclasses should pre-extend this method to augment the 1321 C{args} and C{kw} parameters as necessary to match the 1322 expectations of the C{__init__} method of the class being 1323 cloned.""" 1324 kw.setdefault('metadata', self.metadata) 1325 return type(self)(*args, **kw) 1326 1327 __metadata = None 1328 def __get_metadata (self): 1329 """Application-specific metadata provided during construction.""" 1330 return self.__metadata 1331 metadata = property(__get_metadata) 1332 1333 __first = None 1334 def __get_first (self): 1335 """The I{first} set for the node. 1336 1337 This is the set of positions leading to symbols that can 1338 appear first in a string matched by an execution starting at 1339 the node.""" 1340 if self.__first is None: 1341 self.__first = frozenset(self._first()) 1342 return self.__first 1343 first = property(__get_first) 1344 1345 def _first (self): 1346 """Abstract method that defines L{first} for the subclass. 1347 1348 The return value should be an iterable of tuples of integers 1349 denoting paths from this node through the term tree to a 1350 symbol.""" 1351 raise NotImplementedError('%s.first' % (type(self).__name__,)) 1352 1353 __last = None 1354 def __get_last (self): 1355 """The I{last} set for the node. 1356 1357 This is the set of positions leading to symbols that can 1358 appear last in a string matched by an execution starting at 1359 the node.""" 1360 if self.__last is None: 1361 self.__last = frozenset(self._last()) 1362 return self.__last 1363 last = property(__get_last) 1364 1365 def _last (self): 1366 """Abstract method that defines L{last} for the subclass. 1367 1368 The return value should be an iterable of tuples of integers 1369 denoting paths from this node through the term tree to a 1370 symbol.""" 1371 raise NotImplementedError('%s.last' % (type(self).__name__,)) 1372 1373 __nullable = None 1374 def __get_nullable (self): 1375 """C{True} iff the empty string is accepted by this node.""" 1376 if self.__nullable is None: 1377 self.__nullable = self._nullable() 1378 return self.__nullable 1379 nullable = property(__get_nullable) 1380 1381 def _nullable (self): 1382 """Abstract method that defines L{nullable} for the subclass. 1383 1384 The return value should be C{True} or C{False}.""" 1385 raise NotImplementedError('%s.nullable' % (type(self).__name__,)) 1386 1387 __follow = None 1388 def __get_follow (self): 1389 """The I{follow} map for the node.""" 1390 if self.__follow is None: 1391 self.__follow = self._follow() 1392 return self.__follow 1393 follow = property(__get_follow) 1394 1395 def _follow (self): 1396 """Abstract method that defines L{follow} for the subclass. 1397 1398 The return value should be a map from tuples of integers (positions) 1399 to a list of transitions, where a transition is a position and 1400 an update instruction.""" 1401 raise NotImplementedError('%s.follow' % (type(self).__name__,)) 1402 1403 def reset (self): 1404 """Reset any term-tree state associated with the node. 1405 1406 Any change to the structure of the term tree in which the node 1407 appears invalidates memoized first/follow sets and related 1408 information. This method clears all that data so it can be 1409 recalculated. It does not clear the L{metadata} link, or any 1410 existing structural data.""" 1411 self.__first = None 1412 self.__last = None 1413 self.__nullable = None 1414 self.__follow = None 1415 self.__counterPositions = None 1416 1417 def walkTermTree (self, pre, post, arg): 1418 """Utility function for term tree processing. 1419 1420 @param pre: a callable that, unless C{None}, is invoked at 1421 each node C{n} with parameters C{n}, C{pos}, and C{arg}, where 1422 C{pos} is the tuple of integers identifying the path from the 1423 node at on which this method was invoked to the node being 1424 processed. The invocation occurs before processing any 1425 subordinate nodes. 1426 1427 @param post: as with C{pre} but invocation occurs after 1428 processing any subordinate nodes. 1429 1430 @param arg: a value passed to invocations of C{pre} and 1431 C{post}.""" 1432 self._walkTermTree((), pre, post, arg) 1433 1434 def _walkTermTree (self, position, pre, post, arg): 1435 """Abstract method implementing L{walkTermTree} for the subclass.""" 1436 raise NotImplementedError('%s.walkTermTree' % (type(self).__name__,)) 1437 1438 __posNodeMap = None 1439 def __get_posNodeMap (self): 1440 """A map from positions to nodes in the term tree.""" 1441 if self.__posNodeMap is None: 1442 pnm = { } 1443 self.walkTermTree(lambda _n,_p,_a: _a.setdefault(_p, _n), None, pnm) 1444 self.__posNodeMap = pnm 1445 return self.__posNodeMap 1446 posNodeMap = property(__get_posNodeMap) 1447 1448 __nodePosMap = None 1449 def __get_nodePosMap (self): 1450 """A map from nodes to their position in the term tree.""" 1451 if self.__nodePosMap is None: 1452 npm = {} 1453 for (p,n) in six.iteritems(self.posNodeMap): 1454 npm[n] = p 1455 self.__nodePosMap = npm 1456 return self.__nodePosMap 1457 nodePosMap = property(__get_nodePosMap) 1458 1459 @classmethod 1460 def _PosConcatPosSet (cls, pos, pos_set): 1461 """Implement definition 11.1 in B{HOV09}.""" 1462 return frozenset([ pos + _mp for _mp in pos_set ]) 1463 1464 @classmethod 1465 def _PosConcatUpdateInstruction (cls, pos, psi): 1466 """Implement definition 11.2 in B{HOV09}""" 1467 rv = {} 1468 for (q, v) in six.iteritems(psi): 1469 rv[pos + q] = v 1470 return rv 1471 1472 @classmethod 1473 def _PosConcatTransitionSet (cls, pos, transition_set): 1474 """Implement definition 11.3 in B{HOV09}""" 1475 ts = [] 1476 for (q, psi) in transition_set: 1477 ts.append((pos + q, cls._PosConcatUpdateInstruction(pos, psi) )) 1478 return ts 1479 1480 def __resetAndValidate (self, node, pos, visited_nodes): 1481 if node in visited_nodes: 1482 raise InvalidTermTreeError(self, node) 1483 node.reset() 1484 visited_nodes.add(node) 1485 1486 def buildAutomaton (self, state_ctor=State, ctr_cond_ctor=CounterCondition, containing_state=None): 1487 # Validate that the term tree is in fact a tree. A DAG does 1488 # not work. If the tree had cycles, the automaton build 1489 # wouldn't even return. 1490 self.walkTermTree(self.__resetAndValidate, None, set()) 1491 1492 counter_map = { } 1493 for pos in self.counterPositions: 1494 nci = self.posNodeMap.get(pos) 1495 assert isinstance(nci, NumericalConstraint) 1496 assert nci not in counter_map 1497 counter_map[pos] = ctr_cond_ctor(nci.min, nci.max, nci.metadata) 1498 counters = list(six.itervalues(counter_map)) 1499 1500 state_map = { } 1501 for pos in six.iterkeys(self.follow): 1502 sym = self.posNodeMap.get(pos) 1503 assert isinstance(sym, LeafNode) 1504 assert sym not in state_map 1505 1506 # The state may be an initial state if it is in the first 1507 # set for the root of the term tree. 1508 is_initial = pos in self.first 1509 1510 # The state may be a final state if it is nullable or is 1511 # in the last set of the term tree. 1512 final_update = None 1513 if (() == pos and sym.nullable) or (pos in self.last): 1514 # Acceptance is further constrained by the counter 1515 # values satisfying an update rule that would reset 1516 # all counters that are relevant at the state. 1517 final_update = set() 1518 for nci in map(counter_map.get, self.counterSubPositions(pos)): 1519 final_update.add(UpdateInstruction(nci, False)) 1520 state_map[pos] = state_ctor(sym.metadata, is_initial=is_initial, final_update=final_update, is_unordered_catenation=isinstance(sym, All)) 1521 if isinstance(sym, All): 1522 state_map[pos]._set_subAutomata(*map(lambda _s: _s.buildAutomaton(state_ctor, ctr_cond_ctor, containing_state=state_map[pos]), sym.terms)) 1523 states = list(six.itervalues(state_map)) 1524 1525 for (spos, transition_set) in six.iteritems(self.follow): 1526 src = state_map[spos] 1527 phi = [] 1528 for (dpos, psi) in transition_set: 1529 dst = state_map[dpos] 1530 uiset = set() 1531 for (counter, action) in six.iteritems(psi): 1532 uiset.add(UpdateInstruction(counter_map[counter], self.INCREMENT == action)) 1533 phi.append(Transition(dst, uiset)) 1534 src._set_transitionSet(phi) 1535 1536 return Automaton(states, counters, self.nullable, containing_state=containing_state) 1537 1538 __counterPositions = None 1539 def __get_counterPositions (self): 1540 """Implement definition 13.1 from B{HOV09}. 1541 1542 The return value is the set of all positions leading to 1543 L{NumericalConstraint} nodes for which either the minimum 1544 value is not 1 or the maximum value is not unbounded.""" 1545 if self.__counterPositions is None: 1546 cpos = [] 1547 self.walkTermTree(lambda _n,_p,_a: \ 1548 isinstance(_n, NumericalConstraint) \ 1549 and ((1 != _n.min) \ 1550 or (_n.max is not None)) \ 1551 and _a.append(_p), 1552 None, cpos) 1553 self.__counterPositions = frozenset(cpos) 1554 return self.__counterPositions 1555 counterPositions = property(__get_counterPositions) 1556 1557 def counterSubPositions (self, pos): 1558 """Implement definition 13.2 from B{HOV09}. 1559 1560 This is the subset of L{counterPositions} that occur along the 1561 path to C{pos}.""" 1562 rv = set() 1563 for cpos in self.counterPositions: 1564 if cpos == pos[:len(cpos)]: 1565 rv.add(cpos) 1566 return frozenset(rv) 1567 1568 def _facToString (self): 1569 """Obtain a description of the FAC in text format. 1570 1571 This is a diagnostic tool, returning first, last, and follow 1572 maps using positions.""" 1573 rv = [] 1574 rv.append('r\t= %s' % (str(self),)) 1575 states = list(six.iterkeys(self.follow)) 1576 rv.append('sym(r)\t= %s' % (' '.join(map(str, map(self.posNodeMap.get, states))))) 1577 rv.append('first(r)\t= %s' % (' '.join(map(str, self.first)))) 1578 rv.append('last(r)\t= %s' % (' '.join(map(str, self.last)))) 1579 rv.append('C\t= %s' % (' '.join(map(str, self.counterPositions)))) 1580 for pos in self.first: 1581 rv.append('qI(%s) -> %s' % (self.posNodeMap[pos].metadata, str(pos))) 1582 for spos in states: 1583 for (dpos, transition_set) in self.follow[spos]: 1584 dst = self.posNodeMap[dpos] 1585 uv = [] 1586 for (c, u) in six.iteritems(transition_set): 1587 uv.append('%s %s' % (u == self.INCREMENT and "inc" or "rst", str(c))) 1588 rv.append('%s -%s-> %s ; %s' % (str(spos), dst.metadata, str(dpos), ' ; '.join(uv))) 1589 return '\n'.join(rv) 1590 1591class MultiTermNode (Node): 1592 """Intermediary for nodes that have multiple child nodes.""" 1593 1594 __terms = None 1595 def __get_terms (self): 1596 """The set of subordinate terms of the current node.""" 1597 return self.__terms 1598 terms = property(__get_terms) 1599 1600 def __init__ (self, *terms, **kw): 1601 """Term that collects an ordered sequence of terms. 1602 1603 The terms are provided as arguments. All must be instances of 1604 a subclass of L{Node}.""" 1605 super(MultiTermNode, self).__init__(**kw) 1606 self.__terms = terms 1607 1608 def clone (self): 1609 cterms = map(lambda _s: _s.clone(), self.__terms) 1610 return super(MultiTermNode, self).clone(*cterms) 1611 1612 def _walkTermTree (self, position, pre, post, arg): 1613 if pre is not None: 1614 pre(self, position, arg) 1615 for c in xrange(len(self.__terms)): 1616 self.__terms[c]._walkTermTree(position + (c,), pre, post, arg) 1617 if post is not None: 1618 post(self, position, arg) 1619 1620class LeafNode (Node): 1621 """Intermediary for nodes that have no child nodes.""" 1622 def _first (self): 1623 return [()] 1624 def _last (self): 1625 return [()] 1626 def _nullable (self): 1627 return False 1628 def _follow (self): 1629 return { (): frozenset() } 1630 1631 def _walkTermTree (self, position, pre, post, arg): 1632 if pre is not None: 1633 pre(self, position, arg) 1634 if post is not None: 1635 post(self, position, arg) 1636 1637class NumericalConstraint (Node): 1638 """A term with a numeric range constraint. 1639 1640 This corresponds to a "particle" in the XML Schema content model.""" 1641 1642 _Precedence = -1 1643 1644 __min = None 1645 def __get_min (self): 1646 return self.__min 1647 min = property(__get_min) 1648 1649 __max = None 1650 def __get_max (self): 1651 return self.__max 1652 max = property(__get_max) 1653 1654 __term = None 1655 def __get_term (self): 1656 return self.__term 1657 term = property(__get_term) 1658 1659 def __init__ (self, term, min=0, max=1, **kw): 1660 """Term with a numerical constraint. 1661 1662 @param term: A term, the number of appearances of which is 1663 constrained in this term. 1664 @type term: L{Node} 1665 1666 @keyword min: The minimum number of occurrences of C{term}. 1667 The value must be non-negative. 1668 1669 @keyword max: The maximum number of occurrences of C{term}. 1670 The value must be positive (in which case it must also be no 1671 smaller than C{min}), or C{None} to indicate an unbounded 1672 number of occurrences.""" 1673 super(NumericalConstraint, self).__init__(**kw) 1674 self.__term = term 1675 self.__min = min 1676 self.__max = max 1677 1678 def clone (self): 1679 return super(NumericalConstraint, self).clone(self.__term, self.__min, self.__max) 1680 1681 def _first (self): 1682 return [ (0,) + _fc for _fc in self.__term.first ] 1683 1684 def _last (self): 1685 return [ (0,) + _lc for _lc in self.__term.last ] 1686 1687 def _nullable (self): 1688 return (0 == self.__min) or self.__term.nullable 1689 1690 def _follow (self): 1691 rv = {} 1692 pp = (0,) 1693 last_r1 = set(self.__term.last) 1694 for (q, transition_set) in six.iteritems(self.__term.follow): 1695 rv[pp+q] = self._PosConcatTransitionSet(pp, transition_set) 1696 if q in last_r1: 1697 last_r1.remove(q) 1698 for sq1 in self.__term.first: 1699 q1 = pp+sq1 1700 psi = {} 1701 for p1 in self.__term.counterSubPositions(q): 1702 psi[pp+p1] = self.RESET 1703 if (1 != self.min) or (self.max is not None): 1704 psi[()] = self.INCREMENT 1705 rv[pp+q].append((q1, psi)) 1706 assert not last_r1 1707 return rv 1708 1709 def _walkTermTree (self, position, pre, post, arg): 1710 if pre is not None: 1711 pre(self, position, arg) 1712 self.__term._walkTermTree(position + (0,), pre, post, arg) 1713 if post is not None: 1714 post(self, position, arg) 1715 1716 def __str__ (self): 1717 rv = str(self.__term) 1718 if self.__term._Precedence < self._Precedence: 1719 rv = '(' + rv + ')' 1720 rv += '^(%u,' % (self.__min,) 1721 if self.__max is not None: 1722 rv += '%u' % (self.__max) 1723 return rv + ')' 1724 1725class Choice (MultiTermNode): 1726 """A term that may be any one of a set of terms. 1727 1728 This term matches if any one of its contained terms matches.""" 1729 1730 _Precedence = -3 1731 1732 def __init__ (self, *terms, **kw): 1733 """Term that selects one of a set of terms. 1734 1735 The terms are provided as arguments. All must be instances of 1736 a subclass of L{Node}.""" 1737 super(Choice, self).__init__(*terms, **kw) 1738 1739 def _first (self): 1740 rv = set() 1741 for c in xrange(len(self.terms)): 1742 rv.update([ (c,) + _fc for _fc in self.terms[c].first]) 1743 return rv 1744 1745 def _last (self): 1746 rv = set() 1747 for c in xrange(len(self.terms)): 1748 rv.update([ (c,) + _lc for _lc in self.terms[c].last]) 1749 return rv 1750 1751 def _nullable (self): 1752 for t in self.terms: 1753 if t.nullable: 1754 return True 1755 return False 1756 1757 def _follow (self): 1758 rv = {} 1759 for c in xrange(len(self.terms)): 1760 for (q, transition_set) in six.iteritems(self.terms[c].follow): 1761 pp = (c,) 1762 rv[pp + q] = self._PosConcatTransitionSet(pp, transition_set) 1763 return rv 1764 1765 def __str__ (self): 1766 elts = [] 1767 for t in self.terms: 1768 if t._Precedence < self._Precedence: 1769 elts.append('(' + str(t) + ')') 1770 else: 1771 elts.append(str(t)) 1772 return '+'.join(elts) 1773 1774class Sequence (MultiTermNode): 1775 """A term that is an ordered sequence of terms.""" 1776 1777 _Precedence = -2 1778 1779 def __init__ (self, *terms, **kw): 1780 """Term that collects an ordered sequence of terms. 1781 1782 The terms are provided as arguments. All must be instances of 1783 a subclass of L{Node}.""" 1784 super(Sequence, self).__init__(*terms, **kw) 1785 1786 def _first (self): 1787 rv = set() 1788 c = 0 1789 while c < len(self.terms): 1790 t = self.terms[c] 1791 rv.update([ (c,) + _fc for _fc in t.first]) 1792 if not t.nullable: 1793 break 1794 c += 1 1795 return rv 1796 1797 def _last (self): 1798 rv = set() 1799 c = len(self.terms) - 1 1800 while 0 <= c: 1801 t = self.terms[c] 1802 rv.update([ (c,) + _lc for _lc in t.last]) 1803 if not t.nullable: 1804 break 1805 c -= 1 1806 return rv 1807 1808 def _nullable (self): 1809 for t in self.terms: 1810 if not t.nullable: 1811 return False 1812 return True 1813 1814 def _follow (self): 1815 rv = {} 1816 for c in xrange(len(self.terms)): 1817 pp = (c,) 1818 for (q, transition_set) in six.iteritems(self.terms[c].follow): 1819 rv[pp + q] = self._PosConcatTransitionSet(pp, transition_set) 1820 for c in xrange(len(self.terms)-1): 1821 t = self.terms[c] 1822 pp = (c,) 1823 # Link from the last of one term to the first of the next term. 1824 # Repeat while the destination term is nullable and there are 1825 # successor terms. 1826 for q in t.last: 1827 psi = {} 1828 for p1 in t.counterSubPositions(q): 1829 psi[pp + p1] = self.RESET 1830 nc = c 1831 while nc+1 < len(self.terms): 1832 nc += 1 1833 nt = self.terms[nc] 1834 for sq1 in nt.first: 1835 q1 = (nc,) + sq1 1836 rv[pp+q].append((q1, psi)) 1837 if not nt.nullable: 1838 break 1839 return rv 1840 1841 def __str__ (self): 1842 elts = [] 1843 for t in self.terms: 1844 if t._Precedence < self._Precedence: 1845 elts.append('(' + str(t) + ')') 1846 else: 1847 elts.append(str(t)) 1848 return '.'.join(elts) 1849 1850class All (MultiTermNode, LeafNode): 1851 """A term that is an unordered sequence of terms. 1852 1853 Note that the inheritance structure for this node is unusual. It 1854 has multiple children when it is treated as a term tree, but is 1855 considered a leaf node when constructing an automaton. 1856 """ 1857 1858 _Precedence = 0 1859 1860 def __init__ (self, *terms, **kw): 1861 """Term that collects an unordered sequence of terms. 1862 1863 The terms are provided as arguments. All must be instances of 1864 a subclass of L{Node}.""" 1865 super(All, self).__init__(*terms, **kw) 1866 1867 def _nullable (self): 1868 for t in self.terms: 1869 if not t.nullable: 1870 return False 1871 return True 1872 1873 @classmethod 1874 def CreateTermTree (cls, *terms): 1875 """Create a term tree that implements unordered catenation of 1876 the terms. 1877 1878 This expansion results in a standard choice/sequence term 1879 tree, at the cost of quadratic state expansion because terms 1880 are L{cloned<Node.clone>} as required to satisfy the tree 1881 requirements of the term tree. 1882 1883 @param terms: The tuple of terms that are elements of an 1884 accepted sequence. 1885 1886 @return: A term tree comprising a choice between sequences 1887 that connect each term to the unordered catenation of the 1888 remaining terms.""" 1889 if 1 == len(terms): 1890 return terms[0] 1891 disjuncts = [] 1892 for i in xrange(len(terms)): 1893 n = terms[i] 1894 rem = map(lambda _s: _s.clone(), terms[:i] + terms[i+1:]) 1895 disjuncts.append(Sequence(n, cls.CreateTermTree(*rem))) 1896 return Choice(*disjuncts) 1897 1898 def __str__ (self): 1899 return six.u('&(') + six.u(',').join([str(_t) for _t in self.terms]) + ')' 1900 1901class Symbol (LeafNode): 1902 """A leaf term that is a symbol. 1903 1904 The symbol is represented by the L{metadata} field.""" 1905 1906 _Precedence = 0 1907 1908 def __init__ (self, symbol, **kw): 1909 kw['metadata'] = symbol 1910 super(Symbol, self).__init__(**kw) 1911 1912 def clone (self): 1913 return super(Symbol, self).clone(self.metadata) 1914 1915 def __str__ (self): 1916 return str(self.metadata) 1917