1 /*
2  * reserved comment block
3  * DO NOT REMOVE OR ALTER!
4  */
5 /*
6  * Copyright 1999-2004 The Apache Software Foundation.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 
21 package com.sun.org.apache.xerces.internal.impl.xs.models;
22 
23 import com.sun.org.apache.xerces.internal.xni.QName;
24 import com.sun.org.apache.xerces.internal.impl.dtd.models.CMNode;
25 import com.sun.org.apache.xerces.internal.impl.dtd.models.CMStateSet;
26 import com.sun.org.apache.xerces.internal.impl.xs.SchemaSymbols;
27 import com.sun.org.apache.xerces.internal.impl.xs.SubstitutionGroupHandler;
28 import com.sun.org.apache.xerces.internal.impl.xs.XSElementDecl;
29 import com.sun.org.apache.xerces.internal.impl.xs.XSParticleDecl;
30 import com.sun.org.apache.xerces.internal.impl.xs.XSModelGroupImpl;
31 import com.sun.org.apache.xerces.internal.impl.xs.XSWildcardDecl;
32 import com.sun.org.apache.xerces.internal.impl.xs.XMLSchemaException;
33 import com.sun.org.apache.xerces.internal.impl.xs.XSConstraints;
34 
35 import java.util.Vector;
36 import java.util.ArrayList;
37 import java.util.HashMap;
38 
39 /**
40  * DFAContentModel is the implementation of XSCMValidator that does
41  * all of the non-trivial element content validation. This class does
42  * the conversion from the regular expression to the DFA that
43  * it then uses in its validation algorithm.
44  *
45  * @xerces.internal
46  *
47  * @author Neil Graham, IBM
48  * @version $Id: XSDFACM.java,v 1.9 2010/08/06 23:49:43 joehw Exp $
49  */
50 public class XSDFACM
51     implements XSCMValidator {
52 
53     //
54     // Constants
55     //
56     private static final boolean DEBUG = false;
57 
58     // special strings
59 
60     // debugging
61 
62     /** Set to true to debug content model validation. */
63     private static final boolean DEBUG_VALIDATE_CONTENT = false;
64 
65     //
66     // Data
67     //
68 
69     /**
70      * This is the map of unique input symbol elements to indices into
71      * each state's per-input symbol transition table entry. This is part
72      * of the built DFA information that must be kept around to do the
73      * actual validation.  Note tat since either XSElementDecl or XSParticleDecl object
74      * can live here, we've got to use an Object.
75      */
76     private Object fElemMap[] = null;
77 
78     /**
79      * This is a map of whether the element map contains information
80      * related to ANY models.
81      */
82     private int fElemMapType[] = null;
83 
84     /**
85      * id of the unique input symbol
86      */
87     private int fElemMapId[] = null;
88 
89     /** The element map size. */
90     private int fElemMapSize = 0;
91 
92     /**
93      * This is an array of booleans, one per state (there are
94      * fTransTableSize states in the DFA) that indicates whether that
95      * state is a final state.
96      */
97     private boolean fFinalStateFlags[] = null;
98 
99     /**
100      * The list of follow positions for each NFA position (i.e. for each
101      * non-epsilon leaf node.) This is only used during the building of
102      * the DFA, and is let go afterwards.
103      */
104     private CMStateSet fFollowList[] = null;
105 
106     /**
107      * This is the head node of our intermediate representation. It is
108      * only non-null during the building of the DFA (just so that it
109      * does not have to be passed all around.) Once the DFA is built,
110      * this is no longer required so its nulled out.
111      */
112     private CMNode fHeadNode = null;
113 
114     /**
115      * The count of leaf nodes. This is an important number that set some
116      * limits on the sizes of data structures in the DFA process.
117      */
118     private int fLeafCount = 0;
119 
120     /**
121      * An array of non-epsilon leaf nodes, which is used during the DFA
122      * build operation, then dropped.
123      */
124     private XSCMLeaf fLeafList[] = null;
125 
126     /** Array mapping ANY types to the leaf list. */
127     private int fLeafListType[] = null;
128 
129     /**
130      * This is the transition table that is the main by product of all
131      * of the effort here. It is an array of arrays of ints. The first
132      * dimension is the number of states we end up with in the DFA. The
133      * second dimensions is the number of unique elements in the content
134      * model (fElemMapSize). Each entry in the second dimension indicates
135      * the new state given that input for the first dimension's start
136      * state.
137      * <p>
138      * The fElemMap array handles mapping from element indexes to
139      * positions in the second dimension of the transition table.
140      */
141     private int fTransTable[][] = null;
142     /**
143      * Array containing occurence information for looping states
144      * which use counters to check minOccurs/maxOccurs.
145      */
146     private Occurence [] fCountingStates = null;
147     static final class Occurence {
148         final int minOccurs;
149         final int maxOccurs;
150         final int elemIndex;
Occurence(XSCMRepeatingLeaf leaf, int elemIndex)151         public Occurence (XSCMRepeatingLeaf leaf, int elemIndex) {
152             minOccurs = leaf.getMinOccurs();
153             maxOccurs = leaf.getMaxOccurs();
154             this.elemIndex = elemIndex;
155         }
toString()156         public String toString() {
157             return "minOccurs=" + minOccurs
158                 + ";maxOccurs=" +
159                 ((maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED)
160                         ? Integer.toString(maxOccurs) : "unbounded");
161         }
162     }
163 
164     /**
165      * The number of valid entries in the transition table, and in the other
166      * related tables such as fFinalStateFlags.
167      */
168     private int fTransTableSize = 0;
169 
170     /**
171      * Array of counters for all the for elements (or wildcards)
172      * of the form a{n,m} where n > 1 and m <= unbounded. Used
173      * to count the a's to later check against n and m. Counter
174      * set to -1 if element (or wildcard) not optimized by
175      * constant space algorithm.
176      */
177     private int fElemMapCounter[];
178 
179     /**
180      * Array of lower bounds for all the for elements (or wildcards)
181      * of the form a{n,m} where n > 1 and m <= unbounded. This array
182      * stores the n's for those elements (or wildcards) for which
183      * the constant space algorithm applies (or -1 otherwise).
184      */
185     private int fElemMapCounterLowerBound[];
186 
187     /**
188      * Array of upper bounds for all the for elements (or wildcards)
189      * of the form a{n,m} where n > 1 and m <= unbounded. This array
190      * stores the n's for those elements (or wildcards) for which
191      * the constant space algorithm applies, or -1 if algorithm does
192      * not apply or m = unbounded.
193      */
194     private int fElemMapCounterUpperBound[];   // -1 if no upper bound
195 
196     // temp variables
197 
198     //
199     // Constructors
200     //
201 
202     /**
203      * Constructs a DFA content model.
204      *
205      * @param syntaxTree    The syntax tree of the content model.
206      * @param leafCount     The number of leaves.
207      *
208      * @exception RuntimeException Thrown if DFA can't be built.
209      */
210 
XSDFACM(CMNode syntaxTree, int leafCount)211    public XSDFACM(CMNode syntaxTree, int leafCount) {
212 
213         // Store away our index and pools in members
214         fLeafCount = leafCount;
215 
216         //
217         //  Create some string pool indexes that represent the names of some
218         //  magical nodes in the syntax tree.
219         //  (already done in static initialization...
220         //
221 
222         //
223         //  Ok, so lets grind through the building of the DFA. This method
224         //  handles the high level logic of the algorithm, but it uses a
225         //  number of helper classes to do its thing.
226         //
227         //  In order to avoid having hundreds of references to the error and
228         //  string handlers around, this guy and all of his helper classes
229         //  just throw a simple exception and we then pass it along.
230         //
231 
232         if(DEBUG_VALIDATE_CONTENT) {
233             XSDFACM.time -= System.currentTimeMillis();
234         }
235 
236         buildDFA(syntaxTree);
237 
238         if(DEBUG_VALIDATE_CONTENT) {
239             XSDFACM.time += System.currentTimeMillis();
240             System.out.println("DFA build: " + XSDFACM.time + "ms");
241         }
242     }
243 
244     private static long time = 0;
245 
246     //
247     // XSCMValidator methods
248     //
249 
250     /**
251      * check whether the given state is one of the final states
252      *
253      * @param state       the state to check
254      *
255      * @return whether it's a final state
256      */
isFinalState(int state)257     public boolean isFinalState (int state) {
258         return (state < 0)? false :
259             fFinalStateFlags[state];
260     }
261 
262     /**
263      * one transition only
264      *
265      * @param curElem The current element's QName
266      * @param state stack to store the previous state
267      * @param subGroupHandler the substitution group handler
268      *
269      * @return  null if transition is invalid; otherwise the Object corresponding to the
270      *      XSElementDecl or XSWildcardDecl identified.  Also, the
271      *      state array will be modified to include the new state; this so that the validator can
272      *      store it away.
273      *
274      * @exception RuntimeException thrown on error
275      */
oneTransition(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler)276     public Object oneTransition(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler) {
277         int curState = state[0];
278 
279         if(curState == XSCMValidator.FIRST_ERROR || curState == XSCMValidator.SUBSEQUENT_ERROR) {
280             // there was an error last time; so just go find correct Object in fElemmMap.
281             // ... after resetting state[0].
282             if(curState == XSCMValidator.FIRST_ERROR)
283                 state[0] = XSCMValidator.SUBSEQUENT_ERROR;
284 
285             return findMatchingDecl(curElem, subGroupHandler);
286         }
287 
288         int nextState = 0;
289         int elemIndex = 0;
290         Object matchingDecl = null;
291 
292         for (; elemIndex < fElemMapSize; elemIndex++) {
293             nextState = fTransTable[curState][elemIndex];
294             if (nextState == -1)
295                 continue;
296             int type = fElemMapType[elemIndex] ;
297             if (type == XSParticleDecl.PARTICLE_ELEMENT) {
298                 matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]);
299                 if (matchingDecl != null) {
300                     // Increment counter if constant space algorithm applies
301                     if (fElemMapCounter[elemIndex] >= 0) {
302                         fElemMapCounter[elemIndex]++;
303                     }
304                     break;
305                 }
306             }
307             else if (type == XSParticleDecl.PARTICLE_WILDCARD) {
308                 if (((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri)) {
309                     matchingDecl = fElemMap[elemIndex];
310                     // Increment counter if constant space algorithm applies
311                     if (fElemMapCounter[elemIndex] >= 0) {
312                         fElemMapCounter[elemIndex]++;
313                     }
314                     break;
315                 }
316             }
317         }
318 
319         // if we still can't find a match, set the state to first_error
320         // and return null
321         if (elemIndex == fElemMapSize) {
322             state[1] = state[0];
323             state[0] = XSCMValidator.FIRST_ERROR;
324             return findMatchingDecl(curElem, subGroupHandler);
325         }
326 
327         if (fCountingStates != null) {
328             Occurence o = fCountingStates[curState];
329             if (o != null) {
330                 if (curState == nextState) {
331                     if (++state[2] > o.maxOccurs &&
332                         o.maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) {
333                         // It's likely that we looped too many times on the current state
334                         // however it's possible that we actually matched another particle
335                         // which allows the same name.
336                         //
337                         // Consider:
338                         //
339                         // <xs:sequence>
340                         //  <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/>
341                         //  <xs:element name="foo" type="xs:string" fixed="bar"/>
342                         // </xs:sequence>
343                         //
344                         // and
345                         //
346                         // <xs:sequence>
347                         //  <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/>
348                         //  <xs:any namespace="##any" processContents="skip"/>
349                         // </xs:sequence>
350                         //
351                         // In the DFA there will be two transitions from the current state which
352                         // allow "foo". Note that this is not a UPA violation. The ambiguity of which
353                         // transition to take is resolved by the current value of the counter. Since
354                         // we've already seen enough instances of the first "foo" perhaps there is
355                         // another element declaration or wildcard deeper in the element map which
356                         // matches.
357                         return findMatchingDecl(curElem, state, subGroupHandler, elemIndex);
358                     }
359                 }
360                 else if (state[2] < o.minOccurs) {
361                     // not enough loops on the current state.
362                     state[1] = state[0];
363                     state[0] = XSCMValidator.FIRST_ERROR;
364                     return findMatchingDecl(curElem, subGroupHandler);
365                 }
366                 else {
367                     // Exiting a counting state. If we're entering a new
368                     // counting state, reset the counter.
369                     o = fCountingStates[nextState];
370                     if (o != null) {
371                         state[2] = (elemIndex == o.elemIndex) ? 1 : 0;
372                     }
373                 }
374             }
375             else {
376                 o = fCountingStates[nextState];
377                 if (o != null) {
378                     // Entering a new counting state. Reset the counter.
379                     // If we've already seen one instance of the looping
380                     // particle set the counter to 1, otherwise set it
381                     // to 0.
382                     state[2] = (elemIndex == o.elemIndex) ? 1 : 0;
383                 }
384             }
385         }
386 
387         state[0] = nextState;
388         return matchingDecl;
389     } // oneTransition(QName, int[], SubstitutionGroupHandler):  Object
390 
findMatchingDecl(QName curElem, SubstitutionGroupHandler subGroupHandler)391     Object findMatchingDecl(QName curElem, SubstitutionGroupHandler subGroupHandler) {
392         Object matchingDecl = null;
393 
394         for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
395             int type = fElemMapType[elemIndex] ;
396             if (type == XSParticleDecl.PARTICLE_ELEMENT) {
397                 matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]);
398                 if (matchingDecl != null) {
399                     return matchingDecl;
400                 }
401             }
402             else if (type == XSParticleDecl.PARTICLE_WILDCARD) {
403                 if(((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri))
404                     return fElemMap[elemIndex];
405             }
406         }
407 
408         return null;
409     } // findMatchingDecl(QName, SubstitutionGroupHandler): Object
410 
findMatchingDecl(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler, int elemIndex)411     Object findMatchingDecl(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler, int elemIndex) {
412 
413         int curState = state[0];
414         int nextState = 0;
415         Object matchingDecl = null;
416 
417         while (++elemIndex < fElemMapSize) {
418             nextState = fTransTable[curState][elemIndex];
419             if (nextState == -1)
420                 continue;
421             int type = fElemMapType[elemIndex] ;
422             if (type == XSParticleDecl.PARTICLE_ELEMENT) {
423                 matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]);
424                 if (matchingDecl != null) {
425                     break;
426                 }
427             }
428             else if (type == XSParticleDecl.PARTICLE_WILDCARD) {
429                 if (((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri)) {
430                     matchingDecl = fElemMap[elemIndex];
431                     break;
432                 }
433             }
434         }
435 
436         // if we still can't find a match, set the state to FIRST_ERROR and return null
437         if (elemIndex == fElemMapSize) {
438             state[1] = state[0];
439             state[0] = XSCMValidator.FIRST_ERROR;
440             return findMatchingDecl(curElem, subGroupHandler);
441         }
442 
443         // if we found a match, set the next state and reset the
444         // counter if the next state is a counting state.
445         state[0] = nextState;
446         final Occurence o = fCountingStates[nextState];
447         if (o != null) {
448             state[2] = (elemIndex == o.elemIndex) ? 1 : 0;
449         }
450         return matchingDecl;
451     } // findMatchingDecl(QName, int[], SubstitutionGroupHandler, int): Object
452 
453     // This method returns the start states of the content model.
startContentModel()454     public int[] startContentModel() {
455         // Clear all constant space algorithm counters in use
456         for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
457             if (fElemMapCounter[elemIndex] != -1) {
458                 fElemMapCounter[elemIndex] = 0;
459             }
460         }
461         // [0] : the current state
462         // [1] : if [0] is an error state then the
463         //       last valid state before the error
464         // [2] : occurence counter for counting states
465         return new int [3];
466     } // startContentModel():int[]
467 
468     // this method returns whether the last state was a valid final state
endContentModel(int[] state)469     public boolean endContentModel(int[] state) {
470         final int curState = state[0];
471         if (fFinalStateFlags[curState]) {
472             if (fCountingStates != null) {
473                 Occurence o = fCountingStates[curState];
474                 if (o != null && state[2] < o.minOccurs) {
475                     // not enough loops on the current state to be considered final.
476                     return false;
477                 }
478             }
479             return true;
480         }
481         return false;
482     } // endContentModel(int[]):  boolean
483 
484     // Killed off whatCanGoHere; we may need it for DOM canInsert(...) etc.,
485     // but we can put it back later.
486 
487     //
488     // Private methods
489     //
490 
491     /**
492      * Builds the internal DFA transition table from the given syntax tree.
493      *
494      * @param syntaxTree The syntax tree.
495      *
496      * @exception RuntimeException Thrown if DFA cannot be built.
497      */
buildDFA(CMNode syntaxTree)498     private void buildDFA(CMNode syntaxTree) {
499         //
500         //  The first step we need to take is to rewrite the content model
501         //  using our CMNode objects, and in the process get rid of any
502         //  repetition short cuts, converting them into '*' style repetitions
503         //  or getting rid of repetitions altogether.
504         //
505         //  The conversions done are:
506         //
507         //  x+ -> (x|x*)
508         //  x? -> (x|epsilon)
509         //
510         //  This is a relatively complex scenario. What is happening is that
511         //  we create a top level binary node of which the special EOC value
512         //  is set as the right side node. The the left side is set to the
513         //  rewritten syntax tree. The source is the original content model
514         //  info from the decl pool. The rewrite is done by buildSyntaxTree()
515         //  which recurses the decl pool's content of the element and builds
516         //  a new tree in the process.
517         //
518         //  Note that, during this operation, we set each non-epsilon leaf
519         //  node's DFA state position and count the number of such leafs, which
520         //  is left in the fLeafCount member.
521         //
522         //  The nodeTmp object is passed in just as a temp node to use during
523         //  the recursion. Otherwise, we'd have to create a new node on every
524         //  level of recursion, which would be piggy in Java (as is everything
525         //  for that matter.)
526         //
527 
528         /* MODIFIED (Jan, 2001)
529          *
530          * Use following rules.
531          *   nullable(x+) := nullable(x), first(x+) := first(x),  last(x+) := last(x)
532          *   nullable(x?) := true, first(x?) := first(x),  last(x?) := last(x)
533          *
534          * The same computation of follow as x* is applied to x+
535          *
536          * The modification drastically reduces computation time of
537          * "(a, (b, a+, (c, (b, a+)+, a+, (d,  (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+"
538          */
539 
540         //
541         //  And handle specially the EOC node, which also must be numbered
542         //  and counted as a non-epsilon leaf node. It could not be handled
543         //  in the above tree build because it was created before all that
544         //  started. We save the EOC position since its used during the DFA
545         //  building loop.
546         //
547         int EOCPos = fLeafCount;
548         XSCMLeaf nodeEOC = new XSCMLeaf(XSParticleDecl.PARTICLE_ELEMENT, null, -1, fLeafCount++);
549         fHeadNode = new XSCMBinOp(
550             XSModelGroupImpl.MODELGROUP_SEQUENCE,
551             syntaxTree,
552             nodeEOC
553         );
554 
555         //
556         //  Ok, so now we have to iterate the new tree and do a little more
557         //  work now that we know the leaf count. One thing we need to do is
558         //  to calculate the first and last position sets of each node. This
559         //  is cached away in each of the nodes.
560         //
561         //  Along the way we also set the leaf count in each node as the
562         //  maximum state count. They must know this in order to create their
563         //  first/last pos sets.
564         //
565         //  We also need to build an array of references to the non-epsilon
566         //  leaf nodes. Since we iterate it in the same way as before, this
567         //  will put them in the array according to their position values.
568         //
569         fLeafList = new XSCMLeaf[fLeafCount];
570         fLeafListType = new int[fLeafCount];
571         postTreeBuildInit(fHeadNode);
572 
573         //
574         //  And, moving onward... We now need to build the follow position
575         //  sets for all the nodes. So we allocate an array of state sets,
576         //  one for each leaf node (i.e. each DFA position.)
577         //
578         fFollowList = new CMStateSet[fLeafCount];
579         for (int index = 0; index < fLeafCount; index++)
580             fFollowList[index] = new CMStateSet(fLeafCount);
581         calcFollowList(fHeadNode);
582         //
583         //  And finally the big push... Now we build the DFA using all the
584         //  states and the tree we've built up. First we set up the various
585         //  data structures we are going to use while we do this.
586         //
587         //  First of all we need an array of unique element names in our
588         //  content model. For each transition table entry, we need a set of
589         //  contiguous indices to represent the transitions for a particular
590         //  input element. So we need to a zero based range of indexes that
591         //  map to element types. This element map provides that mapping.
592         //
593         fElemMap = new Object[fLeafCount];
594         fElemMapType = new int[fLeafCount];
595         fElemMapId = new int[fLeafCount];
596 
597         fElemMapCounter = new int[fLeafCount];
598         fElemMapCounterLowerBound = new int[fLeafCount];
599         fElemMapCounterUpperBound = new int[fLeafCount];
600 
601         fElemMapSize = 0;
602         Occurence [] elemOccurenceMap = null;
603 
604         for (int outIndex = 0; outIndex < fLeafCount; outIndex++) {
605             // optimization from Henry Zongaro:
606             //fElemMap[outIndex] = new Object ();
607             fElemMap[outIndex] = null;
608 
609             int inIndex = 0;
610             final int id = fLeafList[outIndex].getParticleId();
611             for (; inIndex < fElemMapSize; inIndex++) {
612                 if (id == fElemMapId[inIndex])
613                     break;
614             }
615 
616             // If it was not in the list, then add it, if not the EOC node
617             if (inIndex == fElemMapSize) {
618                 XSCMLeaf leaf = fLeafList[outIndex];
619                 fElemMap[fElemMapSize] = leaf.getLeaf();
620                 if (leaf instanceof XSCMRepeatingLeaf) {
621                     if (elemOccurenceMap == null) {
622                         elemOccurenceMap = new Occurence[fLeafCount];
623                     }
624                     elemOccurenceMap[fElemMapSize] = new Occurence((XSCMRepeatingLeaf) leaf, fElemMapSize);
625                 }
626 
627                 fElemMapType[fElemMapSize] = fLeafListType[outIndex];
628                 fElemMapId[fElemMapSize] = id;
629 
630                 // Init counters and bounds for a{n,m} algorithm
631                 int[] bounds = (int[]) leaf.getUserData();
632                 if (bounds != null) {
633                     fElemMapCounter[fElemMapSize] = 0;
634                     fElemMapCounterLowerBound[fElemMapSize] = bounds[0];
635                     fElemMapCounterUpperBound[fElemMapSize] = bounds[1];
636                 } else {
637                     fElemMapCounter[fElemMapSize] = -1;
638                     fElemMapCounterLowerBound[fElemMapSize] = -1;
639                     fElemMapCounterUpperBound[fElemMapSize] = -1;
640                 }
641 
642                 fElemMapSize++;
643             }
644         }
645 
646         // the last entry in the element map must be the EOC element.
647         // remove it from the map.
648         if (DEBUG) {
649             if (fElemMapId[fElemMapSize-1] != -1)
650                 System.err.println("interal error in DFA: last element is not EOC.");
651         }
652         fElemMapSize--;
653 
654         /***
655          * Optimization(Jan, 2001); We sort fLeafList according to
656          * elemIndex which is *uniquely* associated to each leaf.
657          * We are *assuming* that each element appears in at least one leaf.
658          **/
659 
660         int[] fLeafSorter = new int[fLeafCount + fElemMapSize];
661         int fSortCount = 0;
662 
663         for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
664             final int id = fElemMapId[elemIndex];
665             for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) {
666                 if (id == fLeafList[leafIndex].getParticleId())
667                     fLeafSorter[fSortCount++] = leafIndex;
668             }
669             fLeafSorter[fSortCount++] = -1;
670         }
671 
672         /* Optimization(Jan, 2001) */
673 
674         //
675         //  Next lets create some arrays, some that hold transient
676         //  information during the DFA build and some that are permament.
677         //  These are kind of sticky since we cannot know how big they will
678         //  get, but we don't want to use any Java collections because of
679         //  performance.
680         //
681         //  Basically they will probably be about fLeafCount*2 on average,
682         //  but can be as large as 2^(fLeafCount*2), worst case. So we start
683         //  with fLeafCount*4 as a middle ground. This will be very unlikely
684         //  to ever have to expand, though it if does, the overhead will be
685         //  somewhat ugly.
686         //
687         int curArraySize = fLeafCount * 4;
688         CMStateSet[] statesToDo = new CMStateSet[curArraySize];
689         fFinalStateFlags = new boolean[curArraySize];
690         fTransTable = new int[curArraySize][];
691 
692         //
693         //  Ok we start with the initial set as the first pos set of the
694         //  head node (which is the seq node that holds the content model
695         //  and the EOC node.)
696         //
697         CMStateSet setT = fHeadNode.firstPos();
698 
699         //
700         //  Init our two state flags. Basically the unmarked state counter
701         //  is always chasing the current state counter. When it catches up,
702         //  that means we made a pass through that did not add any new states
703         //  to the lists, at which time we are done. We could have used a
704         //  expanding array of flags which we used to mark off states as we
705         //  complete them, but this is easier though less readable maybe.
706         //
707         int unmarkedState = 0;
708         int curState = 0;
709 
710         //
711         //  Init the first transition table entry, and put the initial state
712         //  into the states to do list, then bump the current state.
713         //
714         fTransTable[curState] = makeDefStateList();
715         statesToDo[curState] = setT;
716         curState++;
717 
718         /* Optimization(Jan, 2001); This is faster for
719          * a large content model such as, "(t001+|t002+|.... |t500+)".
720          */
721 
722         HashMap stateTable = new HashMap();
723 
724         /* Optimization(Jan, 2001) */
725 
726         //
727         //  Ok, almost done with the algorithm... We now enter the
728         //  loop where we go until the states done counter catches up with
729         //  the states to do counter.
730         //
731         while (unmarkedState < curState) {
732             //
733             //  Get the first unmarked state out of the list of states to do.
734             //  And get the associated transition table entry.
735             //
736             setT = statesToDo[unmarkedState];
737             int[] transEntry = fTransTable[unmarkedState];
738 
739             // Mark this one final if it contains the EOC state
740             fFinalStateFlags[unmarkedState] = setT.getBit(EOCPos);
741 
742             // Bump up the unmarked state count, marking this state done
743             unmarkedState++;
744 
745             // Loop through each possible input symbol in the element map
746             CMStateSet newSet = null;
747             /* Optimization(Jan, 2001) */
748             int sorterIndex = 0;
749             /* Optimization(Jan, 2001) */
750             for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
751                 //
752                 //  Build up a set of states which is the union of all of
753                 //  the follow sets of DFA positions that are in the current
754                 //  state. If we gave away the new set last time through then
755                 //  create a new one. Otherwise, zero out the existing one.
756                 //
757                 if (newSet == null)
758                     newSet = new CMStateSet(fLeafCount);
759                 else
760                     newSet.zeroBits();
761 
762                 /* Optimization(Jan, 2001) */
763                 int leafIndex = fLeafSorter[sorterIndex++];
764 
765                 while (leafIndex != -1) {
766                     // If this leaf index (DFA position) is in the current set...
767                     if (setT.getBit(leafIndex)) {
768                         //
769                         //  If this leaf is the current input symbol, then we
770                         //  want to add its follow list to the set of states to
771                         //  transition to from the current state.
772                         //
773                         newSet.union(fFollowList[leafIndex]);
774                     }
775 
776                    leafIndex = fLeafSorter[sorterIndex++];
777                 }
778                 /* Optimization(Jan, 2001) */
779 
780                 //
781                 //  If this new set is not empty, then see if its in the list
782                 //  of states to do. If not, then add it.
783                 //
784                 if (!newSet.isEmpty()) {
785                     //
786                     //  Search the 'states to do' list to see if this new
787                     //  state set is already in there.
788                     //
789 
790                     /* Optimization(Jan, 2001) */
791                     Integer stateObj = (Integer)stateTable.get(newSet);
792                     int stateIndex = (stateObj == null ? curState : stateObj.intValue());
793                     /* Optimization(Jan, 2001) */
794 
795                     // If we did not find it, then add it
796                     if (stateIndex == curState) {
797                         //
798                         //  Put this new state into the states to do and init
799                         //  a new entry at the same index in the transition
800                         //  table.
801                         //
802                         statesToDo[curState] = newSet;
803                         fTransTable[curState] = makeDefStateList();
804 
805                         /* Optimization(Jan, 2001) */
806                         stateTable.put(newSet, new Integer(curState));
807                         /* Optimization(Jan, 2001) */
808 
809                         // We now have a new state to do so bump the count
810                         curState++;
811 
812                         //
813                         //  Null out the new set to indicate we adopted it.
814                         //  This will cause the creation of a new set on the
815                         //  next time around the loop.
816                         //
817                         newSet = null;
818                     }
819 
820                     //
821                     //  Now set this state in the transition table's entry
822                     //  for this element (using its index), with the DFA
823                     //  state we will move to from the current state when we
824                     //  see this input element.
825                     //
826                     transEntry[elemIndex] = stateIndex;
827 
828                     // Expand the arrays if we're full
829                     if (curState == curArraySize) {
830                         //
831                         //  Yikes, we overflowed the initial array size, so
832                         //  we've got to expand all of these arrays. So adjust
833                         //  up the size by 50% and allocate new arrays.
834                         //
835                         final int newSize = (int)(curArraySize * 1.5);
836                         CMStateSet[] newToDo = new CMStateSet[newSize];
837                         boolean[] newFinalFlags = new boolean[newSize];
838                         int[][] newTransTable = new int[newSize][];
839 
840                         // Copy over all of the existing content
841                         System.arraycopy(statesToDo, 0, newToDo, 0, curArraySize);
842                         System.arraycopy(fFinalStateFlags, 0, newFinalFlags, 0, curArraySize);
843                         System.arraycopy(fTransTable, 0, newTransTable, 0, curArraySize);
844 
845                         // Store the new array size
846                         curArraySize = newSize;
847                         statesToDo = newToDo;
848                         fFinalStateFlags = newFinalFlags;
849                         fTransTable = newTransTable;
850                     }
851                 }
852             }
853         }
854 
855         //
856         // Fill in the occurence information for each looping state
857         // if we're using counters.
858         //
859         if (elemOccurenceMap != null) {
860             fCountingStates = new Occurence[curState];
861             for (int i = 0; i < curState; ++i) {
862                 int [] transitions = fTransTable[i];
863                 for (int j = 0; j < transitions.length; ++j) {
864                     if (i == transitions[j]) {
865                         fCountingStates[i] = elemOccurenceMap[j];
866                         break;
867                     }
868                 }
869             }
870         }
871 
872         //
873         //  And now we can say bye bye to the temp representation since we've
874         //  built the DFA.
875         //
876         if (DEBUG_VALIDATE_CONTENT)
877             dumpTree(fHeadNode, 0);
878         fHeadNode = null;
879         fLeafList = null;
880         fFollowList = null;
881         fLeafListType = null;
882         fElemMapId = null;
883     }
884 
885     /**
886      * Calculates the follow list of the current node.
887      *
888      * @param nodeCur The curent node.
889      *
890      * @exception RuntimeException Thrown if follow list cannot be calculated.
891      */
calcFollowList(CMNode nodeCur)892     private void calcFollowList(CMNode nodeCur) {
893         // Recurse as required
894         if (nodeCur.type() == XSModelGroupImpl.MODELGROUP_CHOICE) {
895             // Recurse only
896             calcFollowList(((XSCMBinOp)nodeCur).getLeft());
897             calcFollowList(((XSCMBinOp)nodeCur).getRight());
898         }
899          else if (nodeCur.type() == XSModelGroupImpl.MODELGROUP_SEQUENCE) {
900             // Recurse first
901             calcFollowList(((XSCMBinOp)nodeCur).getLeft());
902             calcFollowList(((XSCMBinOp)nodeCur).getRight());
903 
904             //
905             //  Now handle our level. We use our left child's last pos
906             //  set and our right child's first pos set, so go ahead and
907             //  get them ahead of time.
908             //
909             final CMStateSet last  = ((XSCMBinOp)nodeCur).getLeft().lastPos();
910             final CMStateSet first = ((XSCMBinOp)nodeCur).getRight().firstPos();
911 
912             //
913             //  Now, for every position which is in our left child's last set
914             //  add all of the states in our right child's first set to the
915             //  follow set for that position.
916             //
917             for (int index = 0; index < fLeafCount; index++) {
918                 if (last.getBit(index))
919                     fFollowList[index].union(first);
920             }
921         }
922          else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_MORE
923         || nodeCur.type() == XSParticleDecl.PARTICLE_ONE_OR_MORE) {
924             // Recurse first
925             calcFollowList(((XSCMUniOp)nodeCur).getChild());
926 
927             //
928             //  Now handle our level. We use our own first and last position
929             //  sets, so get them up front.
930             //
931             final CMStateSet first = nodeCur.firstPos();
932             final CMStateSet last  = nodeCur.lastPos();
933 
934             //
935             //  For every position which is in our last position set, add all
936             //  of our first position states to the follow set for that
937             //  position.
938             //
939             for (int index = 0; index < fLeafCount; index++) {
940                 if (last.getBit(index))
941                     fFollowList[index].union(first);
942             }
943         }
944 
945         else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_ONE) {
946             // Recurse only
947             calcFollowList(((XSCMUniOp)nodeCur).getChild());
948         }
949 
950     }
951 
952     /**
953      * Dumps the tree of the current node to standard output.
954      *
955      * @param nodeCur The current node.
956      * @param level   The maximum levels to output.
957      *
958      * @exception RuntimeException Thrown on error.
959      */
dumpTree(CMNode nodeCur, int level)960     private void dumpTree(CMNode nodeCur, int level) {
961         for (int index = 0; index < level; index++)
962             System.out.print("   ");
963 
964         int type = nodeCur.type();
965 
966         switch(type ) {
967 
968         case XSModelGroupImpl.MODELGROUP_CHOICE:
969         case XSModelGroupImpl.MODELGROUP_SEQUENCE: {
970             if (type == XSModelGroupImpl.MODELGROUP_CHOICE)
971                 System.out.print("Choice Node ");
972             else
973                 System.out.print("Seq Node ");
974 
975             if (nodeCur.isNullable())
976                 System.out.print("Nullable ");
977 
978             System.out.print("firstPos=");
979             System.out.print(nodeCur.firstPos().toString());
980             System.out.print(" lastPos=");
981             System.out.println(nodeCur.lastPos().toString());
982 
983             dumpTree(((XSCMBinOp)nodeCur).getLeft(), level+1);
984             dumpTree(((XSCMBinOp)nodeCur).getRight(), level+1);
985             break;
986         }
987         case XSParticleDecl.PARTICLE_ZERO_OR_MORE:
988         case XSParticleDecl.PARTICLE_ONE_OR_MORE:
989         case XSParticleDecl.PARTICLE_ZERO_OR_ONE: {
990             System.out.print("Rep Node ");
991 
992             if (nodeCur.isNullable())
993                 System.out.print("Nullable ");
994 
995             System.out.print("firstPos=");
996             System.out.print(nodeCur.firstPos().toString());
997             System.out.print(" lastPos=");
998             System.out.println(nodeCur.lastPos().toString());
999 
1000             dumpTree(((XSCMUniOp)nodeCur).getChild(), level+1);
1001             break;
1002         }
1003         case XSParticleDecl.PARTICLE_ELEMENT: {
1004             System.out.print
1005             (
1006                 "Leaf: (pos="
1007                 + ((XSCMLeaf)nodeCur).getPosition()
1008                 + "), "
1009                 + "(elemIndex="
1010                 + ((XSCMLeaf)nodeCur).getLeaf()
1011                 + ") "
1012             );
1013 
1014             if (nodeCur.isNullable())
1015                 System.out.print(" Nullable ");
1016 
1017             System.out.print("firstPos=");
1018             System.out.print(nodeCur.firstPos().toString());
1019             System.out.print(" lastPos=");
1020             System.out.println(nodeCur.lastPos().toString());
1021             break;
1022         }
1023         case XSParticleDecl.PARTICLE_WILDCARD:
1024               System.out.print("Any Node: ");
1025 
1026             System.out.print("firstPos=");
1027             System.out.print(nodeCur.firstPos().toString());
1028             System.out.print(" lastPos=");
1029             System.out.println(nodeCur.lastPos().toString());
1030             break;
1031         default: {
1032             throw new RuntimeException("ImplementationMessages.VAL_NIICM");
1033         }
1034         }
1035 
1036     }
1037 
1038 
1039     /**
1040      * -1 is used to represent bad transitions in the transition table
1041      * entry for each state. So each entry is initialized to an all -1
1042      * array. This method creates a new entry and initializes it.
1043      */
makeDefStateList()1044     private int[] makeDefStateList()
1045     {
1046         int[] retArray = new int[fElemMapSize];
1047         for (int index = 0; index < fElemMapSize; index++)
1048             retArray[index] = -1;
1049         return retArray;
1050     }
1051 
1052     /** Post tree build initialization. */
postTreeBuildInit(CMNode nodeCur)1053     private void postTreeBuildInit(CMNode nodeCur) throws RuntimeException {
1054         // Set the maximum states on this node
1055         nodeCur.setMaxStates(fLeafCount);
1056 
1057         XSCMLeaf leaf = null;
1058         int pos = 0;
1059         // Recurse as required
1060         if (nodeCur.type() == XSParticleDecl.PARTICLE_WILDCARD) {
1061             leaf = (XSCMLeaf)nodeCur;
1062             pos = leaf.getPosition();
1063             fLeafList[pos] = leaf;
1064             fLeafListType[pos] = XSParticleDecl.PARTICLE_WILDCARD;
1065         }
1066         else if ((nodeCur.type() == XSModelGroupImpl.MODELGROUP_CHOICE) ||
1067                  (nodeCur.type() == XSModelGroupImpl.MODELGROUP_SEQUENCE)) {
1068             postTreeBuildInit(((XSCMBinOp)nodeCur).getLeft());
1069             postTreeBuildInit(((XSCMBinOp)nodeCur).getRight());
1070         }
1071         else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_MORE ||
1072                  nodeCur.type() == XSParticleDecl.PARTICLE_ONE_OR_MORE ||
1073                  nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_ONE) {
1074             postTreeBuildInit(((XSCMUniOp)nodeCur).getChild());
1075         }
1076         else if (nodeCur.type() == XSParticleDecl.PARTICLE_ELEMENT) {
1077             //  Put this node in the leaf list at the current index if its
1078             //  a non-epsilon leaf.
1079             leaf = (XSCMLeaf)nodeCur;
1080             pos = leaf.getPosition();
1081             fLeafList[pos] = leaf;
1082             fLeafListType[pos] = XSParticleDecl.PARTICLE_ELEMENT;
1083         }
1084         else {
1085             throw new RuntimeException("ImplementationMessages.VAL_NIICM");
1086         }
1087     }
1088 
1089     /**
1090      * check whether this content violates UPA constraint.
1091      *
1092      * @param subGroupHandler the substitution group handler
1093      * @return true if this content model contains other or list wildcard
1094      */
checkUniqueParticleAttribution(SubstitutionGroupHandler subGroupHandler)1095     public boolean checkUniqueParticleAttribution(SubstitutionGroupHandler subGroupHandler) throws XMLSchemaException {
1096         // Unique Particle Attribution
1097         // store the conflict results between any two elements in fElemMap
1098         // 0: not compared; -1: no conflict; 1: conflict
1099         // initialize the conflict table (all 0 initially)
1100         byte conflictTable[][] = new byte[fElemMapSize][fElemMapSize];
1101 
1102         // for each state, check whether it has overlap transitions
1103         for (int i = 0; i < fTransTable.length && fTransTable[i] != null; i++) {
1104             for (int j = 0; j < fElemMapSize; j++) {
1105                 for (int k = j+1; k < fElemMapSize; k++) {
1106                     if (fTransTable[i][j] != -1 &&
1107                         fTransTable[i][k] != -1) {
1108                         if (conflictTable[j][k] == 0) {
1109                             if (XSConstraints.overlapUPA
1110                                     (fElemMap[j], fElemMap[k],
1111                                             subGroupHandler)) {
1112                                 if (fCountingStates != null) {
1113                                     Occurence o = fCountingStates[i];
1114                                     // If "i" is a counting state and exactly one of the transitions
1115                                     // loops back to "i" then the two particles do not overlap if
1116                                     // minOccurs == maxOccurs.
1117                                     if (o != null &&
1118                                         fTransTable[i][j] == i ^ fTransTable[i][k] == i &&
1119                                         o.minOccurs == o.maxOccurs) {
1120                                         conflictTable[j][k] = (byte) -1;
1121                                         continue;
1122                                     }
1123                                 }
1124                                 conflictTable[j][k] = (byte) 1;
1125                             }
1126                             else {
1127                                 conflictTable[j][k] = (byte) -1;
1128                             }
1129                         }
1130                     }
1131                 }
1132             }
1133         }
1134 
1135         // report all errors
1136         for (int i = 0; i < fElemMapSize; i++) {
1137             for (int j = 0; j < fElemMapSize; j++) {
1138                 if (conflictTable[i][j] == 1) {
1139                     //errors.newError("cos-nonambig", new Object[]{fElemMap[i].toString(),
1140                     //                                             fElemMap[j].toString()});
1141                     // REVISIT: do we want to report all errors? or just one?
1142                     throw new XMLSchemaException("cos-nonambig", new Object[]{fElemMap[i].toString(),
1143                                                                               fElemMap[j].toString()});
1144                 }
1145             }
1146         }
1147 
1148         // if there is a other or list wildcard, we need to check this CM
1149         // again, if this grammar is cached.
1150         for (int i = 0; i < fElemMapSize; i++) {
1151             if (fElemMapType[i] == XSParticleDecl.PARTICLE_WILDCARD) {
1152                 XSWildcardDecl wildcard = (XSWildcardDecl)fElemMap[i];
1153                 if (wildcard.fType == XSWildcardDecl.NSCONSTRAINT_LIST ||
1154                     wildcard.fType == XSWildcardDecl.NSCONSTRAINT_NOT) {
1155                     return true;
1156                 }
1157             }
1158         }
1159 
1160         return false;
1161     }
1162 
1163     /**
1164      * Check which elements are valid to appear at this point. This method also
1165      * works if the state is in error, in which case it returns what should
1166      * have been seen.
1167      *
1168      * @param state  the current state
1169      * @return       a Vector whose entries are instances of
1170      *               either XSWildcardDecl or XSElementDecl.
1171      */
whatCanGoHere(int[] state)1172     public Vector whatCanGoHere(int[] state) {
1173         int curState = state[0];
1174         if (curState < 0)
1175             curState = state[1];
1176         Occurence o = (fCountingStates != null) ?
1177                 fCountingStates[curState] : null;
1178         int count = state[2];
1179 
1180         Vector ret = new Vector();
1181         for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
1182             int nextState = fTransTable[curState][elemIndex];
1183             if (nextState != -1) {
1184                 if (o != null) {
1185                     if (curState == nextState) {
1186                         // Do not include transitions which loop back to the
1187                         // current state if we've looped the maximum number
1188                         // of times or greater.
1189                         if (count >= o.maxOccurs &&
1190                             o.maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) {
1191                             continue;
1192                         }
1193                     }
1194                     // Do not include transitions which advance past the
1195                     // current state if we have not looped enough times.
1196                     else if (count < o.minOccurs) {
1197                         continue;
1198                     }
1199                 }
1200                 ret.addElement(fElemMap[elemIndex]);
1201             }
1202         }
1203         return ret;
1204     }
1205 
1206     /**
1207      * Used by constant space algorithm for a{n,m} for n > 1 and
1208      * m <= unbounded. Called by a validator if validation of
1209      * countent model succeeds after subsuming a{n,m} to a*
1210      * (or a+) to check the n and m bounds.
1211      * Returns <code>null</code> if validation of bounds is
1212      * successful. Returns a list of strings with error info
1213      * if not. Even entries in list returned are error codes
1214      * (used to look up properties) and odd entries are parameters
1215      * to be passed when formatting error message. Each parameter
1216      * is associated with the error code that preceeds it in
1217      * the list.
1218      */
checkMinMaxBounds()1219     public ArrayList checkMinMaxBounds() {
1220         ArrayList result = null;
1221         for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
1222             int count = fElemMapCounter[elemIndex];
1223             if (count == -1) {
1224                 continue;
1225             }
1226             final int minOccurs = fElemMapCounterLowerBound[elemIndex];
1227             final int maxOccurs = fElemMapCounterUpperBound[elemIndex];
1228             if (count < minOccurs) {
1229                 if (result == null) result = new ArrayList();
1230                 result.add("cvc-complex-type.2.4.b");
1231                 result.add("{" + fElemMap[elemIndex] + "}");
1232             }
1233             if (maxOccurs != -1 && count > maxOccurs) {
1234                 if (result == null) result = new ArrayList();
1235                 result.add("cvc-complex-type.2.4.e");
1236                 result.add("{" + fElemMap[elemIndex] + "}");
1237             }
1238         }
1239         return result;
1240     }
1241 
1242 } // class DFAContentModel
1243