1 /*
2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
3  */
4 /*
5  * Licensed to the Apache Software Foundation (ASF) under one or more
6  * contributor license agreements.  See the NOTICE file distributed with
7  * this work for additional information regarding copyright ownership.
8  * The ASF licenses this file to You under the Apache License, Version 2.0
9  * (the "License"); you may not use this file except in compliance with
10  * the License.  You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 
21 package com.sun.org.apache.xerces.internal.impl.dtd.models;
22 
23 import com.sun.org.apache.xerces.internal.impl.dtd.XMLContentSpec;
24 import com.sun.org.apache.xerces.internal.xni.QName;
25 import java.util.HashMap;
26 import java.util.Map;
27 
28 /**
29 
30  * DFAContentModel is the derivative of ContentModel that does
31  * all of the non-trivial element content validation. This class does
32  * the conversion from the regular expression to the DFA that
33  * it then uses in its validation algorithm.
34  * <p>
35  * <b>Note:</b> Upstream work insures that this class will never see
36  * a content model with PCDATA in it. Any model with PCDATA is 'mixed'
37  * and is handled via the MixedContentModel class since mixed models
38  * are very constrained in form and easily handled via a special case.
39  * This also makes implementation of this class much easier.
40  *
41  * @xerces.internal
42  *
43  * @LastModified: Oct 2017
44  */
45 public class DFAContentModel
46     implements ContentModelValidator {
47 
48     //
49     // Constants
50     //
51     // special strings
52 
53     /** Epsilon string. */
54     private static String fEpsilonString = "<<CMNODE_EPSILON>>";
55 
56     /** End-of-content string. */
57     private static String fEOCString = "<<CMNODE_EOC>>";
58 
59     /** initializing static members **/
60     static {
61         fEpsilonString = fEpsilonString.intern();
62         fEOCString = fEOCString.intern();
63     }
64 
65     // debugging
66 
67     /** Set to true to debug content model validation. */
68     private static final boolean DEBUG_VALIDATE_CONTENT = false;
69 
70     //
71     // Data
72     //
73 
74     /* this is the EquivClassComparator object */
75     //private EquivClassComparator comparator = null;
76 
77     /**
78      * This is the map of unique input symbol elements to indices into
79      * each state's per-input symbol transition table entry. This is part
80      * of the built DFA information that must be kept around to do the
81      * actual validation.
82      */
83     private QName fElemMap[] = null;
84 
85     /**
86      * This is a map of whether the element map contains information
87      * related to ANY models.
88      */
89     private int fElemMapType[] = null;
90 
91     /** The element map size. */
92     private int fElemMapSize = 0;
93 
94     /** Boolean to distinguish Schema Mixed Content */
95     private boolean fMixed;
96 
97     /**
98      * The NFA position of the special EOC (end of content) node. This
99      * is saved away since it's used during the DFA build.
100      */
101     private int fEOCPos = 0;
102 
103 
104     /**
105      * This is an array of booleans, one per state (there are
106      * fTransTableSize states in the DFA) that indicates whether that
107      * state is a final state.
108      */
109     private boolean fFinalStateFlags[] = null;
110 
111     /**
112      * The list of follow positions for each NFA position (i.e. for each
113      * non-epsilon leaf node.) This is only used during the building of
114      * the DFA, and is let go afterwards.
115      */
116     private CMStateSet fFollowList[] = null;
117 
118     /**
119      * This is the head node of our intermediate representation. It is
120      * only non-null during the building of the DFA (just so that it
121      * does not have to be passed all around.) Once the DFA is built,
122      * this is no longer required so its nulled out.
123      */
124     private CMNode fHeadNode = null;
125 
126     /**
127      * The count of leaf nodes. This is an important number that set some
128      * limits on the sizes of data structures in the DFA process.
129      */
130     private int fLeafCount = 0;
131 
132     /**
133      * An array of non-epsilon leaf nodes, which is used during the DFA
134      * build operation, then dropped.
135      */
136     private CMLeaf fLeafList[] = null;
137 
138     /** Array mapping ANY types to the leaf list. */
139     private int fLeafListType[] = null;
140 
141     //private ContentLeafNameTypeVector fLeafNameTypeVector = null;
142 
143     /**
144      * The string pool of our parser session. This is set during construction
145      * and kept around.
146      */
147     //private StringPool fStringPool = null;
148 
149     /**
150      * This is the transition table that is the main by product of all
151      * of the effort here. It is an array of arrays of ints. The first
152      * dimension is the number of states we end up with in the DFA. The
153      * second dimensions is the number of unique elements in the content
154      * model (fElemMapSize). Each entry in the second dimension indicates
155      * the new state given that input for the first dimension's start
156      * state.
157      * <p>
158      * The fElemMap array handles mapping from element indexes to
159      * positions in the second dimension of the transition table.
160      */
161     private int fTransTable[][] = null;
162 
163     /**
164      * The number of valid entries in the transition table, and in the other
165      * related tables such as fFinalStateFlags.
166      */
167     private int fTransTableSize = 0;
168 
169     /**
170      * Flag that indicates that even though we have a "complicated"
171      * content model, it is valid to have no content. In other words,
172      * all parts of the content model are optional. For example:
173      * <pre>
174      *      &lt;!ELEMENT AllOptional (Optional*,NotRequired?)&gt;
175      * </pre>
176      */
177     private boolean fEmptyContentIsValid = false;
178 
179     // temp variables
180 
181     /** Temporary qualified name. */
182     private final QName fQName = new QName();
183 
184     //
185     // Constructors
186     //
187 
188 
189     //
190     // Constructors
191     //
192 
193     /**
194      * Constructs a DFA content model.
195      *
196      * @param syntaxTree    The syntax tree of the content model.
197      * @param leafCount     The number of leaves.
198      * @param mixed
199      *
200      */
DFAContentModel(CMNode syntaxTree, int leafCount, boolean mixed)201     public DFAContentModel(CMNode syntaxTree, int leafCount, boolean mixed) {
202         // Store away our index and pools in members
203         //fStringPool = stringPool;
204         fLeafCount = leafCount;
205 
206 
207         // this is for Schema Mixed Content
208         fMixed = mixed;
209 
210         //
211         //  Ok, so lets grind through the building of the DFA. This method
212         //  handles the high level logic of the algorithm, but it uses a
213         //  number of helper classes to do its thing.
214         //
215         //  In order to avoid having hundreds of references to the error and
216         //  string handlers around, this guy and all of his helper classes
217         //  just throw a simple exception and we then pass it along.
218         //
219         buildDFA(syntaxTree);
220     }
221 
222     //
223     // ContentModelValidator methods
224     //
225 
226     /**
227      * Check that the specified content is valid according to this
228      * content model. This method can also be called to do 'what if'
229      * testing of content models just to see if they would be valid.
230      * <p>
231      * A value of -1 in the children array indicates a PCDATA node. All other
232      * indexes will be positive and represent child elements. The count can be
233      * zero, since some elements have the EMPTY content model and that must be
234      * confirmed.
235      *
236      * @param children The children of this element.  Each integer is an index within
237      *                 the <code>StringPool</code> of the child element name.  An index
238      *                 of -1 is used to indicate an occurrence of non-whitespace character
239      *                 data.
240      * @param offset Offset into the array where the children starts.
241      * @param length The number of entries in the <code>children</code> array.
242      *
243      * @return The value -1 if fully valid, else the 0 based index of the child
244      *         that first failed. If the value returned is equal to the number
245      *         of children, then the specified children are valid but additional
246      *         content is required to reach a valid ending state.
247      *
248      */
validate(QName[] children, int offset, int length)249     public int validate(QName[] children, int offset, int length) {
250 
251         if (DEBUG_VALIDATE_CONTENT)
252             System.out.println("DFAContentModel#validateContent");
253 
254         //
255         // A DFA content model must *always* have at least 1 child
256         // so a failure is given if no children present.
257         //
258         // Defect 782: This is an incorrect statement because a DFA
259         // content model is also used for constructions such as:
260         //
261         //     (Optional*,NotRequired?)
262         //
263         // where a perfectly valid content would be NO CHILDREN.
264         // Therefore, if there are no children, we must check to
265         // see if the CMNODE_EOC marker is a valid start state! -Ac
266         //
267         if (length == 0) {
268             if (DEBUG_VALIDATE_CONTENT) {
269                 System.out.println("!!! no children");
270                 System.out.println("elemMap="+fElemMap);
271                 for (int i = 0; i < fElemMap.length; i++) {
272                     String uri = fElemMap[i].uri;
273                     String localpart = fElemMap[i].localpart;
274 
275                     System.out.println("fElemMap["+i+"]="+uri+","+
276                                        localpart+" ("+
277                                        uri+", "+
278                                        localpart+
279                                        ')');
280 
281                 }
282                 System.out.println("EOCIndex="+fEOCString);
283             }
284 
285             return fEmptyContentIsValid ? -1 : 0;
286 
287         } // if child count == 0
288 
289         //
290         //  Lets loop through the children in the array and move our way
291         //  through the states. Note that we use the fElemMap array to map
292         //  an element index to a state index.
293         //
294         int curState = 0;
295         for (int childIndex = 0; childIndex < length; childIndex++)
296         {
297             // Get the current element index out
298             final QName curElem = children[offset + childIndex];
299             // ignore mixed text
300             if (fMixed && curElem.localpart == null) {
301                 continue;
302             }
303 
304             // Look up this child in our element map
305             int elemIndex = 0;
306             for (; elemIndex < fElemMapSize; elemIndex++)
307             {
308                 int type = fElemMapType[elemIndex] & 0x0f ;
309                 if (type == XMLContentSpec.CONTENTSPECNODE_LEAF) {
310                     //System.out.println("fElemMap["+elemIndex+"]: "+fElemMap[elemIndex]);
311                     if (fElemMap[elemIndex].rawname == curElem.rawname) {
312                         break;
313                     }
314                 }
315                 else if (type == XMLContentSpec.CONTENTSPECNODE_ANY) {
316                     String uri = fElemMap[elemIndex].uri;
317                     if (uri == null || uri == curElem.uri) {
318                         break;
319                     }
320                 }
321                 else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL) {
322                     if (curElem.uri == null) {
323                         break;
324                     }
325                 }
326                 else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
327                     if (fElemMap[elemIndex].uri != curElem.uri) {
328                         break;
329                     }
330                 }
331             }
332 
333             // If we didn't find it, then obviously not valid
334             if (elemIndex == fElemMapSize) {
335                 if (DEBUG_VALIDATE_CONTENT) {
336                     System.out.println("!!! didn't find it");
337 
338                     System.out.println("curElem : " +curElem );
339                     for (int i=0; i<fElemMapSize; i++) {
340                         System.out.println("fElemMap["+i+"] = " +fElemMap[i] );
341                         System.out.println("fElemMapType["+i+"] = " +fElemMapType[i] );
342                     }
343                 }
344 
345                 return childIndex;
346             }
347 
348             //
349             //  Look up the next state for this input symbol when in the
350             //  current state.
351             //
352             curState = fTransTable[curState][elemIndex];
353 
354             // If its not a legal transition, then invalid
355             if (curState == -1) {
356                 if (DEBUG_VALIDATE_CONTENT)
357                     System.out.println("!!! not a legal transition");
358                 return childIndex;
359             }
360         }
361 
362         //
363         //  We transitioned all the way through the input list. However, that
364         //  does not mean that we ended in a final state. So check whether
365         //  our ending state is a final state.
366         //
367         if (DEBUG_VALIDATE_CONTENT)
368             System.out.println("curState="+curState+", childCount="+length);
369         if (!fFinalStateFlags[curState])
370             return length;
371 
372         // success!
373         return -1;
374     } // validate
375 
376 
377     //
378     // Private methods
379     //
380 
381     /**
382      * Builds the internal DFA transition table from the given syntax tree.
383      *
384      * @param syntaxTree The syntax tree.
385      *
386      * @exception CMException Thrown if DFA cannot be built.
387      */
buildDFA(CMNode syntaxTree)388     private void buildDFA(CMNode syntaxTree)
389     {
390         //
391         //  The first step we need to take is to rewrite the content model
392         //  using our CMNode objects, and in the process get rid of any
393         //  repetition short cuts, converting them into '*' style repetitions
394         //  or getting rid of repetitions altogether.
395         //
396         //  The conversions done are:
397         //
398         //  x+ -> (x|x*)
399         //  x? -> (x|epsilon)
400         //
401         //  This is a relatively complex scenario. What is happening is that
402         //  we create a top level binary node of which the special EOC value
403         //  is set as the right side node. The the left side is set to the
404         //  rewritten syntax tree. The source is the original content model
405         //  info from the decl pool. The rewrite is done by buildSyntaxTree()
406         //  which recurses the decl pool's content of the element and builds
407         //  a new tree in the process.
408         //
409         //  Note that, during this operation, we set each non-epsilon leaf
410         //  node's DFA state position and count the number of such leafs, which
411         //  is left in the fLeafCount member.
412         //
413         //  The nodeTmp object is passed in just as a temp node to use during
414         //  the recursion. Otherwise, we'd have to create a new node on every
415         //  level of recursion, which would be piggy in Java (as is everything
416         //  for that matter.)
417         //
418 
419         /* MODIFIED (Jan, 2001)
420          *
421          * Use following rules.
422          *   nullable(x+) := nullable(x), first(x+) := first(x),  last(x+) := last(x)
423          *   nullable(x?) := true, first(x?) := first(x),  last(x?) := last(x)
424          *
425          * The same computation of follow as x* is applied to x+
426          *
427          * The modification drastically reduces computation time of
428          * "(a, (b, a+, (c, (b, a+)+, a+, (d,  (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+"
429          */
430 
431         fQName.setValues(null, fEOCString, fEOCString, null);
432         CMLeaf nodeEOC = new CMLeaf(fQName);
433         fHeadNode = new CMBinOp
434         (
435             XMLContentSpec.CONTENTSPECNODE_SEQ
436             , syntaxTree
437             , nodeEOC
438         );
439 
440         //
441         //  And handle specially the EOC node, which also must be numbered
442         //  and counted as a non-epsilon leaf node. It could not be handled
443         //  in the above tree build because it was created before all that
444         //  started. We save the EOC position since its used during the DFA
445         //  building loop.
446         //
447         fEOCPos = fLeafCount;
448         nodeEOC.setPosition(fLeafCount++);
449 
450         //
451         //  Ok, so now we have to iterate the new tree and do a little more
452         //  work now that we know the leaf count. One thing we need to do is
453         //  to calculate the first and last position sets of each node. This
454         //  is cached away in each of the nodes.
455         //
456         //  Along the way we also set the leaf count in each node as the
457         //  maximum state count. They must know this in order to create their
458         //  first/last pos sets.
459         //
460         //  We also need to build an array of references to the non-epsilon
461         //  leaf nodes. Since we iterate it in the same way as before, this
462         //  will put them in the array according to their position values.
463         //
464         fLeafList = new CMLeaf[fLeafCount];
465         fLeafListType = new int[fLeafCount];
466         postTreeBuildInit(fHeadNode, 0);
467 
468         //
469         //  And, moving onward... We now need to build the follow position
470         //  sets for all the nodes. So we allocate an array of state sets,
471         //  one for each leaf node (i.e. each DFA position.)
472         //
473         fFollowList = new CMStateSet[fLeafCount];
474         for (int index = 0; index < fLeafCount; index++)
475             fFollowList[index] = new CMStateSet(fLeafCount);
476         calcFollowList(fHeadNode);
477         //
478         //  And finally the big push... Now we build the DFA using all the
479         //  states and the tree we've built up. First we set up the various
480         //  data structures we are going to use while we do this.
481         //
482         //  First of all we need an array of unique element names in our
483         //  content model. For each transition table entry, we need a set of
484         //  contiguous indices to represent the transitions for a particular
485         //  input element. So we need to a zero based range of indexes that
486         //  map to element types. This element map provides that mapping.
487         //
488         fElemMap = new QName[fLeafCount];
489         fElemMapType = new int[fLeafCount];
490         fElemMapSize = 0;
491         for (int outIndex = 0; outIndex < fLeafCount; outIndex++)
492         {
493             fElemMap[outIndex] = new QName();
494 
495             /****
496             if ( (fLeafListType[outIndex] & 0x0f) != 0 ) {
497                 if (fLeafNameTypeVector == null) {
498                     fLeafNameTypeVector = new ContentLeafNameTypeVector();
499                 }
500             }
501             /****/
502 
503             // Get the current leaf's element index
504             final QName element = fLeafList[outIndex].getElement();
505 
506             // See if the current leaf node's element index is in the list
507             int inIndex = 0;
508             for (; inIndex < fElemMapSize; inIndex++)
509             {
510                 if (fElemMap[inIndex].rawname == element.rawname) {
511                     break;
512                 }
513             }
514 
515             // If it was not in the list, then add it, if not the EOC node
516             if (inIndex == fElemMapSize) {
517                 fElemMap[fElemMapSize].setValues(element);
518                 fElemMapType[fElemMapSize] = fLeafListType[outIndex];
519                 fElemMapSize++;
520             }
521         }
522         // set up the fLeafNameTypeVector object if there is one.
523         /*****
524         if (fLeafNameTypeVector != null) {
525             fLeafNameTypeVector.setValues(fElemMap, fElemMapType, fElemMapSize);
526         }
527 
528         /***
529         * Optimization(Jan, 2001); We sort fLeafList according to
530         * elemIndex which is *uniquely* associated to each leaf.
531         * We are *assuming* that each element appears in at least one leaf.
532         **/
533 
534         int[] fLeafSorter = new int[fLeafCount + fElemMapSize];
535         int fSortCount = 0;
536 
537         for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
538             for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) {
539                     final QName leaf = fLeafList[leafIndex].getElement();
540                     final QName element = fElemMap[elemIndex];
541                     if (leaf.rawname == element.rawname) {
542                             fLeafSorter[fSortCount++] = leafIndex;
543                     }
544             }
545             fLeafSorter[fSortCount++] = -1;
546         }
547 
548         /* Optimization(Jan, 2001) */
549 
550         //
551         //  Next lets create some arrays, some that that hold transient
552         //  information during the DFA build and some that are permament.
553         //  These are kind of sticky since we cannot know how big they will
554         //  get, but we don't want to use any Java collections because of
555         //  performance.
556         //
557         //  Basically they will probably be about fLeafCount*2 on average,
558         //  but can be as large as 2^(fLeafCount*2), worst case. So we start
559         //  with fLeafCount*4 as a middle ground. This will be very unlikely
560         //  to ever have to expand, though it if does, the overhead will be
561         //  somewhat ugly.
562         //
563         int curArraySize = fLeafCount * 4;
564         CMStateSet[] statesToDo = new CMStateSet[curArraySize];
565         fFinalStateFlags = new boolean[curArraySize];
566         fTransTable = new int[curArraySize][];
567 
568         //
569         //  Ok we start with the initial set as the first pos set of the
570         //  head node (which is the seq node that holds the content model
571         //  and the EOC node.)
572         //
573         CMStateSet setT = fHeadNode.firstPos();
574 
575         //
576         //  Init our two state flags. Basically the unmarked state counter
577         //  is always chasing the current state counter. When it catches up,
578         //  that means we made a pass through that did not add any new states
579         //  to the lists, at which time we are done. We could have used a
580         //  expanding array of flags which we used to mark off states as we
581         //  complete them, but this is easier though less readable maybe.
582         //
583         int unmarkedState = 0;
584         int curState = 0;
585 
586         //
587         //  Init the first transition table entry, and put the initial state
588         //  into the states to do list, then bump the current state.
589         //
590         fTransTable[curState] = makeDefStateList();
591         statesToDo[curState] = setT;
592         curState++;
593 
594             /* Optimization(Jan, 2001); This is faster for
595              * a large content model such as, "(t001+|t002+|.... |t500+)".
596              */
597 
598         Map<CMStateSet, Integer> stateTable = new HashMap<>();
599 
600             /* Optimization(Jan, 2001) */
601 
602         //
603         //  Ok, almost done with the algorithm... We now enter the
604         //  loop where we go until the states done counter catches up with
605         //  the states to do counter.
606         //
607         while (unmarkedState < curState)
608         {
609             //
610             //  Get the first unmarked state out of the list of states to do.
611             //  And get the associated transition table entry.
612             //
613             setT = statesToDo[unmarkedState];
614             int[] transEntry = fTransTable[unmarkedState];
615 
616             // Mark this one final if it contains the EOC state
617             fFinalStateFlags[unmarkedState] = setT.getBit(fEOCPos);
618 
619             // Bump up the unmarked state count, marking this state done
620             unmarkedState++;
621 
622             // Loop through each possible input symbol in the element map
623             CMStateSet newSet = null;
624             /* Optimization(Jan, 2001) */
625             int sorterIndex = 0;
626             /* Optimization(Jan, 2001) */
627             for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++)
628             {
629                 //
630                 //  Build up a set of states which is the union of all of
631                 //  the follow sets of DFA positions that are in the current
632                 //  state. If we gave away the new set last time through then
633                 //  create a new one. Otherwise, zero out the existing one.
634                 //
635                 if (newSet == null)
636                     newSet = new CMStateSet(fLeafCount);
637                 else
638                     newSet.zeroBits();
639 
640             /* Optimization(Jan, 2001) */
641                 int leafIndex = fLeafSorter[sorterIndex++];
642 
643                 while (leafIndex != -1) {
644                 // If this leaf index (DFA position) is in the current set...
645                     if (setT.getBit(leafIndex))
646                     {
647                         //
648                         //  If this leaf is the current input symbol, then we
649                         //  want to add its follow list to the set of states to
650                         //  transition to from the current state.
651                         //
652                                 newSet.union(fFollowList[leafIndex]);
653                             }
654 
655                    leafIndex = fLeafSorter[sorterIndex++];
656         }
657             /* Optimization(Jan, 2001) */
658 
659                 //
660                 //  If this new set is not empty, then see if its in the list
661                 //  of states to do. If not, then add it.
662                 //
663                 if (!newSet.isEmpty())
664                 {
665                     //
666                     //  Search the 'states to do' list to see if this new
667                     //  state set is already in there.
668                     //
669 
670             /* Optimization(Jan, 2001) */
671             Integer stateObj = stateTable.get(newSet);
672             int stateIndex = (stateObj == null ? curState : stateObj.intValue());
673             /* Optimization(Jan, 2001) */
674 
675                     // If we did not find it, then add it
676                     if (stateIndex == curState)
677                     {
678                         //
679                         //  Put this new state into the states to do and init
680                         //  a new entry at the same index in the transition
681                         //  table.
682                         //
683                         statesToDo[curState] = newSet;
684                         fTransTable[curState] = makeDefStateList();
685 
686             /* Optimization(Jan, 2001) */
687                         stateTable.put(newSet, curState);
688             /* Optimization(Jan, 2001) */
689 
690                         // We now have a new state to do so bump the count
691                         curState++;
692 
693                         //
694                         //  Null out the new set to indicate we adopted it.
695                         //  This will cause the creation of a new set on the
696                         //  next time around the loop.
697                         //
698                         newSet = null;
699                     }
700 
701                     //
702                     //  Now set this state in the transition table's entry
703                     //  for this element (using its index), with the DFA
704                     //  state we will move to from the current state when we
705                     //  see this input element.
706                     //
707                     transEntry[elemIndex] = stateIndex;
708 
709                     // Expand the arrays if we're full
710                     if (curState == curArraySize)
711                     {
712                         //
713                         //  Yikes, we overflowed the initial array size, so
714                         //  we've got to expand all of these arrays. So adjust
715                         //  up the size by 50% and allocate new arrays.
716                         //
717                         final int newSize = (int)(curArraySize * 1.5);
718                         CMStateSet[] newToDo = new CMStateSet[newSize];
719                         boolean[] newFinalFlags = new boolean[newSize];
720                         int[][] newTransTable = new int[newSize][];
721 
722                         // Copy over all of the existing content
723                         System.arraycopy(statesToDo, 0, newToDo, 0, curArraySize);
724                         System.arraycopy(fFinalStateFlags, 0, newFinalFlags, 0, curArraySize);
725                         System.arraycopy(fTransTable, 0, newTransTable, 0, curArraySize);
726 
727                         // Store the new array size
728                         curArraySize = newSize;
729                         statesToDo = newToDo;
730                         fFinalStateFlags = newFinalFlags;
731                         fTransTable = newTransTable;
732                     }
733                 }
734             }
735         }
736 
737         // Check to see if we can set the fEmptyContentIsValid flag.
738         fEmptyContentIsValid = ((CMBinOp)fHeadNode).getLeft().isNullable();
739 
740         //
741         //  And now we can say bye bye to the temp representation since we've
742         //  built the DFA.
743         //
744         if (DEBUG_VALIDATE_CONTENT)
745             dumpTree(fHeadNode, 0);
746         fHeadNode = null;
747         fLeafList = null;
748         fFollowList = null;
749 
750     }
751 
752     /**
753      * Calculates the follow list of the current node.
754      *
755      * @param nodeCur The curent node.
756      *
757      * @exception CMException Thrown if follow list cannot be calculated.
758      */
calcFollowList(CMNode nodeCur)759     private void calcFollowList(CMNode nodeCur)
760     {
761         // Recurse as required
762         if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE)
763         {
764             // Recurse only
765             calcFollowList(((CMBinOp)nodeCur).getLeft());
766             calcFollowList(((CMBinOp)nodeCur).getRight());
767         }
768          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ)
769         {
770             // Recurse first
771             calcFollowList(((CMBinOp)nodeCur).getLeft());
772             calcFollowList(((CMBinOp)nodeCur).getRight());
773 
774             //
775             //  Now handle our level. We use our left child's last pos
776             //  set and our right child's first pos set, so go ahead and
777             //  get them ahead of time.
778             //
779             final CMStateSet last  = ((CMBinOp)nodeCur).getLeft().lastPos();
780             final CMStateSet first = ((CMBinOp)nodeCur).getRight().firstPos();
781 
782             //
783             //  Now, for every position which is in our left child's last set
784             //  add all of the states in our right child's first set to the
785             //  follow set for that position.
786             //
787             for (int index = 0; index < fLeafCount; index++)
788             {
789                 if (last.getBit(index))
790                     fFollowList[index].union(first);
791             }
792         }
793          /***
794          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE)
795         {
796             // Recurse first
797             calcFollowList(((CMUniOp)nodeCur).getChild());
798 
799             //
800             //  Now handle our level. We use our own first and last position
801             //  sets, so get them up front.
802             //
803             final CMStateSet first = nodeCur.firstPos();
804             final CMStateSet last  = nodeCur.lastPos();
805 
806             //
807             //  For every position which is in our last position set, add all
808             //  of our first position states to the follow set for that
809             //  position.
810             //
811             for (int index = 0; index < fLeafCount; index++)
812             {
813                 if (last.getBit(index))
814                     fFollowList[index].union(first);
815             }
816         }
817          else if ((nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE)
818               ||  (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE))
819         {
820             throw new RuntimeException("ImplementationMessages.VAL_NIICM");
821         }
822         /***/
823          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE
824             || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE)
825         {
826             // Recurse first
827             calcFollowList(((CMUniOp)nodeCur).getChild());
828 
829             //
830             //  Now handle our level. We use our own first and last position
831             //  sets, so get them up front.
832             //
833             final CMStateSet first = nodeCur.firstPos();
834             final CMStateSet last  = nodeCur.lastPos();
835 
836             //
837             //  For every position which is in our last position set, add all
838             //  of our first position states to the follow set for that
839             //  position.
840             //
841             for (int index = 0; index < fLeafCount; index++)
842             {
843                 if (last.getBit(index))
844                     fFollowList[index].union(first);
845             }
846         }
847 
848         else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE) {
849             // Recurse only
850             calcFollowList(((CMUniOp)nodeCur).getChild());
851         }
852          /***/
853     }
854 
855     /**
856      * Dumps the tree of the current node to standard output.
857      *
858      * @param nodeCur The current node.
859      * @param level   The maximum levels to output.
860      *
861      * @exception CMException Thrown on error.
862      */
dumpTree(CMNode nodeCur, int level)863     private void dumpTree(CMNode nodeCur, int level)
864     {
865         for (int index = 0; index < level; index++)
866             System.out.print("   ");
867 
868         int type = nodeCur.type();
869         if ((type == XMLContentSpec.CONTENTSPECNODE_CHOICE)
870         ||  (type == XMLContentSpec.CONTENTSPECNODE_SEQ))
871         {
872             if (type == XMLContentSpec.CONTENTSPECNODE_CHOICE)
873                 System.out.print("Choice Node ");
874             else
875                 System.out.print("Seq Node ");
876 
877             if (nodeCur.isNullable())
878                 System.out.print("Nullable ");
879 
880             System.out.print("firstPos=");
881             System.out.print(nodeCur.firstPos().toString());
882             System.out.print(" lastPos=");
883             System.out.println(nodeCur.lastPos().toString());
884 
885             dumpTree(((CMBinOp)nodeCur).getLeft(), level+1);
886             dumpTree(((CMBinOp)nodeCur).getRight(), level+1);
887         }
888          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE)
889         {
890             System.out.print("Rep Node ");
891 
892             if (nodeCur.isNullable())
893                 System.out.print("Nullable ");
894 
895             System.out.print("firstPos=");
896             System.out.print(nodeCur.firstPos().toString());
897             System.out.print(" lastPos=");
898             System.out.println(nodeCur.lastPos().toString());
899 
900             dumpTree(((CMUniOp)nodeCur).getChild(), level+1);
901         }
902          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_LEAF)
903         {
904             System.out.print
905             (
906                 "Leaf: (pos="
907                 + ((CMLeaf)nodeCur).getPosition()
908                 + "), "
909                 + ((CMLeaf)nodeCur).getElement()
910                 + "(elemIndex="
911                 + ((CMLeaf)nodeCur).getElement()
912                 + ") "
913             );
914 
915             if (nodeCur.isNullable())
916                 System.out.print(" Nullable ");
917 
918             System.out.print("firstPos=");
919             System.out.print(nodeCur.firstPos().toString());
920             System.out.print(" lastPos=");
921             System.out.println(nodeCur.lastPos().toString());
922         }
923          else
924         {
925             throw new RuntimeException("ImplementationMessages.VAL_NIICM");
926         }
927     }
928 
929 
930     /**
931      * -1 is used to represent bad transitions in the transition table
932      * entry for each state. So each entry is initialized to an all -1
933      * array. This method creates a new entry and initializes it.
934      */
makeDefStateList()935     private int[] makeDefStateList()
936     {
937         int[] retArray = new int[fElemMapSize];
938         for (int index = 0; index < fElemMapSize; index++)
939             retArray[index] = -1;
940         return retArray;
941     }
942 
943     /** Post tree build initialization. */
postTreeBuildInit(CMNode nodeCur, int curIndex)944     private int postTreeBuildInit(CMNode nodeCur, int curIndex)
945     {
946         // Set the maximum states on this node
947         nodeCur.setMaxStates(fLeafCount);
948 
949         // Recurse as required
950         if ((nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY ||
951             (nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL ||
952             (nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
953             // REVISIT: Don't waste these structures.
954             QName qname = new QName(null, null, null, ((CMAny)nodeCur).getURI());
955             fLeafList[curIndex] = new CMLeaf(qname, ((CMAny)nodeCur).getPosition());
956             fLeafListType[curIndex] = nodeCur.type();
957             curIndex++;
958         }
959         else if ((nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE)
960         ||  (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ))
961         {
962             curIndex = postTreeBuildInit(((CMBinOp)nodeCur).getLeft(), curIndex);
963             curIndex = postTreeBuildInit(((CMBinOp)nodeCur).getRight(), curIndex);
964         }
965          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE
966              || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE
967              || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE)
968         {
969             curIndex = postTreeBuildInit(((CMUniOp)nodeCur).getChild(), curIndex);
970         }
971          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_LEAF)
972         {
973             //
974             //  Put this node in the leaf list at the current index if its
975             //  a non-epsilon leaf.
976             //
977              final QName node = ((CMLeaf)nodeCur).getElement();
978             if (node.localpart != fEpsilonString) {
979                 fLeafList[curIndex] = (CMLeaf)nodeCur;
980                 fLeafListType[curIndex] = XMLContentSpec.CONTENTSPECNODE_LEAF;
981                 curIndex++;
982             }
983         }
984          else
985         {
986             throw new RuntimeException("ImplementationMessages.VAL_NIICM: type="+nodeCur.type());
987         }
988         return curIndex;
989     }
990 
991 } // class DFAContentModel
992