1 /*
2  * Copyright (c) 2006, 2017, Oracle and/or its affiliates. All rights reserved.
3  */
4 /*
5  * Licensed to the Apache Software Foundation (ASF) under one or more
6  * contributor license agreements.  See the NOTICE file distributed with
7  * this work for additional information regarding copyright ownership.
8  * The ASF licenses this file to You under the Apache License, Version 2.0
9  * (the "License"); you may not use this file except in compliance with
10  * the License.  You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 
21 package com.sun.org.apache.xerces.internal.impl.xs.models;
22 
23 import com.sun.org.apache.xerces.internal.impl.dtd.models.CMNode;
24 import com.sun.org.apache.xerces.internal.impl.dtd.models.CMStateSet;
25 import com.sun.org.apache.xerces.internal.impl.xs.SchemaSymbols;
26 import com.sun.org.apache.xerces.internal.impl.xs.SubstitutionGroupHandler;
27 import com.sun.org.apache.xerces.internal.impl.xs.XMLSchemaException;
28 import com.sun.org.apache.xerces.internal.impl.xs.XSConstraints;
29 import com.sun.org.apache.xerces.internal.impl.xs.XSElementDecl;
30 import com.sun.org.apache.xerces.internal.impl.xs.XSModelGroupImpl;
31 import com.sun.org.apache.xerces.internal.impl.xs.XSParticleDecl;
32 import com.sun.org.apache.xerces.internal.impl.xs.XSWildcardDecl;
33 import com.sun.org.apache.xerces.internal.xni.QName;
34 import java.util.ArrayList;
35 import java.util.HashMap;
36 import java.util.List;
37 import java.util.Map;
38 
39 /**
40  * DFAContentModel is the implementation of XSCMValidator that does
41  * all of the non-trivial element content validation. This class does
42  * the conversion from the regular expression to the DFA that
43  * it then uses in its validation algorithm.
44  *
45  * @xerces.internal
46  *
47  * @author Neil Graham, IBM
48  * @LastModified: Oct 2017
49  */
50 public class XSDFACM
51     implements XSCMValidator {
52 
53     //
54     // Constants
55     //
56     private static final boolean DEBUG = false;
57 
58     // special strings
59 
60     // debugging
61 
62     /** Set to true to debug content model validation. */
63     private static final boolean DEBUG_VALIDATE_CONTENT = false;
64 
65     //
66     // Data
67     //
68 
69     /**
70      * This is the map of unique input symbol elements to indices into
71      * each state's per-input symbol transition table entry. This is part
72      * of the built DFA information that must be kept around to do the
73      * actual validation.  Note tat since either XSElementDecl or XSParticleDecl object
74      * can live here, we've got to use an Object.
75      */
76     private Object fElemMap[] = null;
77 
78     /**
79      * This is a map of whether the element map contains information
80      * related to ANY models.
81      */
82     private int fElemMapType[] = null;
83 
84     /**
85      * id of the unique input symbol
86      */
87     private int fElemMapId[] = null;
88 
89     /** The element map size. */
90     private int fElemMapSize = 0;
91 
92     /**
93      * This is an array of booleans, one per state (there are
94      * fTransTableSize states in the DFA) that indicates whether that
95      * state is a final state.
96      */
97     private boolean fFinalStateFlags[] = null;
98 
99     /**
100      * The list of follow positions for each NFA position (i.e. for each
101      * non-epsilon leaf node.) This is only used during the building of
102      * the DFA, and is let go afterwards.
103      */
104     private CMStateSet fFollowList[] = null;
105 
106     /**
107      * This is the head node of our intermediate representation. It is
108      * only non-null during the building of the DFA (just so that it
109      * does not have to be passed all around.) Once the DFA is built,
110      * this is no longer required so its nulled out.
111      */
112     private CMNode fHeadNode = null;
113 
114     /**
115      * The count of leaf nodes. This is an important number that set some
116      * limits on the sizes of data structures in the DFA process.
117      */
118     private int fLeafCount = 0;
119 
120     /**
121      * An array of non-epsilon leaf nodes, which is used during the DFA
122      * build operation, then dropped.
123      */
124     private XSCMLeaf fLeafList[] = null;
125 
126     /** Array mapping ANY types to the leaf list. */
127     private int fLeafListType[] = null;
128 
129     /**
130      * This is the transition table that is the main by product of all
131      * of the effort here. It is an array of arrays of ints. The first
132      * dimension is the number of states we end up with in the DFA. The
133      * second dimensions is the number of unique elements in the content
134      * model (fElemMapSize). Each entry in the second dimension indicates
135      * the new state given that input for the first dimension's start
136      * state.
137      * <p>
138      * The fElemMap array handles mapping from element indexes to
139      * positions in the second dimension of the transition table.
140      */
141     private int fTransTable[][] = null;
142 
143     /**
144      * Array containing occurence information for looping states
145      * which use counters to check minOccurs/maxOccurs.
146      */
147     private Occurence [] fCountingStates = null;
148     static final class Occurence {
149         final int minOccurs;
150         final int maxOccurs;
151         final int elemIndex;
Occurence(XSCMRepeatingLeaf leaf, int elemIndex)152         public Occurence (XSCMRepeatingLeaf leaf, int elemIndex) {
153             minOccurs = leaf.getMinOccurs();
154             maxOccurs = leaf.getMaxOccurs();
155             this.elemIndex = elemIndex;
156         }
toString()157         public String toString() {
158             return "minOccurs=" + minOccurs
159                 + ";maxOccurs=" +
160                 ((maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED)
161                         ? Integer.toString(maxOccurs) : "unbounded");
162         }
163     }
164 
165     /**
166      * The number of valid entries in the transition table, and in the other
167      * related tables such as fFinalStateFlags.
168      */
169     private int fTransTableSize = 0;
170 
171     private boolean fIsCompactedForUPA;
172 
173     /**
174      * Array of counters for all the for elements (or wildcards)
175      * of the form a{n,m} where n > 1 and m <= unbounded. Used
176      * to count the a's to later check against n and m. Counter
177      * set to -1 if element (or wildcard) not optimized by
178      * constant space algorithm.
179      */
180     private int fElemMapCounter[];
181 
182     /**
183      * Array of lower bounds for all the for elements (or wildcards)
184      * of the form a{n,m} where n > 1 and m <= unbounded. This array
185      * stores the n's for those elements (or wildcards) for which
186      * the constant space algorithm applies (or -1 otherwise).
187      */
188     private int fElemMapCounterLowerBound[];
189 
190     /**
191      * Array of upper bounds for all the for elements (or wildcards)
192      * of the form a{n,m} where n > 1 and m <= unbounded. This array
193      * stores the n's for those elements (or wildcards) for which
194      * the constant space algorithm applies, or -1 if algorithm does
195      * not apply or m = unbounded.
196      */
197     private int fElemMapCounterUpperBound[];   // -1 if no upper bound
198 
199     // temp variables
200 
201     //
202     // Constructors
203     //
204 
205     /**
206      * Constructs a DFA content model.
207      *
208      * @param syntaxTree    The syntax tree of the content model.
209      * @param leafCount     The number of leaves.
210      *
211      * @exception RuntimeException Thrown if DFA can't be built.
212      */
213 
XSDFACM(CMNode syntaxTree, int leafCount)214    public XSDFACM(CMNode syntaxTree, int leafCount) {
215 
216         // Store away our index and pools in members
217         fLeafCount = leafCount;
218         fIsCompactedForUPA = syntaxTree.isCompactedForUPA();
219 
220         //
221         //  Create some string pool indexes that represent the names of some
222         //  magical nodes in the syntax tree.
223         //  (already done in static initialization...
224         //
225 
226         //
227         //  Ok, so lets grind through the building of the DFA. This method
228         //  handles the high level logic of the algorithm, but it uses a
229         //  number of helper classes to do its thing.
230         //
231         //  In order to avoid having hundreds of references to the error and
232         //  string handlers around, this guy and all of his helper classes
233         //  just throw a simple exception and we then pass it along.
234         //
235 
236         if(DEBUG_VALIDATE_CONTENT) {
237             XSDFACM.time -= System.currentTimeMillis();
238         }
239 
240         buildDFA(syntaxTree);
241 
242         if(DEBUG_VALIDATE_CONTENT) {
243             XSDFACM.time += System.currentTimeMillis();
244             System.out.println("DFA build: " + XSDFACM.time + "ms");
245         }
246     }
247 
248     private static long time = 0;
249 
250     //
251     // XSCMValidator methods
252     //
253 
254     /**
255      * check whether the given state is one of the final states
256      *
257      * @param state       the state to check
258      *
259      * @return whether it's a final state
260      */
isFinalState(int state)261     public boolean isFinalState (int state) {
262         return (state < 0)? false :
263             fFinalStateFlags[state];
264     }
265 
266     /**
267      * one transition only
268      *
269      * @param curElem The current element's QName
270      * @param state stack to store the previous state
271      * @param subGroupHandler the substitution group handler
272      *
273      * @return  null if transition is invalid; otherwise the Object corresponding to the
274      *      XSElementDecl or XSWildcardDecl identified.  Also, the
275      *      state array will be modified to include the new state; this so that the validator can
276      *      store it away.
277      *
278      * @exception RuntimeException thrown on error
279      */
oneTransition(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler)280     public Object oneTransition(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler) {
281         int curState = state[0];
282 
283         if(curState == XSCMValidator.FIRST_ERROR || curState == XSCMValidator.SUBSEQUENT_ERROR) {
284             // there was an error last time; so just go find correct Object in fElemmMap.
285             // ... after resetting state[0].
286             if(curState == XSCMValidator.FIRST_ERROR)
287                 state[0] = XSCMValidator.SUBSEQUENT_ERROR;
288 
289             return findMatchingDecl(curElem, subGroupHandler);
290         }
291 
292         int nextState = 0;
293         int elemIndex = 0;
294         Object matchingDecl = null;
295 
296         for (; elemIndex < fElemMapSize; elemIndex++) {
297             nextState = fTransTable[curState][elemIndex];
298             if (nextState == -1)
299                 continue;
300             int type = fElemMapType[elemIndex] ;
301             if (type == XSParticleDecl.PARTICLE_ELEMENT) {
302                 matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]);
303                 if (matchingDecl != null) {
304                     // Increment counter if constant space algorithm applies
305                     if (fElemMapCounter[elemIndex] >= 0) {
306                         fElemMapCounter[elemIndex]++;
307                     }
308                     break;
309                 }
310             }
311             else if (type == XSParticleDecl.PARTICLE_WILDCARD) {
312                 if (((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri)) {
313                     matchingDecl = fElemMap[elemIndex];
314                     // Increment counter if constant space algorithm applies
315                     if (fElemMapCounter[elemIndex] >= 0) {
316                         fElemMapCounter[elemIndex]++;
317                     }
318                     break;
319                 }
320             }
321         }
322 
323         // if we still can't find a match, set the state to first_error
324         // and return null
325         if (elemIndex == fElemMapSize) {
326             state[1] = state[0];
327             state[0] = XSCMValidator.FIRST_ERROR;
328             return findMatchingDecl(curElem, subGroupHandler);
329         }
330 
331         if (fCountingStates != null) {
332             Occurence o = fCountingStates[curState];
333             if (o != null) {
334                 if (curState == nextState) {
335                     if (++state[2] > o.maxOccurs &&
336                         o.maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) {
337                         // It's likely that we looped too many times on the current state
338                         // however it's possible that we actually matched another particle
339                         // which allows the same name.
340                         //
341                         // Consider:
342                         //
343                         // <xs:sequence>
344                         //  <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/>
345                         //  <xs:element name="foo" type="xs:string" fixed="bar"/>
346                         // </xs:sequence>
347                         //
348                         // and
349                         //
350                         // <xs:sequence>
351                         //  <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/>
352                         //  <xs:any namespace="##any" processContents="skip"/>
353                         // </xs:sequence>
354                         //
355                         // In the DFA there will be two transitions from the current state which
356                         // allow "foo". Note that this is not a UPA violation. The ambiguity of which
357                         // transition to take is resolved by the current value of the counter. Since
358                         // we've already seen enough instances of the first "foo" perhaps there is
359                         // another element declaration or wildcard deeper in the element map which
360                         // matches.
361                         return findMatchingDecl(curElem, state, subGroupHandler, elemIndex);
362                     }
363                 }
364                 else if (state[2] < o.minOccurs) {
365                     // not enough loops on the current state.
366                     state[1] = state[0];
367                     state[0] = XSCMValidator.FIRST_ERROR;
368                     return findMatchingDecl(curElem, subGroupHandler);
369                 }
370                 else {
371                     // Exiting a counting state. If we're entering a new
372                     // counting state, reset the counter.
373                     o = fCountingStates[nextState];
374                     if (o != null) {
375                         state[2] = (elemIndex == o.elemIndex) ? 1 : 0;
376                     }
377                 }
378             }
379             else {
380                 o = fCountingStates[nextState];
381                 if (o != null) {
382                     // Entering a new counting state. Reset the counter.
383                     // If we've already seen one instance of the looping
384                     // particle set the counter to 1, otherwise set it
385                     // to 0.
386                     state[2] = (elemIndex == o.elemIndex) ? 1 : 0;
387                 }
388             }
389         }
390 
391         state[0] = nextState;
392         return matchingDecl;
393     } // oneTransition(QName, int[], SubstitutionGroupHandler):  Object
394 
findMatchingDecl(QName curElem, SubstitutionGroupHandler subGroupHandler)395     Object findMatchingDecl(QName curElem, SubstitutionGroupHandler subGroupHandler) {
396         Object matchingDecl = null;
397 
398         for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
399             int type = fElemMapType[elemIndex] ;
400             if (type == XSParticleDecl.PARTICLE_ELEMENT) {
401                 matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]);
402                 if (matchingDecl != null) {
403                     return matchingDecl;
404                 }
405             }
406             else if (type == XSParticleDecl.PARTICLE_WILDCARD) {
407                 if(((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri))
408                     return fElemMap[elemIndex];
409             }
410         }
411 
412         return null;
413     } // findMatchingDecl(QName, SubstitutionGroupHandler): Object
414 
findMatchingDecl(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler, int elemIndex)415     Object findMatchingDecl(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler, int elemIndex) {
416 
417         int curState = state[0];
418         int nextState = 0;
419         Object matchingDecl = null;
420 
421         while (++elemIndex < fElemMapSize) {
422             nextState = fTransTable[curState][elemIndex];
423             if (nextState == -1)
424                 continue;
425             int type = fElemMapType[elemIndex] ;
426             if (type == XSParticleDecl.PARTICLE_ELEMENT) {
427                 matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]);
428                 if (matchingDecl != null) {
429                     break;
430                 }
431             }
432             else if (type == XSParticleDecl.PARTICLE_WILDCARD) {
433                 if (((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri)) {
434                     matchingDecl = fElemMap[elemIndex];
435                     break;
436                 }
437             }
438         }
439 
440         // if we still can't find a match, set the state to FIRST_ERROR and return null
441         if (elemIndex == fElemMapSize) {
442             state[1] = state[0];
443             state[0] = XSCMValidator.FIRST_ERROR;
444             return findMatchingDecl(curElem, subGroupHandler);
445         }
446 
447         // if we found a match, set the next state and reset the
448         // counter if the next state is a counting state.
449         state[0] = nextState;
450         final Occurence o = fCountingStates[nextState];
451         if (o != null) {
452             state[2] = (elemIndex == o.elemIndex) ? 1 : 0;
453         }
454         return matchingDecl;
455     } // findMatchingDecl(QName, int[], SubstitutionGroupHandler, int): Object
456 
457     // This method returns the start states of the content model.
startContentModel()458     public int[] startContentModel() {
459         // Clear all constant space algorithm counters in use
460         for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
461             if (fElemMapCounter[elemIndex] != -1) {
462                 fElemMapCounter[elemIndex] = 0;
463             }
464         }
465         // [0] : the current state
466         // [1] : if [0] is an error state then the
467         //       last valid state before the error
468         // [2] : occurence counter for counting states
469         return new int [3];
470     } // startContentModel():int[]
471 
472     // this method returns whether the last state was a valid final state
endContentModel(int[] state)473     public boolean endContentModel(int[] state) {
474         final int curState = state[0];
475         if (fFinalStateFlags[curState]) {
476             if (fCountingStates != null) {
477                 Occurence o = fCountingStates[curState];
478                 if (o != null && state[2] < o.minOccurs) {
479                     // not enough loops on the current state to be considered final.
480                     return false;
481                 }
482             }
483             return true;
484         }
485         return false;
486     } // endContentModel(int[]):  boolean
487 
488     // Killed off whatCanGoHere; we may need it for DOM canInsert(...) etc.,
489     // but we can put it back later.
490 
491     //
492     // Private methods
493     //
494 
495     /**
496      * Builds the internal DFA transition table from the given syntax tree.
497      *
498      * @param syntaxTree The syntax tree.
499      *
500      * @exception RuntimeException Thrown if DFA cannot be built.
501      */
buildDFA(CMNode syntaxTree)502     private void buildDFA(CMNode syntaxTree) {
503         //
504         //  The first step we need to take is to rewrite the content model
505         //  using our CMNode objects, and in the process get rid of any
506         //  repetition short cuts, converting them into '*' style repetitions
507         //  or getting rid of repetitions altogether.
508         //
509         //  The conversions done are:
510         //
511         //  x+ -> (x|x*)
512         //  x? -> (x|epsilon)
513         //
514         //  This is a relatively complex scenario. What is happening is that
515         //  we create a top level binary node of which the special EOC value
516         //  is set as the right side node. The the left side is set to the
517         //  rewritten syntax tree. The source is the original content model
518         //  info from the decl pool. The rewrite is done by buildSyntaxTree()
519         //  which recurses the decl pool's content of the element and builds
520         //  a new tree in the process.
521         //
522         //  Note that, during this operation, we set each non-epsilon leaf
523         //  node's DFA state position and count the number of such leafs, which
524         //  is left in the fLeafCount member.
525         //
526         //  The nodeTmp object is passed in just as a temp node to use during
527         //  the recursion. Otherwise, we'd have to create a new node on every
528         //  level of recursion, which would be piggy in Java (as is everything
529         //  for that matter.)
530         //
531 
532         /* MODIFIED (Jan, 2001)
533          *
534          * Use following rules.
535          *   nullable(x+) := nullable(x), first(x+) := first(x),  last(x+) := last(x)
536          *   nullable(x?) := true, first(x?) := first(x),  last(x?) := last(x)
537          *
538          * The same computation of follow as x* is applied to x+
539          *
540          * The modification drastically reduces computation time of
541          * "(a, (b, a+, (c, (b, a+)+, a+, (d,  (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+"
542          */
543 
544         //
545         //  And handle specially the EOC node, which also must be numbered
546         //  and counted as a non-epsilon leaf node. It could not be handled
547         //  in the above tree build because it was created before all that
548         //  started. We save the EOC position since its used during the DFA
549         //  building loop.
550         //
551         int EOCPos = fLeafCount;
552         XSCMLeaf nodeEOC = new XSCMLeaf(XSParticleDecl.PARTICLE_ELEMENT, null, -1, fLeafCount++);
553         fHeadNode = new XSCMBinOp(
554             XSModelGroupImpl.MODELGROUP_SEQUENCE,
555             syntaxTree,
556             nodeEOC
557         );
558 
559         //
560         //  Ok, so now we have to iterate the new tree and do a little more
561         //  work now that we know the leaf count. One thing we need to do is
562         //  to calculate the first and last position sets of each node. This
563         //  is cached away in each of the nodes.
564         //
565         //  Along the way we also set the leaf count in each node as the
566         //  maximum state count. They must know this in order to create their
567         //  first/last pos sets.
568         //
569         //  We also need to build an array of references to the non-epsilon
570         //  leaf nodes. Since we iterate it in the same way as before, this
571         //  will put them in the array according to their position values.
572         //
573         fLeafList = new XSCMLeaf[fLeafCount];
574         fLeafListType = new int[fLeafCount];
575         postTreeBuildInit(fHeadNode);
576 
577         //
578         //  And, moving onward... We now need to build the follow position
579         //  sets for all the nodes. So we allocate an array of state sets,
580         //  one for each leaf node (i.e. each DFA position.)
581         //
582         fFollowList = new CMStateSet[fLeafCount];
583         for (int index = 0; index < fLeafCount; index++)
584             fFollowList[index] = new CMStateSet(fLeafCount);
585         calcFollowList(fHeadNode);
586         //
587         //  And finally the big push... Now we build the DFA using all the
588         //  states and the tree we've built up. First we set up the various
589         //  data structures we are going to use while we do this.
590         //
591         //  First of all we need an array of unique element names in our
592         //  content model. For each transition table entry, we need a set of
593         //  contiguous indices to represent the transitions for a particular
594         //  input element. So we need to a zero based range of indexes that
595         //  map to element types. This element map provides that mapping.
596         //
597         fElemMap = new Object[fLeafCount];
598         fElemMapType = new int[fLeafCount];
599         fElemMapId = new int[fLeafCount];
600 
601         fElemMapCounter = new int[fLeafCount];
602         fElemMapCounterLowerBound = new int[fLeafCount];
603         fElemMapCounterUpperBound = new int[fLeafCount];
604 
605         fElemMapSize = 0;
606         Occurence [] elemOccurenceMap = null;
607 
608         for (int outIndex = 0; outIndex < fLeafCount; outIndex++) {
609             // optimization from Henry Zongaro:
610             //fElemMap[outIndex] = new Object ();
611             fElemMap[outIndex] = null;
612 
613             int inIndex = 0;
614             final int id = fLeafList[outIndex].getParticleId();
615             for (; inIndex < fElemMapSize; inIndex++) {
616                 if (id == fElemMapId[inIndex])
617                     break;
618             }
619 
620             // If it was not in the list, then add it, if not the EOC node
621             if (inIndex == fElemMapSize) {
622                 XSCMLeaf leaf = fLeafList[outIndex];
623                 fElemMap[fElemMapSize] = leaf.getLeaf();
624                 if (leaf instanceof XSCMRepeatingLeaf) {
625                     if (elemOccurenceMap == null) {
626                         elemOccurenceMap = new Occurence[fLeafCount];
627                     }
628                     elemOccurenceMap[fElemMapSize] = new Occurence((XSCMRepeatingLeaf) leaf, fElemMapSize);
629                 }
630 
631                 fElemMapType[fElemMapSize] = fLeafListType[outIndex];
632                 fElemMapId[fElemMapSize] = id;
633 
634                 // Init counters and bounds for a{n,m} algorithm
635                 int[] bounds = (int[]) leaf.getUserData();
636                 if (bounds != null) {
637                     fElemMapCounter[fElemMapSize] = 0;
638                     fElemMapCounterLowerBound[fElemMapSize] = bounds[0];
639                     fElemMapCounterUpperBound[fElemMapSize] = bounds[1];
640                 } else {
641                     fElemMapCounter[fElemMapSize] = -1;
642                     fElemMapCounterLowerBound[fElemMapSize] = -1;
643                     fElemMapCounterUpperBound[fElemMapSize] = -1;
644                 }
645 
646                 fElemMapSize++;
647             }
648         }
649 
650         // the last entry in the element map must be the EOC element.
651         // remove it from the map.
652         if (DEBUG) {
653             if (fElemMapId[fElemMapSize-1] != -1)
654                 System.err.println("interal error in DFA: last element is not EOC.");
655         }
656         fElemMapSize--;
657 
658         /***
659          * Optimization(Jan, 2001); We sort fLeafList according to
660          * elemIndex which is *uniquely* associated to each leaf.
661          * We are *assuming* that each element appears in at least one leaf.
662          **/
663 
664         int[] fLeafSorter = new int[fLeafCount + fElemMapSize];
665         int fSortCount = 0;
666 
667         for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
668             final int id = fElemMapId[elemIndex];
669             for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) {
670                 if (id == fLeafList[leafIndex].getParticleId())
671                     fLeafSorter[fSortCount++] = leafIndex;
672             }
673             fLeafSorter[fSortCount++] = -1;
674         }
675 
676         /* Optimization(Jan, 2001) */
677 
678         //
679         //  Next lets create some arrays, some that hold transient
680         //  information during the DFA build and some that are permament.
681         //  These are kind of sticky since we cannot know how big they will
682         //  get, but we don't want to use any Java collections because of
683         //  performance.
684         //
685         //  Basically they will probably be about fLeafCount*2 on average,
686         //  but can be as large as 2^(fLeafCount*2), worst case. So we start
687         //  with fLeafCount*4 as a middle ground. This will be very unlikely
688         //  to ever have to expand, though it if does, the overhead will be
689         //  somewhat ugly.
690         //
691         int curArraySize = fLeafCount * 4;
692         CMStateSet[] statesToDo = new CMStateSet[curArraySize];
693         fFinalStateFlags = new boolean[curArraySize];
694         fTransTable = new int[curArraySize][];
695 
696         //
697         //  Ok we start with the initial set as the first pos set of the
698         //  head node (which is the seq node that holds the content model
699         //  and the EOC node.)
700         //
701         CMStateSet setT = fHeadNode.firstPos();
702 
703         //
704         //  Init our two state flags. Basically the unmarked state counter
705         //  is always chasing the current state counter. When it catches up,
706         //  that means we made a pass through that did not add any new states
707         //  to the lists, at which time we are done. We could have used a
708         //  expanding array of flags which we used to mark off states as we
709         //  complete them, but this is easier though less readable maybe.
710         //
711         int unmarkedState = 0;
712         int curState = 0;
713 
714         //
715         //  Init the first transition table entry, and put the initial state
716         //  into the states to do list, then bump the current state.
717         //
718         fTransTable[curState] = makeDefStateList();
719         statesToDo[curState] = setT;
720         curState++;
721 
722         /* Optimization(Jan, 2001); This is faster for
723          * a large content model such as, "(t001+|t002+|.... |t500+)".
724          */
725 
726         Map<CMStateSet, Integer> stateTable = new HashMap<>();
727 
728         /* Optimization(Jan, 2001) */
729 
730         //
731         //  Ok, almost done with the algorithm... We now enter the
732         //  loop where we go until the states done counter catches up with
733         //  the states to do counter.
734         //
735         while (unmarkedState < curState) {
736             //
737             //  Get the first unmarked state out of the list of states to do.
738             //  And get the associated transition table entry.
739             //
740             setT = statesToDo[unmarkedState];
741             int[] transEntry = fTransTable[unmarkedState];
742 
743             // Mark this one final if it contains the EOC state
744             fFinalStateFlags[unmarkedState] = setT.getBit(EOCPos);
745 
746             // Bump up the unmarked state count, marking this state done
747             unmarkedState++;
748 
749             // Loop through each possible input symbol in the element map
750             CMStateSet newSet = null;
751             /* Optimization(Jan, 2001) */
752             int sorterIndex = 0;
753             /* Optimization(Jan, 2001) */
754             for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
755                 //
756                 //  Build up a set of states which is the union of all of
757                 //  the follow sets of DFA positions that are in the current
758                 //  state. If we gave away the new set last time through then
759                 //  create a new one. Otherwise, zero out the existing one.
760                 //
761                 if (newSet == null)
762                     newSet = new CMStateSet(fLeafCount);
763                 else
764                     newSet.zeroBits();
765 
766                 /* Optimization(Jan, 2001) */
767                 int leafIndex = fLeafSorter[sorterIndex++];
768 
769                 while (leafIndex != -1) {
770                     // If this leaf index (DFA position) is in the current set...
771                     if (setT.getBit(leafIndex)) {
772                         //
773                         //  If this leaf is the current input symbol, then we
774                         //  want to add its follow list to the set of states to
775                         //  transition to from the current state.
776                         //
777                         newSet.union(fFollowList[leafIndex]);
778                     }
779 
780                    leafIndex = fLeafSorter[sorterIndex++];
781                 }
782                 /* Optimization(Jan, 2001) */
783 
784                 //
785                 //  If this new set is not empty, then see if its in the list
786                 //  of states to do. If not, then add it.
787                 //
788                 if (!newSet.isEmpty()) {
789                     //
790                     //  Search the 'states to do' list to see if this new
791                     //  state set is already in there.
792                     //
793 
794                     /* Optimization(Jan, 2001) */
795                     Integer stateObj = stateTable.get(newSet);
796                     int stateIndex = (stateObj == null ? curState : stateObj);
797                     /* Optimization(Jan, 2001) */
798 
799                     // If we did not find it, then add it
800                     if (stateIndex == curState) {
801                         //
802                         //  Put this new state into the states to do and init
803                         //  a new entry at the same index in the transition
804                         //  table.
805                         //
806                         statesToDo[curState] = newSet;
807                         fTransTable[curState] = makeDefStateList();
808 
809                         /* Optimization(Jan, 2001) */
810                         stateTable.put(newSet, curState);
811                         /* Optimization(Jan, 2001) */
812 
813                         // We now have a new state to do so bump the count
814                         curState++;
815 
816                         //
817                         //  Null out the new set to indicate we adopted it.
818                         //  This will cause the creation of a new set on the
819                         //  next time around the loop.
820                         //
821                         newSet = null;
822                     }
823 
824                     //
825                     //  Now set this state in the transition table's entry
826                     //  for this element (using its index), with the DFA
827                     //  state we will move to from the current state when we
828                     //  see this input element.
829                     //
830                     transEntry[elemIndex] = stateIndex;
831 
832                     // Expand the arrays if we're full
833                     if (curState == curArraySize) {
834                         //
835                         //  Yikes, we overflowed the initial array size, so
836                         //  we've got to expand all of these arrays. So adjust
837                         //  up the size by 50% and allocate new arrays.
838                         //
839                         final int newSize = (int)(curArraySize * 1.5);
840                         CMStateSet[] newToDo = new CMStateSet[newSize];
841                         boolean[] newFinalFlags = new boolean[newSize];
842                         int[][] newTransTable = new int[newSize][];
843 
844                         // Copy over all of the existing content
845                         System.arraycopy(statesToDo, 0, newToDo, 0, curArraySize);
846                         System.arraycopy(fFinalStateFlags, 0, newFinalFlags, 0, curArraySize);
847                         System.arraycopy(fTransTable, 0, newTransTable, 0, curArraySize);
848 
849                         // Store the new array size
850                         curArraySize = newSize;
851                         statesToDo = newToDo;
852                         fFinalStateFlags = newFinalFlags;
853                         fTransTable = newTransTable;
854                     }
855                 }
856             }
857         }
858 
859         //
860         // Fill in the occurence information for each looping state
861         // if we're using counters.
862         //
863         if (elemOccurenceMap != null) {
864             fCountingStates = new Occurence[curState];
865             for (int i = 0; i < curState; ++i) {
866                 int [] transitions = fTransTable[i];
867                 for (int j = 0; j < transitions.length; ++j) {
868                     if (i == transitions[j]) {
869                         fCountingStates[i] = elemOccurenceMap[j];
870                         break;
871                     }
872                 }
873             }
874         }
875 
876         //
877         //  And now we can say bye bye to the temp representation since we've
878         //  built the DFA.
879         //
880         if (DEBUG_VALIDATE_CONTENT)
881             dumpTree(fHeadNode, 0);
882         fHeadNode = null;
883         fLeafList = null;
884         fFollowList = null;
885         fLeafListType = null;
886         fElemMapId = null;
887     }
888 
889     /**
890      * Calculates the follow list of the current node.
891      *
892      * @param nodeCur The curent node.
893      *
894      * @exception RuntimeException Thrown if follow list cannot be calculated.
895      */
calcFollowList(CMNode nodeCur)896     private void calcFollowList(CMNode nodeCur) {
897         // Recurse as required
898         if (nodeCur.type() == XSModelGroupImpl.MODELGROUP_CHOICE) {
899             // Recurse only
900             calcFollowList(((XSCMBinOp)nodeCur).getLeft());
901             calcFollowList(((XSCMBinOp)nodeCur).getRight());
902         }
903          else if (nodeCur.type() == XSModelGroupImpl.MODELGROUP_SEQUENCE) {
904             // Recurse first
905             calcFollowList(((XSCMBinOp)nodeCur).getLeft());
906             calcFollowList(((XSCMBinOp)nodeCur).getRight());
907 
908             //
909             //  Now handle our level. We use our left child's last pos
910             //  set and our right child's first pos set, so go ahead and
911             //  get them ahead of time.
912             //
913             final CMStateSet last  = ((XSCMBinOp)nodeCur).getLeft().lastPos();
914             final CMStateSet first = ((XSCMBinOp)nodeCur).getRight().firstPos();
915 
916             //
917             //  Now, for every position which is in our left child's last set
918             //  add all of the states in our right child's first set to the
919             //  follow set for that position.
920             //
921             for (int index = 0; index < fLeafCount; index++) {
922                 if (last.getBit(index))
923                     fFollowList[index].union(first);
924             }
925         }
926          else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_MORE
927         || nodeCur.type() == XSParticleDecl.PARTICLE_ONE_OR_MORE) {
928             // Recurse first
929             calcFollowList(((XSCMUniOp)nodeCur).getChild());
930 
931             //
932             //  Now handle our level. We use our own first and last position
933             //  sets, so get them up front.
934             //
935             final CMStateSet first = nodeCur.firstPos();
936             final CMStateSet last  = nodeCur.lastPos();
937 
938             //
939             //  For every position which is in our last position set, add all
940             //  of our first position states to the follow set for that
941             //  position.
942             //
943             for (int index = 0; index < fLeafCount; index++) {
944                 if (last.getBit(index))
945                     fFollowList[index].union(first);
946             }
947         }
948 
949         else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_ONE) {
950             // Recurse only
951             calcFollowList(((XSCMUniOp)nodeCur).getChild());
952         }
953 
954     }
955 
956     /**
957      * Dumps the tree of the current node to standard output.
958      *
959      * @param nodeCur The current node.
960      * @param level   The maximum levels to output.
961      *
962      * @exception RuntimeException Thrown on error.
963      */
dumpTree(CMNode nodeCur, int level)964     private void dumpTree(CMNode nodeCur, int level) {
965         for (int index = 0; index < level; index++)
966             System.out.print("   ");
967 
968         int type = nodeCur.type();
969 
970         switch(type ) {
971 
972         case XSModelGroupImpl.MODELGROUP_CHOICE:
973         case XSModelGroupImpl.MODELGROUP_SEQUENCE: {
974             if (type == XSModelGroupImpl.MODELGROUP_CHOICE)
975                 System.out.print("Choice Node ");
976             else
977                 System.out.print("Seq Node ");
978 
979             if (nodeCur.isNullable())
980                 System.out.print("Nullable ");
981 
982             System.out.print("firstPos=");
983             System.out.print(nodeCur.firstPos().toString());
984             System.out.print(" lastPos=");
985             System.out.println(nodeCur.lastPos().toString());
986 
987             dumpTree(((XSCMBinOp)nodeCur).getLeft(), level+1);
988             dumpTree(((XSCMBinOp)nodeCur).getRight(), level+1);
989             break;
990         }
991         case XSParticleDecl.PARTICLE_ZERO_OR_MORE:
992         case XSParticleDecl.PARTICLE_ONE_OR_MORE:
993         case XSParticleDecl.PARTICLE_ZERO_OR_ONE: {
994             System.out.print("Rep Node ");
995 
996             if (nodeCur.isNullable())
997                 System.out.print("Nullable ");
998 
999             System.out.print("firstPos=");
1000             System.out.print(nodeCur.firstPos().toString());
1001             System.out.print(" lastPos=");
1002             System.out.println(nodeCur.lastPos().toString());
1003 
1004             dumpTree(((XSCMUniOp)nodeCur).getChild(), level+1);
1005             break;
1006         }
1007         case XSParticleDecl.PARTICLE_ELEMENT: {
1008             System.out.print
1009             (
1010                 "Leaf: (pos="
1011                 + ((XSCMLeaf)nodeCur).getPosition()
1012                 + "), "
1013                 + "(elemIndex="
1014                 + ((XSCMLeaf)nodeCur).getLeaf()
1015                 + ") "
1016             );
1017 
1018             if (nodeCur.isNullable())
1019                 System.out.print(" Nullable ");
1020 
1021             System.out.print("firstPos=");
1022             System.out.print(nodeCur.firstPos().toString());
1023             System.out.print(" lastPos=");
1024             System.out.println(nodeCur.lastPos().toString());
1025             break;
1026         }
1027         case XSParticleDecl.PARTICLE_WILDCARD:
1028               System.out.print("Any Node: ");
1029 
1030             System.out.print("firstPos=");
1031             System.out.print(nodeCur.firstPos().toString());
1032             System.out.print(" lastPos=");
1033             System.out.println(nodeCur.lastPos().toString());
1034             break;
1035         default: {
1036             throw new RuntimeException("ImplementationMessages.VAL_NIICM");
1037         }
1038         }
1039 
1040     }
1041 
1042 
1043     /**
1044      * -1 is used to represent bad transitions in the transition table
1045      * entry for each state. So each entry is initialized to an all -1
1046      * array. This method creates a new entry and initializes it.
1047      */
makeDefStateList()1048     private int[] makeDefStateList()
1049     {
1050         int[] retArray = new int[fElemMapSize];
1051         for (int index = 0; index < fElemMapSize; index++)
1052             retArray[index] = -1;
1053         return retArray;
1054     }
1055 
1056     /** Post tree build initialization. */
postTreeBuildInit(CMNode nodeCur)1057     private void postTreeBuildInit(CMNode nodeCur) throws RuntimeException {
1058         // Set the maximum states on this node
1059         nodeCur.setMaxStates(fLeafCount);
1060 
1061         XSCMLeaf leaf = null;
1062         int pos = 0;
1063         // Recurse as required
1064         if (nodeCur.type() == XSParticleDecl.PARTICLE_WILDCARD) {
1065             leaf = (XSCMLeaf)nodeCur;
1066             pos = leaf.getPosition();
1067             fLeafList[pos] = leaf;
1068             fLeafListType[pos] = XSParticleDecl.PARTICLE_WILDCARD;
1069         }
1070         else if ((nodeCur.type() == XSModelGroupImpl.MODELGROUP_CHOICE) ||
1071                  (nodeCur.type() == XSModelGroupImpl.MODELGROUP_SEQUENCE)) {
1072             postTreeBuildInit(((XSCMBinOp)nodeCur).getLeft());
1073             postTreeBuildInit(((XSCMBinOp)nodeCur).getRight());
1074         }
1075         else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_MORE ||
1076                  nodeCur.type() == XSParticleDecl.PARTICLE_ONE_OR_MORE ||
1077                  nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_ONE) {
1078             postTreeBuildInit(((XSCMUniOp)nodeCur).getChild());
1079         }
1080         else if (nodeCur.type() == XSParticleDecl.PARTICLE_ELEMENT) {
1081             //  Put this node in the leaf list at the current index if its
1082             //  a non-epsilon leaf.
1083             leaf = (XSCMLeaf)nodeCur;
1084             pos = leaf.getPosition();
1085             fLeafList[pos] = leaf;
1086             fLeafListType[pos] = XSParticleDecl.PARTICLE_ELEMENT;
1087         }
1088         else {
1089             throw new RuntimeException("ImplementationMessages.VAL_NIICM");
1090         }
1091     }
1092 
1093     /**
1094      * check whether this content violates UPA constraint.
1095      *
1096      * @param subGroupHandler the substitution group handler
1097      * @return true if this content model contains other or list wildcard
1098      */
checkUniqueParticleAttribution(SubstitutionGroupHandler subGroupHandler)1099     public boolean checkUniqueParticleAttribution(SubstitutionGroupHandler subGroupHandler) throws XMLSchemaException {
1100         // Unique Particle Attribution
1101         // store the conflict results between any two elements in fElemMap
1102         // 0: not compared; -1: no conflict; 1: conflict
1103         // initialize the conflict table (all 0 initially)
1104         byte conflictTable[][] = new byte[fElemMapSize][fElemMapSize];
1105 
1106         // for each state, check whether it has overlap transitions
1107         for (int i = 0; i < fTransTable.length && fTransTable[i] != null; i++) {
1108             for (int j = 0; j < fElemMapSize; j++) {
1109                 for (int k = j+1; k < fElemMapSize; k++) {
1110                     if (fTransTable[i][j] != -1 &&
1111                         fTransTable[i][k] != -1) {
1112                         if (conflictTable[j][k] == 0) {
1113                             if (XSConstraints.overlapUPA
1114                                     (fElemMap[j], fElemMap[k],
1115                                             subGroupHandler)) {
1116                                 if (fCountingStates != null) {
1117                                     Occurence o = fCountingStates[i];
1118                                     // If "i" is a counting state and exactly one of the transitions
1119                                     // loops back to "i" then the two particles do not overlap if
1120                                     // minOccurs == maxOccurs.
1121                                     if (o != null &&
1122                                         fTransTable[i][j] == i ^ fTransTable[i][k] == i &&
1123                                         o.minOccurs == o.maxOccurs) {
1124                                         conflictTable[j][k] = (byte) -1;
1125                                         continue;
1126                                     }
1127                                 }
1128                                 conflictTable[j][k] = (byte) 1;
1129                             }
1130                             else {
1131                                 conflictTable[j][k] = (byte) -1;
1132                             }
1133                         }
1134                     }
1135                 }
1136             }
1137         }
1138 
1139         // report all errors
1140         for (int i = 0; i < fElemMapSize; i++) {
1141             for (int j = 0; j < fElemMapSize; j++) {
1142                 if (conflictTable[i][j] == 1) {
1143                     //errors.newError("cos-nonambig", new Object[]{fElemMap[i].toString(),
1144                     //                                             fElemMap[j].toString()});
1145                     // REVISIT: do we want to report all errors? or just one?
1146                     throw new XMLSchemaException("cos-nonambig", new Object[]{fElemMap[i].toString(),
1147                                                                               fElemMap[j].toString()});
1148                 }
1149             }
1150         }
1151 
1152         // if there is a other or list wildcard, we need to check this CM
1153         // again, if this grammar is cached.
1154         for (int i = 0; i < fElemMapSize; i++) {
1155             if (fElemMapType[i] == XSParticleDecl.PARTICLE_WILDCARD) {
1156                 XSWildcardDecl wildcard = (XSWildcardDecl)fElemMap[i];
1157                 if (wildcard.fType == XSWildcardDecl.NSCONSTRAINT_LIST ||
1158                     wildcard.fType == XSWildcardDecl.NSCONSTRAINT_NOT) {
1159                     return true;
1160                 }
1161             }
1162         }
1163 
1164         return false;
1165     }
1166 
1167     /**
1168      * Check which elements are valid to appear at this point. This method also
1169      * works if the state is in error, in which case it returns what should
1170      * have been seen.
1171      *
1172      * @param state  the current state
1173      * @return       a list whose entries are instances of
1174      *               either XSWildcardDecl or XSElementDecl.
1175      */
whatCanGoHere(int[] state)1176     public List<Object> whatCanGoHere(int[] state) {
1177         int curState = state[0];
1178         if (curState < 0)
1179             curState = state[1];
1180         Occurence o = (fCountingStates != null) ?
1181                 fCountingStates[curState] : null;
1182         int count = state[2];
1183 
1184         // Can be XSElementDecl or XSWildcardDecl, but eventually the content is
1185         // only used to evaluate toString
1186         List<Object> ret = new ArrayList<>();
1187         for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
1188             int nextState = fTransTable[curState][elemIndex];
1189             if (nextState != -1) {
1190                 if (o != null) {
1191                     if (curState == nextState) {
1192                         // Do not include transitions which loop back to the
1193                         // current state if we've looped the maximum number
1194                         // of times or greater.
1195                         if (count >= o.maxOccurs &&
1196                             o.maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) {
1197                             continue;
1198                         }
1199                     }
1200                     // Do not include transitions which advance past the
1201                     // current state if we have not looped enough times.
1202                     else if (count < o.minOccurs) {
1203                         continue;
1204                     }
1205                 }
1206                 ret.add(fElemMap[elemIndex]);
1207             }
1208         }
1209         return ret;
1210     }
1211 
1212     /**
1213      * Used by constant space algorithm for a{n,m} for n > 1 and
1214      * m <= unbounded. Called by a validator if validation of
1215      * countent model succeeds after subsuming a{n,m} to a*
1216      * (or a+) to check the n and m bounds.
1217      * Returns <code>null</code> if validation of bounds is
1218      * successful. Returns a list of strings with error info
1219      * if not. Even entries in list returned are error codes
1220      * (used to look up properties) and odd entries are parameters
1221      * to be passed when formatting error message. Each parameter
1222      * is associated with the error code that preceeds it in
1223      * the list.
1224      */
checkMinMaxBounds()1225     public List<String> checkMinMaxBounds() {
1226         List<String> result = null;
1227         for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
1228             int count = fElemMapCounter[elemIndex];
1229             if (count == -1) {
1230                 continue;
1231             }
1232             final int minOccurs = fElemMapCounterLowerBound[elemIndex];
1233             final int maxOccurs = fElemMapCounterUpperBound[elemIndex];
1234             if (count < minOccurs) {
1235                 if (result == null) result = new ArrayList<>();
1236                 result.add("cvc-complex-type.2.4.b");
1237                 result.add("{" + fElemMap[elemIndex] + "}");
1238             }
1239             if (maxOccurs != -1 && count > maxOccurs) {
1240                 if (result == null) result = new ArrayList<>();
1241                 result.add("cvc-complex-type.2.4.d.1");
1242                 result.add("{" + fElemMap[elemIndex] + "}");
1243             }
1244         }
1245         return result;
1246     }
1247 
occurenceInfo(int[] state)1248     public int [] occurenceInfo(int[] state) {
1249         if (fCountingStates != null) {
1250             int curState = state[0];
1251             if (curState < 0) {
1252                 curState = state[1];
1253             }
1254             Occurence o = fCountingStates[curState];
1255             if (o != null) {
1256                 int [] occurenceInfo = new int[4];
1257                 occurenceInfo[0] = o.minOccurs;
1258                 occurenceInfo[1] = o.maxOccurs;
1259                 occurenceInfo[2] = state[2];
1260                 occurenceInfo[3] = o.elemIndex;
1261                 return occurenceInfo;
1262             }
1263         }
1264         return null;
1265     }
1266 
getTermName(int termId)1267     public String getTermName(int termId) {
1268         Object term = fElemMap[termId];
1269         return (term != null) ? term.toString() : null;
1270     }
1271 
isCompactedForUPA()1272     public boolean isCompactedForUPA() {
1273         return fIsCompactedForUPA;
1274     }
1275 } // class DFAContentModel
1276