1 /* 2 * reserved comment block 3 * DO NOT REMOVE OR ALTER! 4 */ 5 /* 6 * Copyright 1999-2004 The Apache Software Foundation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.impl.xs.models; 22 23 import com.sun.org.apache.xerces.internal.xni.QName; 24 import com.sun.org.apache.xerces.internal.impl.dtd.models.CMNode; 25 import com.sun.org.apache.xerces.internal.impl.dtd.models.CMStateSet; 26 import com.sun.org.apache.xerces.internal.impl.xs.SchemaSymbols; 27 import com.sun.org.apache.xerces.internal.impl.xs.SubstitutionGroupHandler; 28 import com.sun.org.apache.xerces.internal.impl.xs.XSElementDecl; 29 import com.sun.org.apache.xerces.internal.impl.xs.XSParticleDecl; 30 import com.sun.org.apache.xerces.internal.impl.xs.XSModelGroupImpl; 31 import com.sun.org.apache.xerces.internal.impl.xs.XSWildcardDecl; 32 import com.sun.org.apache.xerces.internal.impl.xs.XMLSchemaException; 33 import com.sun.org.apache.xerces.internal.impl.xs.XSConstraints; 34 35 import java.util.Vector; 36 import java.util.ArrayList; 37 import java.util.HashMap; 38 39 /** 40 * DFAContentModel is the implementation of XSCMValidator that does 41 * all of the non-trivial element content validation. This class does 42 * the conversion from the regular expression to the DFA that 43 * it then uses in its validation algorithm. 44 * 45 * @xerces.internal 46 * 47 * @author Neil Graham, IBM 48 * @version $Id: XSDFACM.java,v 1.9 2010/08/06 23:49:43 joehw Exp $ 49 */ 50 public class XSDFACM 51 implements XSCMValidator { 52 53 // 54 // Constants 55 // 56 private static final boolean DEBUG = false; 57 58 // special strings 59 60 // debugging 61 62 /** Set to true to debug content model validation. */ 63 private static final boolean DEBUG_VALIDATE_CONTENT = false; 64 65 // 66 // Data 67 // 68 69 /** 70 * This is the map of unique input symbol elements to indices into 71 * each state's per-input symbol transition table entry. This is part 72 * of the built DFA information that must be kept around to do the 73 * actual validation. Note tat since either XSElementDecl or XSParticleDecl object 74 * can live here, we've got to use an Object. 75 */ 76 private Object fElemMap[] = null; 77 78 /** 79 * This is a map of whether the element map contains information 80 * related to ANY models. 81 */ 82 private int fElemMapType[] = null; 83 84 /** 85 * id of the unique input symbol 86 */ 87 private int fElemMapId[] = null; 88 89 /** The element map size. */ 90 private int fElemMapSize = 0; 91 92 /** 93 * This is an array of booleans, one per state (there are 94 * fTransTableSize states in the DFA) that indicates whether that 95 * state is a final state. 96 */ 97 private boolean fFinalStateFlags[] = null; 98 99 /** 100 * The list of follow positions for each NFA position (i.e. for each 101 * non-epsilon leaf node.) This is only used during the building of 102 * the DFA, and is let go afterwards. 103 */ 104 private CMStateSet fFollowList[] = null; 105 106 /** 107 * This is the head node of our intermediate representation. It is 108 * only non-null during the building of the DFA (just so that it 109 * does not have to be passed all around.) Once the DFA is built, 110 * this is no longer required so its nulled out. 111 */ 112 private CMNode fHeadNode = null; 113 114 /** 115 * The count of leaf nodes. This is an important number that set some 116 * limits on the sizes of data structures in the DFA process. 117 */ 118 private int fLeafCount = 0; 119 120 /** 121 * An array of non-epsilon leaf nodes, which is used during the DFA 122 * build operation, then dropped. 123 */ 124 private XSCMLeaf fLeafList[] = null; 125 126 /** Array mapping ANY types to the leaf list. */ 127 private int fLeafListType[] = null; 128 129 /** 130 * This is the transition table that is the main by product of all 131 * of the effort here. It is an array of arrays of ints. The first 132 * dimension is the number of states we end up with in the DFA. The 133 * second dimensions is the number of unique elements in the content 134 * model (fElemMapSize). Each entry in the second dimension indicates 135 * the new state given that input for the first dimension's start 136 * state. 137 * <p> 138 * The fElemMap array handles mapping from element indexes to 139 * positions in the second dimension of the transition table. 140 */ 141 private int fTransTable[][] = null; 142 /** 143 * Array containing occurence information for looping states 144 * which use counters to check minOccurs/maxOccurs. 145 */ 146 private Occurence [] fCountingStates = null; 147 static final class Occurence { 148 final int minOccurs; 149 final int maxOccurs; 150 final int elemIndex; Occurence(XSCMRepeatingLeaf leaf, int elemIndex)151 public Occurence (XSCMRepeatingLeaf leaf, int elemIndex) { 152 minOccurs = leaf.getMinOccurs(); 153 maxOccurs = leaf.getMaxOccurs(); 154 this.elemIndex = elemIndex; 155 } toString()156 public String toString() { 157 return "minOccurs=" + minOccurs 158 + ";maxOccurs=" + 159 ((maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) 160 ? Integer.toString(maxOccurs) : "unbounded"); 161 } 162 } 163 164 /** 165 * The number of valid entries in the transition table, and in the other 166 * related tables such as fFinalStateFlags. 167 */ 168 private int fTransTableSize = 0; 169 170 /** 171 * Array of counters for all the for elements (or wildcards) 172 * of the form a{n,m} where n > 1 and m <= unbounded. Used 173 * to count the a's to later check against n and m. Counter 174 * set to -1 if element (or wildcard) not optimized by 175 * constant space algorithm. 176 */ 177 private int fElemMapCounter[]; 178 179 /** 180 * Array of lower bounds for all the for elements (or wildcards) 181 * of the form a{n,m} where n > 1 and m <= unbounded. This array 182 * stores the n's for those elements (or wildcards) for which 183 * the constant space algorithm applies (or -1 otherwise). 184 */ 185 private int fElemMapCounterLowerBound[]; 186 187 /** 188 * Array of upper bounds for all the for elements (or wildcards) 189 * of the form a{n,m} where n > 1 and m <= unbounded. This array 190 * stores the n's for those elements (or wildcards) for which 191 * the constant space algorithm applies, or -1 if algorithm does 192 * not apply or m = unbounded. 193 */ 194 private int fElemMapCounterUpperBound[]; // -1 if no upper bound 195 196 // temp variables 197 198 // 199 // Constructors 200 // 201 202 /** 203 * Constructs a DFA content model. 204 * 205 * @param syntaxTree The syntax tree of the content model. 206 * @param leafCount The number of leaves. 207 * 208 * @exception RuntimeException Thrown if DFA can't be built. 209 */ 210 XSDFACM(CMNode syntaxTree, int leafCount)211 public XSDFACM(CMNode syntaxTree, int leafCount) { 212 213 // Store away our index and pools in members 214 fLeafCount = leafCount; 215 216 // 217 // Create some string pool indexes that represent the names of some 218 // magical nodes in the syntax tree. 219 // (already done in static initialization... 220 // 221 222 // 223 // Ok, so lets grind through the building of the DFA. This method 224 // handles the high level logic of the algorithm, but it uses a 225 // number of helper classes to do its thing. 226 // 227 // In order to avoid having hundreds of references to the error and 228 // string handlers around, this guy and all of his helper classes 229 // just throw a simple exception and we then pass it along. 230 // 231 232 if(DEBUG_VALIDATE_CONTENT) { 233 XSDFACM.time -= System.currentTimeMillis(); 234 } 235 236 buildDFA(syntaxTree); 237 238 if(DEBUG_VALIDATE_CONTENT) { 239 XSDFACM.time += System.currentTimeMillis(); 240 System.out.println("DFA build: " + XSDFACM.time + "ms"); 241 } 242 } 243 244 private static long time = 0; 245 246 // 247 // XSCMValidator methods 248 // 249 250 /** 251 * check whether the given state is one of the final states 252 * 253 * @param state the state to check 254 * 255 * @return whether it's a final state 256 */ isFinalState(int state)257 public boolean isFinalState (int state) { 258 return (state < 0)? false : 259 fFinalStateFlags[state]; 260 } 261 262 /** 263 * one transition only 264 * 265 * @param curElem The current element's QName 266 * @param state stack to store the previous state 267 * @param subGroupHandler the substitution group handler 268 * 269 * @return null if transition is invalid; otherwise the Object corresponding to the 270 * XSElementDecl or XSWildcardDecl identified. Also, the 271 * state array will be modified to include the new state; this so that the validator can 272 * store it away. 273 * 274 * @exception RuntimeException thrown on error 275 */ oneTransition(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler)276 public Object oneTransition(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler) { 277 int curState = state[0]; 278 279 if(curState == XSCMValidator.FIRST_ERROR || curState == XSCMValidator.SUBSEQUENT_ERROR) { 280 // there was an error last time; so just go find correct Object in fElemmMap. 281 // ... after resetting state[0]. 282 if(curState == XSCMValidator.FIRST_ERROR) 283 state[0] = XSCMValidator.SUBSEQUENT_ERROR; 284 285 return findMatchingDecl(curElem, subGroupHandler); 286 } 287 288 int nextState = 0; 289 int elemIndex = 0; 290 Object matchingDecl = null; 291 292 for (; elemIndex < fElemMapSize; elemIndex++) { 293 nextState = fTransTable[curState][elemIndex]; 294 if (nextState == -1) 295 continue; 296 int type = fElemMapType[elemIndex] ; 297 if (type == XSParticleDecl.PARTICLE_ELEMENT) { 298 matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]); 299 if (matchingDecl != null) { 300 // Increment counter if constant space algorithm applies 301 if (fElemMapCounter[elemIndex] >= 0) { 302 fElemMapCounter[elemIndex]++; 303 } 304 break; 305 } 306 } 307 else if (type == XSParticleDecl.PARTICLE_WILDCARD) { 308 if (((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri)) { 309 matchingDecl = fElemMap[elemIndex]; 310 // Increment counter if constant space algorithm applies 311 if (fElemMapCounter[elemIndex] >= 0) { 312 fElemMapCounter[elemIndex]++; 313 } 314 break; 315 } 316 } 317 } 318 319 // if we still can't find a match, set the state to first_error 320 // and return null 321 if (elemIndex == fElemMapSize) { 322 state[1] = state[0]; 323 state[0] = XSCMValidator.FIRST_ERROR; 324 return findMatchingDecl(curElem, subGroupHandler); 325 } 326 327 if (fCountingStates != null) { 328 Occurence o = fCountingStates[curState]; 329 if (o != null) { 330 if (curState == nextState) { 331 if (++state[2] > o.maxOccurs && 332 o.maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) { 333 // It's likely that we looped too many times on the current state 334 // however it's possible that we actually matched another particle 335 // which allows the same name. 336 // 337 // Consider: 338 // 339 // <xs:sequence> 340 // <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/> 341 // <xs:element name="foo" type="xs:string" fixed="bar"/> 342 // </xs:sequence> 343 // 344 // and 345 // 346 // <xs:sequence> 347 // <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/> 348 // <xs:any namespace="##any" processContents="skip"/> 349 // </xs:sequence> 350 // 351 // In the DFA there will be two transitions from the current state which 352 // allow "foo". Note that this is not a UPA violation. The ambiguity of which 353 // transition to take is resolved by the current value of the counter. Since 354 // we've already seen enough instances of the first "foo" perhaps there is 355 // another element declaration or wildcard deeper in the element map which 356 // matches. 357 return findMatchingDecl(curElem, state, subGroupHandler, elemIndex); 358 } 359 } 360 else if (state[2] < o.minOccurs) { 361 // not enough loops on the current state. 362 state[1] = state[0]; 363 state[0] = XSCMValidator.FIRST_ERROR; 364 return findMatchingDecl(curElem, subGroupHandler); 365 } 366 else { 367 // Exiting a counting state. If we're entering a new 368 // counting state, reset the counter. 369 o = fCountingStates[nextState]; 370 if (o != null) { 371 state[2] = (elemIndex == o.elemIndex) ? 1 : 0; 372 } 373 } 374 } 375 else { 376 o = fCountingStates[nextState]; 377 if (o != null) { 378 // Entering a new counting state. Reset the counter. 379 // If we've already seen one instance of the looping 380 // particle set the counter to 1, otherwise set it 381 // to 0. 382 state[2] = (elemIndex == o.elemIndex) ? 1 : 0; 383 } 384 } 385 } 386 387 state[0] = nextState; 388 return matchingDecl; 389 } // oneTransition(QName, int[], SubstitutionGroupHandler): Object 390 findMatchingDecl(QName curElem, SubstitutionGroupHandler subGroupHandler)391 Object findMatchingDecl(QName curElem, SubstitutionGroupHandler subGroupHandler) { 392 Object matchingDecl = null; 393 394 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { 395 int type = fElemMapType[elemIndex] ; 396 if (type == XSParticleDecl.PARTICLE_ELEMENT) { 397 matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]); 398 if (matchingDecl != null) { 399 return matchingDecl; 400 } 401 } 402 else if (type == XSParticleDecl.PARTICLE_WILDCARD) { 403 if(((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri)) 404 return fElemMap[elemIndex]; 405 } 406 } 407 408 return null; 409 } // findMatchingDecl(QName, SubstitutionGroupHandler): Object 410 findMatchingDecl(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler, int elemIndex)411 Object findMatchingDecl(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler, int elemIndex) { 412 413 int curState = state[0]; 414 int nextState = 0; 415 Object matchingDecl = null; 416 417 while (++elemIndex < fElemMapSize) { 418 nextState = fTransTable[curState][elemIndex]; 419 if (nextState == -1) 420 continue; 421 int type = fElemMapType[elemIndex] ; 422 if (type == XSParticleDecl.PARTICLE_ELEMENT) { 423 matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]); 424 if (matchingDecl != null) { 425 break; 426 } 427 } 428 else if (type == XSParticleDecl.PARTICLE_WILDCARD) { 429 if (((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri)) { 430 matchingDecl = fElemMap[elemIndex]; 431 break; 432 } 433 } 434 } 435 436 // if we still can't find a match, set the state to FIRST_ERROR and return null 437 if (elemIndex == fElemMapSize) { 438 state[1] = state[0]; 439 state[0] = XSCMValidator.FIRST_ERROR; 440 return findMatchingDecl(curElem, subGroupHandler); 441 } 442 443 // if we found a match, set the next state and reset the 444 // counter if the next state is a counting state. 445 state[0] = nextState; 446 final Occurence o = fCountingStates[nextState]; 447 if (o != null) { 448 state[2] = (elemIndex == o.elemIndex) ? 1 : 0; 449 } 450 return matchingDecl; 451 } // findMatchingDecl(QName, int[], SubstitutionGroupHandler, int): Object 452 453 // This method returns the start states of the content model. startContentModel()454 public int[] startContentModel() { 455 // Clear all constant space algorithm counters in use 456 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { 457 if (fElemMapCounter[elemIndex] != -1) { 458 fElemMapCounter[elemIndex] = 0; 459 } 460 } 461 // [0] : the current state 462 // [1] : if [0] is an error state then the 463 // last valid state before the error 464 // [2] : occurence counter for counting states 465 return new int [3]; 466 } // startContentModel():int[] 467 468 // this method returns whether the last state was a valid final state endContentModel(int[] state)469 public boolean endContentModel(int[] state) { 470 final int curState = state[0]; 471 if (fFinalStateFlags[curState]) { 472 if (fCountingStates != null) { 473 Occurence o = fCountingStates[curState]; 474 if (o != null && state[2] < o.minOccurs) { 475 // not enough loops on the current state to be considered final. 476 return false; 477 } 478 } 479 return true; 480 } 481 return false; 482 } // endContentModel(int[]): boolean 483 484 // Killed off whatCanGoHere; we may need it for DOM canInsert(...) etc., 485 // but we can put it back later. 486 487 // 488 // Private methods 489 // 490 491 /** 492 * Builds the internal DFA transition table from the given syntax tree. 493 * 494 * @param syntaxTree The syntax tree. 495 * 496 * @exception RuntimeException Thrown if DFA cannot be built. 497 */ buildDFA(CMNode syntaxTree)498 private void buildDFA(CMNode syntaxTree) { 499 // 500 // The first step we need to take is to rewrite the content model 501 // using our CMNode objects, and in the process get rid of any 502 // repetition short cuts, converting them into '*' style repetitions 503 // or getting rid of repetitions altogether. 504 // 505 // The conversions done are: 506 // 507 // x+ -> (x|x*) 508 // x? -> (x|epsilon) 509 // 510 // This is a relatively complex scenario. What is happening is that 511 // we create a top level binary node of which the special EOC value 512 // is set as the right side node. The the left side is set to the 513 // rewritten syntax tree. The source is the original content model 514 // info from the decl pool. The rewrite is done by buildSyntaxTree() 515 // which recurses the decl pool's content of the element and builds 516 // a new tree in the process. 517 // 518 // Note that, during this operation, we set each non-epsilon leaf 519 // node's DFA state position and count the number of such leafs, which 520 // is left in the fLeafCount member. 521 // 522 // The nodeTmp object is passed in just as a temp node to use during 523 // the recursion. Otherwise, we'd have to create a new node on every 524 // level of recursion, which would be piggy in Java (as is everything 525 // for that matter.) 526 // 527 528 /* MODIFIED (Jan, 2001) 529 * 530 * Use following rules. 531 * nullable(x+) := nullable(x), first(x+) := first(x), last(x+) := last(x) 532 * nullable(x?) := true, first(x?) := first(x), last(x?) := last(x) 533 * 534 * The same computation of follow as x* is applied to x+ 535 * 536 * The modification drastically reduces computation time of 537 * "(a, (b, a+, (c, (b, a+)+, a+, (d, (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+" 538 */ 539 540 // 541 // And handle specially the EOC node, which also must be numbered 542 // and counted as a non-epsilon leaf node. It could not be handled 543 // in the above tree build because it was created before all that 544 // started. We save the EOC position since its used during the DFA 545 // building loop. 546 // 547 int EOCPos = fLeafCount; 548 XSCMLeaf nodeEOC = new XSCMLeaf(XSParticleDecl.PARTICLE_ELEMENT, null, -1, fLeafCount++); 549 fHeadNode = new XSCMBinOp( 550 XSModelGroupImpl.MODELGROUP_SEQUENCE, 551 syntaxTree, 552 nodeEOC 553 ); 554 555 // 556 // Ok, so now we have to iterate the new tree and do a little more 557 // work now that we know the leaf count. One thing we need to do is 558 // to calculate the first and last position sets of each node. This 559 // is cached away in each of the nodes. 560 // 561 // Along the way we also set the leaf count in each node as the 562 // maximum state count. They must know this in order to create their 563 // first/last pos sets. 564 // 565 // We also need to build an array of references to the non-epsilon 566 // leaf nodes. Since we iterate it in the same way as before, this 567 // will put them in the array according to their position values. 568 // 569 fLeafList = new XSCMLeaf[fLeafCount]; 570 fLeafListType = new int[fLeafCount]; 571 postTreeBuildInit(fHeadNode); 572 573 // 574 // And, moving onward... We now need to build the follow position 575 // sets for all the nodes. So we allocate an array of state sets, 576 // one for each leaf node (i.e. each DFA position.) 577 // 578 fFollowList = new CMStateSet[fLeafCount]; 579 for (int index = 0; index < fLeafCount; index++) 580 fFollowList[index] = new CMStateSet(fLeafCount); 581 calcFollowList(fHeadNode); 582 // 583 // And finally the big push... Now we build the DFA using all the 584 // states and the tree we've built up. First we set up the various 585 // data structures we are going to use while we do this. 586 // 587 // First of all we need an array of unique element names in our 588 // content model. For each transition table entry, we need a set of 589 // contiguous indices to represent the transitions for a particular 590 // input element. So we need to a zero based range of indexes that 591 // map to element types. This element map provides that mapping. 592 // 593 fElemMap = new Object[fLeafCount]; 594 fElemMapType = new int[fLeafCount]; 595 fElemMapId = new int[fLeafCount]; 596 597 fElemMapCounter = new int[fLeafCount]; 598 fElemMapCounterLowerBound = new int[fLeafCount]; 599 fElemMapCounterUpperBound = new int[fLeafCount]; 600 601 fElemMapSize = 0; 602 Occurence [] elemOccurenceMap = null; 603 604 for (int outIndex = 0; outIndex < fLeafCount; outIndex++) { 605 // optimization from Henry Zongaro: 606 //fElemMap[outIndex] = new Object (); 607 fElemMap[outIndex] = null; 608 609 int inIndex = 0; 610 final int id = fLeafList[outIndex].getParticleId(); 611 for (; inIndex < fElemMapSize; inIndex++) { 612 if (id == fElemMapId[inIndex]) 613 break; 614 } 615 616 // If it was not in the list, then add it, if not the EOC node 617 if (inIndex == fElemMapSize) { 618 XSCMLeaf leaf = fLeafList[outIndex]; 619 fElemMap[fElemMapSize] = leaf.getLeaf(); 620 if (leaf instanceof XSCMRepeatingLeaf) { 621 if (elemOccurenceMap == null) { 622 elemOccurenceMap = new Occurence[fLeafCount]; 623 } 624 elemOccurenceMap[fElemMapSize] = new Occurence((XSCMRepeatingLeaf) leaf, fElemMapSize); 625 } 626 627 fElemMapType[fElemMapSize] = fLeafListType[outIndex]; 628 fElemMapId[fElemMapSize] = id; 629 630 // Init counters and bounds for a{n,m} algorithm 631 int[] bounds = (int[]) leaf.getUserData(); 632 if (bounds != null) { 633 fElemMapCounter[fElemMapSize] = 0; 634 fElemMapCounterLowerBound[fElemMapSize] = bounds[0]; 635 fElemMapCounterUpperBound[fElemMapSize] = bounds[1]; 636 } else { 637 fElemMapCounter[fElemMapSize] = -1; 638 fElemMapCounterLowerBound[fElemMapSize] = -1; 639 fElemMapCounterUpperBound[fElemMapSize] = -1; 640 } 641 642 fElemMapSize++; 643 } 644 } 645 646 // the last entry in the element map must be the EOC element. 647 // remove it from the map. 648 if (DEBUG) { 649 if (fElemMapId[fElemMapSize-1] != -1) 650 System.err.println("interal error in DFA: last element is not EOC."); 651 } 652 fElemMapSize--; 653 654 /*** 655 * Optimization(Jan, 2001); We sort fLeafList according to 656 * elemIndex which is *uniquely* associated to each leaf. 657 * We are *assuming* that each element appears in at least one leaf. 658 **/ 659 660 int[] fLeafSorter = new int[fLeafCount + fElemMapSize]; 661 int fSortCount = 0; 662 663 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { 664 final int id = fElemMapId[elemIndex]; 665 for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) { 666 if (id == fLeafList[leafIndex].getParticleId()) 667 fLeafSorter[fSortCount++] = leafIndex; 668 } 669 fLeafSorter[fSortCount++] = -1; 670 } 671 672 /* Optimization(Jan, 2001) */ 673 674 // 675 // Next lets create some arrays, some that hold transient 676 // information during the DFA build and some that are permament. 677 // These are kind of sticky since we cannot know how big they will 678 // get, but we don't want to use any Java collections because of 679 // performance. 680 // 681 // Basically they will probably be about fLeafCount*2 on average, 682 // but can be as large as 2^(fLeafCount*2), worst case. So we start 683 // with fLeafCount*4 as a middle ground. This will be very unlikely 684 // to ever have to expand, though it if does, the overhead will be 685 // somewhat ugly. 686 // 687 int curArraySize = fLeafCount * 4; 688 CMStateSet[] statesToDo = new CMStateSet[curArraySize]; 689 fFinalStateFlags = new boolean[curArraySize]; 690 fTransTable = new int[curArraySize][]; 691 692 // 693 // Ok we start with the initial set as the first pos set of the 694 // head node (which is the seq node that holds the content model 695 // and the EOC node.) 696 // 697 CMStateSet setT = fHeadNode.firstPos(); 698 699 // 700 // Init our two state flags. Basically the unmarked state counter 701 // is always chasing the current state counter. When it catches up, 702 // that means we made a pass through that did not add any new states 703 // to the lists, at which time we are done. We could have used a 704 // expanding array of flags which we used to mark off states as we 705 // complete them, but this is easier though less readable maybe. 706 // 707 int unmarkedState = 0; 708 int curState = 0; 709 710 // 711 // Init the first transition table entry, and put the initial state 712 // into the states to do list, then bump the current state. 713 // 714 fTransTable[curState] = makeDefStateList(); 715 statesToDo[curState] = setT; 716 curState++; 717 718 /* Optimization(Jan, 2001); This is faster for 719 * a large content model such as, "(t001+|t002+|.... |t500+)". 720 */ 721 722 HashMap stateTable = new HashMap(); 723 724 /* Optimization(Jan, 2001) */ 725 726 // 727 // Ok, almost done with the algorithm... We now enter the 728 // loop where we go until the states done counter catches up with 729 // the states to do counter. 730 // 731 while (unmarkedState < curState) { 732 // 733 // Get the first unmarked state out of the list of states to do. 734 // And get the associated transition table entry. 735 // 736 setT = statesToDo[unmarkedState]; 737 int[] transEntry = fTransTable[unmarkedState]; 738 739 // Mark this one final if it contains the EOC state 740 fFinalStateFlags[unmarkedState] = setT.getBit(EOCPos); 741 742 // Bump up the unmarked state count, marking this state done 743 unmarkedState++; 744 745 // Loop through each possible input symbol in the element map 746 CMStateSet newSet = null; 747 /* Optimization(Jan, 2001) */ 748 int sorterIndex = 0; 749 /* Optimization(Jan, 2001) */ 750 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { 751 // 752 // Build up a set of states which is the union of all of 753 // the follow sets of DFA positions that are in the current 754 // state. If we gave away the new set last time through then 755 // create a new one. Otherwise, zero out the existing one. 756 // 757 if (newSet == null) 758 newSet = new CMStateSet(fLeafCount); 759 else 760 newSet.zeroBits(); 761 762 /* Optimization(Jan, 2001) */ 763 int leafIndex = fLeafSorter[sorterIndex++]; 764 765 while (leafIndex != -1) { 766 // If this leaf index (DFA position) is in the current set... 767 if (setT.getBit(leafIndex)) { 768 // 769 // If this leaf is the current input symbol, then we 770 // want to add its follow list to the set of states to 771 // transition to from the current state. 772 // 773 newSet.union(fFollowList[leafIndex]); 774 } 775 776 leafIndex = fLeafSorter[sorterIndex++]; 777 } 778 /* Optimization(Jan, 2001) */ 779 780 // 781 // If this new set is not empty, then see if its in the list 782 // of states to do. If not, then add it. 783 // 784 if (!newSet.isEmpty()) { 785 // 786 // Search the 'states to do' list to see if this new 787 // state set is already in there. 788 // 789 790 /* Optimization(Jan, 2001) */ 791 Integer stateObj = (Integer)stateTable.get(newSet); 792 int stateIndex = (stateObj == null ? curState : stateObj.intValue()); 793 /* Optimization(Jan, 2001) */ 794 795 // If we did not find it, then add it 796 if (stateIndex == curState) { 797 // 798 // Put this new state into the states to do and init 799 // a new entry at the same index in the transition 800 // table. 801 // 802 statesToDo[curState] = newSet; 803 fTransTable[curState] = makeDefStateList(); 804 805 /* Optimization(Jan, 2001) */ 806 stateTable.put(newSet, new Integer(curState)); 807 /* Optimization(Jan, 2001) */ 808 809 // We now have a new state to do so bump the count 810 curState++; 811 812 // 813 // Null out the new set to indicate we adopted it. 814 // This will cause the creation of a new set on the 815 // next time around the loop. 816 // 817 newSet = null; 818 } 819 820 // 821 // Now set this state in the transition table's entry 822 // for this element (using its index), with the DFA 823 // state we will move to from the current state when we 824 // see this input element. 825 // 826 transEntry[elemIndex] = stateIndex; 827 828 // Expand the arrays if we're full 829 if (curState == curArraySize) { 830 // 831 // Yikes, we overflowed the initial array size, so 832 // we've got to expand all of these arrays. So adjust 833 // up the size by 50% and allocate new arrays. 834 // 835 final int newSize = (int)(curArraySize * 1.5); 836 CMStateSet[] newToDo = new CMStateSet[newSize]; 837 boolean[] newFinalFlags = new boolean[newSize]; 838 int[][] newTransTable = new int[newSize][]; 839 840 // Copy over all of the existing content 841 System.arraycopy(statesToDo, 0, newToDo, 0, curArraySize); 842 System.arraycopy(fFinalStateFlags, 0, newFinalFlags, 0, curArraySize); 843 System.arraycopy(fTransTable, 0, newTransTable, 0, curArraySize); 844 845 // Store the new array size 846 curArraySize = newSize; 847 statesToDo = newToDo; 848 fFinalStateFlags = newFinalFlags; 849 fTransTable = newTransTable; 850 } 851 } 852 } 853 } 854 855 // 856 // Fill in the occurence information for each looping state 857 // if we're using counters. 858 // 859 if (elemOccurenceMap != null) { 860 fCountingStates = new Occurence[curState]; 861 for (int i = 0; i < curState; ++i) { 862 int [] transitions = fTransTable[i]; 863 for (int j = 0; j < transitions.length; ++j) { 864 if (i == transitions[j]) { 865 fCountingStates[i] = elemOccurenceMap[j]; 866 break; 867 } 868 } 869 } 870 } 871 872 // 873 // And now we can say bye bye to the temp representation since we've 874 // built the DFA. 875 // 876 if (DEBUG_VALIDATE_CONTENT) 877 dumpTree(fHeadNode, 0); 878 fHeadNode = null; 879 fLeafList = null; 880 fFollowList = null; 881 fLeafListType = null; 882 fElemMapId = null; 883 } 884 885 /** 886 * Calculates the follow list of the current node. 887 * 888 * @param nodeCur The curent node. 889 * 890 * @exception RuntimeException Thrown if follow list cannot be calculated. 891 */ calcFollowList(CMNode nodeCur)892 private void calcFollowList(CMNode nodeCur) { 893 // Recurse as required 894 if (nodeCur.type() == XSModelGroupImpl.MODELGROUP_CHOICE) { 895 // Recurse only 896 calcFollowList(((XSCMBinOp)nodeCur).getLeft()); 897 calcFollowList(((XSCMBinOp)nodeCur).getRight()); 898 } 899 else if (nodeCur.type() == XSModelGroupImpl.MODELGROUP_SEQUENCE) { 900 // Recurse first 901 calcFollowList(((XSCMBinOp)nodeCur).getLeft()); 902 calcFollowList(((XSCMBinOp)nodeCur).getRight()); 903 904 // 905 // Now handle our level. We use our left child's last pos 906 // set and our right child's first pos set, so go ahead and 907 // get them ahead of time. 908 // 909 final CMStateSet last = ((XSCMBinOp)nodeCur).getLeft().lastPos(); 910 final CMStateSet first = ((XSCMBinOp)nodeCur).getRight().firstPos(); 911 912 // 913 // Now, for every position which is in our left child's last set 914 // add all of the states in our right child's first set to the 915 // follow set for that position. 916 // 917 for (int index = 0; index < fLeafCount; index++) { 918 if (last.getBit(index)) 919 fFollowList[index].union(first); 920 } 921 } 922 else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_MORE 923 || nodeCur.type() == XSParticleDecl.PARTICLE_ONE_OR_MORE) { 924 // Recurse first 925 calcFollowList(((XSCMUniOp)nodeCur).getChild()); 926 927 // 928 // Now handle our level. We use our own first and last position 929 // sets, so get them up front. 930 // 931 final CMStateSet first = nodeCur.firstPos(); 932 final CMStateSet last = nodeCur.lastPos(); 933 934 // 935 // For every position which is in our last position set, add all 936 // of our first position states to the follow set for that 937 // position. 938 // 939 for (int index = 0; index < fLeafCount; index++) { 940 if (last.getBit(index)) 941 fFollowList[index].union(first); 942 } 943 } 944 945 else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_ONE) { 946 // Recurse only 947 calcFollowList(((XSCMUniOp)nodeCur).getChild()); 948 } 949 950 } 951 952 /** 953 * Dumps the tree of the current node to standard output. 954 * 955 * @param nodeCur The current node. 956 * @param level The maximum levels to output. 957 * 958 * @exception RuntimeException Thrown on error. 959 */ dumpTree(CMNode nodeCur, int level)960 private void dumpTree(CMNode nodeCur, int level) { 961 for (int index = 0; index < level; index++) 962 System.out.print(" "); 963 964 int type = nodeCur.type(); 965 966 switch(type ) { 967 968 case XSModelGroupImpl.MODELGROUP_CHOICE: 969 case XSModelGroupImpl.MODELGROUP_SEQUENCE: { 970 if (type == XSModelGroupImpl.MODELGROUP_CHOICE) 971 System.out.print("Choice Node "); 972 else 973 System.out.print("Seq Node "); 974 975 if (nodeCur.isNullable()) 976 System.out.print("Nullable "); 977 978 System.out.print("firstPos="); 979 System.out.print(nodeCur.firstPos().toString()); 980 System.out.print(" lastPos="); 981 System.out.println(nodeCur.lastPos().toString()); 982 983 dumpTree(((XSCMBinOp)nodeCur).getLeft(), level+1); 984 dumpTree(((XSCMBinOp)nodeCur).getRight(), level+1); 985 break; 986 } 987 case XSParticleDecl.PARTICLE_ZERO_OR_MORE: 988 case XSParticleDecl.PARTICLE_ONE_OR_MORE: 989 case XSParticleDecl.PARTICLE_ZERO_OR_ONE: { 990 System.out.print("Rep Node "); 991 992 if (nodeCur.isNullable()) 993 System.out.print("Nullable "); 994 995 System.out.print("firstPos="); 996 System.out.print(nodeCur.firstPos().toString()); 997 System.out.print(" lastPos="); 998 System.out.println(nodeCur.lastPos().toString()); 999 1000 dumpTree(((XSCMUniOp)nodeCur).getChild(), level+1); 1001 break; 1002 } 1003 case XSParticleDecl.PARTICLE_ELEMENT: { 1004 System.out.print 1005 ( 1006 "Leaf: (pos=" 1007 + ((XSCMLeaf)nodeCur).getPosition() 1008 + "), " 1009 + "(elemIndex=" 1010 + ((XSCMLeaf)nodeCur).getLeaf() 1011 + ") " 1012 ); 1013 1014 if (nodeCur.isNullable()) 1015 System.out.print(" Nullable "); 1016 1017 System.out.print("firstPos="); 1018 System.out.print(nodeCur.firstPos().toString()); 1019 System.out.print(" lastPos="); 1020 System.out.println(nodeCur.lastPos().toString()); 1021 break; 1022 } 1023 case XSParticleDecl.PARTICLE_WILDCARD: 1024 System.out.print("Any Node: "); 1025 1026 System.out.print("firstPos="); 1027 System.out.print(nodeCur.firstPos().toString()); 1028 System.out.print(" lastPos="); 1029 System.out.println(nodeCur.lastPos().toString()); 1030 break; 1031 default: { 1032 throw new RuntimeException("ImplementationMessages.VAL_NIICM"); 1033 } 1034 } 1035 1036 } 1037 1038 1039 /** 1040 * -1 is used to represent bad transitions in the transition table 1041 * entry for each state. So each entry is initialized to an all -1 1042 * array. This method creates a new entry and initializes it. 1043 */ makeDefStateList()1044 private int[] makeDefStateList() 1045 { 1046 int[] retArray = new int[fElemMapSize]; 1047 for (int index = 0; index < fElemMapSize; index++) 1048 retArray[index] = -1; 1049 return retArray; 1050 } 1051 1052 /** Post tree build initialization. */ postTreeBuildInit(CMNode nodeCur)1053 private void postTreeBuildInit(CMNode nodeCur) throws RuntimeException { 1054 // Set the maximum states on this node 1055 nodeCur.setMaxStates(fLeafCount); 1056 1057 XSCMLeaf leaf = null; 1058 int pos = 0; 1059 // Recurse as required 1060 if (nodeCur.type() == XSParticleDecl.PARTICLE_WILDCARD) { 1061 leaf = (XSCMLeaf)nodeCur; 1062 pos = leaf.getPosition(); 1063 fLeafList[pos] = leaf; 1064 fLeafListType[pos] = XSParticleDecl.PARTICLE_WILDCARD; 1065 } 1066 else if ((nodeCur.type() == XSModelGroupImpl.MODELGROUP_CHOICE) || 1067 (nodeCur.type() == XSModelGroupImpl.MODELGROUP_SEQUENCE)) { 1068 postTreeBuildInit(((XSCMBinOp)nodeCur).getLeft()); 1069 postTreeBuildInit(((XSCMBinOp)nodeCur).getRight()); 1070 } 1071 else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_MORE || 1072 nodeCur.type() == XSParticleDecl.PARTICLE_ONE_OR_MORE || 1073 nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_ONE) { 1074 postTreeBuildInit(((XSCMUniOp)nodeCur).getChild()); 1075 } 1076 else if (nodeCur.type() == XSParticleDecl.PARTICLE_ELEMENT) { 1077 // Put this node in the leaf list at the current index if its 1078 // a non-epsilon leaf. 1079 leaf = (XSCMLeaf)nodeCur; 1080 pos = leaf.getPosition(); 1081 fLeafList[pos] = leaf; 1082 fLeafListType[pos] = XSParticleDecl.PARTICLE_ELEMENT; 1083 } 1084 else { 1085 throw new RuntimeException("ImplementationMessages.VAL_NIICM"); 1086 } 1087 } 1088 1089 /** 1090 * check whether this content violates UPA constraint. 1091 * 1092 * @param subGroupHandler the substitution group handler 1093 * @return true if this content model contains other or list wildcard 1094 */ checkUniqueParticleAttribution(SubstitutionGroupHandler subGroupHandler)1095 public boolean checkUniqueParticleAttribution(SubstitutionGroupHandler subGroupHandler) throws XMLSchemaException { 1096 // Unique Particle Attribution 1097 // store the conflict results between any two elements in fElemMap 1098 // 0: not compared; -1: no conflict; 1: conflict 1099 // initialize the conflict table (all 0 initially) 1100 byte conflictTable[][] = new byte[fElemMapSize][fElemMapSize]; 1101 1102 // for each state, check whether it has overlap transitions 1103 for (int i = 0; i < fTransTable.length && fTransTable[i] != null; i++) { 1104 for (int j = 0; j < fElemMapSize; j++) { 1105 for (int k = j+1; k < fElemMapSize; k++) { 1106 if (fTransTable[i][j] != -1 && 1107 fTransTable[i][k] != -1) { 1108 if (conflictTable[j][k] == 0) { 1109 if (XSConstraints.overlapUPA 1110 (fElemMap[j], fElemMap[k], 1111 subGroupHandler)) { 1112 if (fCountingStates != null) { 1113 Occurence o = fCountingStates[i]; 1114 // If "i" is a counting state and exactly one of the transitions 1115 // loops back to "i" then the two particles do not overlap if 1116 // minOccurs == maxOccurs. 1117 if (o != null && 1118 fTransTable[i][j] == i ^ fTransTable[i][k] == i && 1119 o.minOccurs == o.maxOccurs) { 1120 conflictTable[j][k] = (byte) -1; 1121 continue; 1122 } 1123 } 1124 conflictTable[j][k] = (byte) 1; 1125 } 1126 else { 1127 conflictTable[j][k] = (byte) -1; 1128 } 1129 } 1130 } 1131 } 1132 } 1133 } 1134 1135 // report all errors 1136 for (int i = 0; i < fElemMapSize; i++) { 1137 for (int j = 0; j < fElemMapSize; j++) { 1138 if (conflictTable[i][j] == 1) { 1139 //errors.newError("cos-nonambig", new Object[]{fElemMap[i].toString(), 1140 // fElemMap[j].toString()}); 1141 // REVISIT: do we want to report all errors? or just one? 1142 throw new XMLSchemaException("cos-nonambig", new Object[]{fElemMap[i].toString(), 1143 fElemMap[j].toString()}); 1144 } 1145 } 1146 } 1147 1148 // if there is a other or list wildcard, we need to check this CM 1149 // again, if this grammar is cached. 1150 for (int i = 0; i < fElemMapSize; i++) { 1151 if (fElemMapType[i] == XSParticleDecl.PARTICLE_WILDCARD) { 1152 XSWildcardDecl wildcard = (XSWildcardDecl)fElemMap[i]; 1153 if (wildcard.fType == XSWildcardDecl.NSCONSTRAINT_LIST || 1154 wildcard.fType == XSWildcardDecl.NSCONSTRAINT_NOT) { 1155 return true; 1156 } 1157 } 1158 } 1159 1160 return false; 1161 } 1162 1163 /** 1164 * Check which elements are valid to appear at this point. This method also 1165 * works if the state is in error, in which case it returns what should 1166 * have been seen. 1167 * 1168 * @param state the current state 1169 * @return a Vector whose entries are instances of 1170 * either XSWildcardDecl or XSElementDecl. 1171 */ whatCanGoHere(int[] state)1172 public Vector whatCanGoHere(int[] state) { 1173 int curState = state[0]; 1174 if (curState < 0) 1175 curState = state[1]; 1176 Occurence o = (fCountingStates != null) ? 1177 fCountingStates[curState] : null; 1178 int count = state[2]; 1179 1180 Vector ret = new Vector(); 1181 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { 1182 int nextState = fTransTable[curState][elemIndex]; 1183 if (nextState != -1) { 1184 if (o != null) { 1185 if (curState == nextState) { 1186 // Do not include transitions which loop back to the 1187 // current state if we've looped the maximum number 1188 // of times or greater. 1189 if (count >= o.maxOccurs && 1190 o.maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) { 1191 continue; 1192 } 1193 } 1194 // Do not include transitions which advance past the 1195 // current state if we have not looped enough times. 1196 else if (count < o.minOccurs) { 1197 continue; 1198 } 1199 } 1200 ret.addElement(fElemMap[elemIndex]); 1201 } 1202 } 1203 return ret; 1204 } 1205 1206 /** 1207 * Used by constant space algorithm for a{n,m} for n > 1 and 1208 * m <= unbounded. Called by a validator if validation of 1209 * countent model succeeds after subsuming a{n,m} to a* 1210 * (or a+) to check the n and m bounds. 1211 * Returns <code>null</code> if validation of bounds is 1212 * successful. Returns a list of strings with error info 1213 * if not. Even entries in list returned are error codes 1214 * (used to look up properties) and odd entries are parameters 1215 * to be passed when formatting error message. Each parameter 1216 * is associated with the error code that preceeds it in 1217 * the list. 1218 */ checkMinMaxBounds()1219 public ArrayList checkMinMaxBounds() { 1220 ArrayList result = null; 1221 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { 1222 int count = fElemMapCounter[elemIndex]; 1223 if (count == -1) { 1224 continue; 1225 } 1226 final int minOccurs = fElemMapCounterLowerBound[elemIndex]; 1227 final int maxOccurs = fElemMapCounterUpperBound[elemIndex]; 1228 if (count < minOccurs) { 1229 if (result == null) result = new ArrayList(); 1230 result.add("cvc-complex-type.2.4.b"); 1231 result.add("{" + fElemMap[elemIndex] + "}"); 1232 } 1233 if (maxOccurs != -1 && count > maxOccurs) { 1234 if (result == null) result = new ArrayList(); 1235 result.add("cvc-complex-type.2.4.e"); 1236 result.add("{" + fElemMap[elemIndex] + "}"); 1237 } 1238 } 1239 return result; 1240 } 1241 1242 } // class DFAContentModel 1243