1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /*
19 * $Id: DFAContentModel.cpp 901107 2010-01-20 08:45:02Z borisk $
20 */
21
22
23 // ---------------------------------------------------------------------------
24 // Includes
25 // ---------------------------------------------------------------------------
26 #include <xercesc/util/RuntimeException.hpp>
27 #include <xercesc/framework/XMLBuffer.hpp>
28 #include <xercesc/framework/XMLElementDecl.hpp>
29 #include <xercesc/framework/XMLValidator.hpp>
30 #include <xercesc/validators/common/CMAny.hpp>
31 #include <xercesc/validators/common/CMBinaryOp.hpp>
32 #include <xercesc/validators/common/CMLeaf.hpp>
33 #include <xercesc/validators/common/CMRepeatingLeaf.hpp>
34 #include <xercesc/validators/common/CMUnaryOp.hpp>
35 #include <xercesc/validators/common/DFAContentModel.hpp>
36 #include <xercesc/validators/common/ContentSpecNode.hpp>
37 #include <xercesc/validators/common/Grammar.hpp>
38 #include <xercesc/validators/schema/SchemaSymbols.hpp>
39 #include <xercesc/validators/schema/SubstitutionGroupComparator.hpp>
40 #include <xercesc/validators/schema/XercesElementWildcard.hpp>
41 #include <xercesc/util/RefHashTableOf.hpp>
42 #include <xercesc/util/XMLInteger.hpp>
43 #include <math.h>
44
45 XERCES_CPP_NAMESPACE_BEGIN
46
47 struct CMStateSetHasher
48 {
getHashValCMStateSetHasher49 XMLSize_t getHashVal(const void *const key, XMLSize_t mod)
50 {
51 const CMStateSet* const pkey = (const CMStateSet*) key;
52 return ((pkey->hashCode()) % mod);
53 }
54
equalsCMStateSetHasher55 bool equals(const void *const key1, const void *const key2)
56 {
57 const CMStateSet* const pkey1 = (const CMStateSet*) key1;
58 const CMStateSet* const pkey2 = (const CMStateSet*) key2;
59 return (*pkey1==*pkey2);
60 }
61 };
62
63 // ---------------------------------------------------------------------------
64 // DFAContentModel: Constructors and Destructor
65 // ---------------------------------------------------------------------------
DFAContentModel(const bool dtd,ContentSpecNode * const elemContentSpec,MemoryManager * const manager)66 DFAContentModel::DFAContentModel( const bool dtd
67 , ContentSpecNode* const elemContentSpec
68 , MemoryManager* const manager) :
69
70 fElemMap(0)
71 , fElemMapType(0)
72 , fElemMapSize(0)
73 , fEmptyOk(false)
74 , fEOCPos(0)
75 , fFinalStateFlags(0)
76 , fFollowList(0)
77 , fHeadNode(0)
78 , fLeafCount(0)
79 , fLeafList(0)
80 , fLeafListType(0)
81 , fTransTable(0)
82 , fTransTableSize(0)
83 , fCountingStates(0)
84 , fDTD(dtd)
85 , fIsMixed(false)
86 , fLeafNameTypeVector(0)
87 , fMemoryManager(manager)
88 {
89 // And build the DFA data structures
90 buildDFA(elemContentSpec);
91 }
92
DFAContentModel(const bool dtd,ContentSpecNode * const elemContentSpec,const bool isMixed,MemoryManager * const manager)93 DFAContentModel::DFAContentModel( const bool dtd
94 , ContentSpecNode* const elemContentSpec
95 , const bool isMixed
96 , MemoryManager* const manager):
97
98 fElemMap(0)
99 , fElemMapType(0)
100 , fElemMapSize(0)
101 , fEmptyOk(false)
102 , fEOCPos(0)
103 , fFinalStateFlags(0)
104 , fFollowList(0)
105 , fHeadNode(0)
106 , fLeafCount(0)
107 , fLeafList(0)
108 , fLeafListType(0)
109 , fTransTable(0)
110 , fTransTableSize(0)
111 , fCountingStates(0)
112 , fDTD(dtd)
113 , fIsMixed(isMixed)
114 , fLeafNameTypeVector(0)
115 , fMemoryManager(manager)
116 {
117 // And build the DFA data structures
118 buildDFA(elemContentSpec);
119 }
120
~DFAContentModel()121 DFAContentModel::~DFAContentModel()
122 {
123 //
124 // Clean up all the stuff that is not just temporary representation
125 // data that was cleaned up after building the DFA.
126 //
127 fMemoryManager->deallocate(fFinalStateFlags); //delete [] fFinalStateFlags;
128
129 unsigned int index;
130 for (index = 0; index < fTransTableSize; index++)
131 fMemoryManager->deallocate(fTransTable[index]); //delete [] fTransTable[index];
132 fMemoryManager->deallocate(fTransTable); //delete [] fTransTable;
133
134 if(fCountingStates)
135 {
136 for (unsigned int j = 0; j < fTransTableSize; ++j)
137 delete fCountingStates[j];
138 fMemoryManager->deallocate(fCountingStates);
139 }
140
141 for (index = 0; index < fLeafCount; index++)
142 delete fElemMap[index];
143 fMemoryManager->deallocate(fElemMap); //delete [] fElemMap;
144
145 fMemoryManager->deallocate(fElemMapType); //delete [] fElemMapType;
146 fMemoryManager->deallocate(fLeafListType); //delete [] fLeafListType;
147
148 delete fLeafNameTypeVector;
149 }
150
151
152 // ---------------------------------------------------------------------------
153 // DFAContentModel: Implementation of the ContentModel virtual interface
154 // ---------------------------------------------------------------------------
155 bool
validateContent(QName ** const children,XMLSize_t childCount,unsigned int,XMLSize_t * indexFailingChild,MemoryManager * const) const156 DFAContentModel::validateContent( QName** const children
157 , XMLSize_t childCount
158 , unsigned int
159 , XMLSize_t* indexFailingChild
160 , MemoryManager* const) const
161 {
162 //
163 // If there are no children, then either we fail on the 0th element
164 // or we return success. It depends upon whether this content model
165 // accepts empty content, which we determined earlier.
166 //
167 if (!childCount)
168 {
169 // success
170 if(fEmptyOk)
171 return true;
172 *indexFailingChild=0;
173 return false;
174 }
175
176 //
177 // Lets loop through the children in the array and move our way
178 // through the states. Note that we use the fElemMap array to map
179 // an element index to a state index.
180 //
181 unsigned int curState = 0;
182 unsigned int nextState = 0;
183 unsigned int loopCount = 0;
184 unsigned int childIndex = 0;
185 for (; childIndex < childCount; childIndex++)
186 {
187 // Get the current element index out
188 const QName* curElem = children[childIndex];
189 const XMLCh* curElemRawName = 0;
190 if (fDTD)
191 curElemRawName = curElem->getRawName();
192
193 // If this is text in a Schema mixed content model, skip it.
194 if ( fIsMixed &&
195 ( curElem->getURI() == XMLElementDecl::fgPCDataElemId))
196 continue;
197
198 // Look up this child in our element map
199 unsigned int elemIndex = 0;
200 for (; elemIndex < fElemMapSize; elemIndex++)
201 {
202 const QName* inElem = fElemMap[elemIndex];
203 if (fDTD) {
204 if (XMLString::equals(inElem->getRawName(), curElemRawName)) {
205 nextState = fTransTable[curState][elemIndex];
206 if (nextState != XMLContentModel::gInvalidTrans)
207 break;
208 }
209 }
210 else {
211 ContentSpecNode::NodeTypes type = fElemMapType[elemIndex];
212 if (type == ContentSpecNode::Leaf)
213 {
214 if ((inElem->getURI() == curElem->getURI()) &&
215 (XMLString::equals(inElem->getLocalPart(), curElem->getLocalPart()))) {
216 nextState = fTransTable[curState][elemIndex];
217 if (nextState != XMLContentModel::gInvalidTrans)
218 break;
219 }
220 }
221 else if ((type & 0x0f)== ContentSpecNode::Any)
222 {
223 nextState = fTransTable[curState][elemIndex];
224 if (nextState != XMLContentModel::gInvalidTrans)
225 break;
226 }
227 else if ((type & 0x0f) == ContentSpecNode::Any_NS)
228 {
229 if (inElem->getURI() == curElem->getURI())
230 {
231 nextState = fTransTable[curState][elemIndex];
232 if (nextState != XMLContentModel::gInvalidTrans)
233 break;
234 }
235 }
236 else if ((type & 0x0f) == ContentSpecNode::Any_Other)
237 {
238 // Here we assume that empty string has id 1.
239 //
240 unsigned int uriId = curElem->getURI();
241 if (uriId != 1 && uriId != inElem->getURI()) {
242 nextState = fTransTable[curState][elemIndex];
243 if (nextState != XMLContentModel::gInvalidTrans)
244 break;
245 }
246 }
247 }
248 }//for elemIndex
249
250 // If "nextState" is -1, we found a match, but the transition is invalid
251 if (nextState == XMLContentModel::gInvalidTrans)
252 {
253 *indexFailingChild=childIndex;
254 return false;
255 }
256
257 // If we didn't find it, then obviously not valid
258 if (elemIndex == fElemMapSize)
259 {
260 *indexFailingChild=childIndex;
261 return false;
262 }
263
264 unsigned int nextLoop = 0;
265 if(!handleRepetitions(curElem, curState, loopCount, nextState, nextLoop, elemIndex, 0))
266 {
267 *indexFailingChild=childIndex;
268 return false;
269 }
270
271 curState = nextState;
272 loopCount = nextLoop;
273 nextState = 0;
274
275 }//for childIndex
276
277 //
278 // We transitioned all the way through the input list. However, that
279 // does not mean that we ended in a final state. So check whether
280 // our ending state is a final state.
281 //
282 if (!fFinalStateFlags[curState])
283 {
284 *indexFailingChild=childIndex;
285 return false;
286 }
287
288 // verify if we exited before the minOccurs was satisfied
289 if (fCountingStates != 0) {
290 Occurence* o = fCountingStates[curState];
291 if (o != 0 && loopCount < (unsigned int)o->minOccurs) {
292 // not enough loops on the current state to be considered final.
293 *indexFailingChild=childIndex;
294 return false;
295 }
296 }
297
298 //success
299 return true;
300 }
301
validateContentSpecial(QName ** const children,XMLSize_t childCount,unsigned int,GrammarResolver * const pGrammarResolver,XMLStringPool * const pStringPool,XMLSize_t * indexFailingChild,MemoryManager * const) const302 bool DFAContentModel::validateContentSpecial(QName** const children
303 , XMLSize_t childCount
304 , unsigned int
305 , GrammarResolver* const pGrammarResolver
306 , XMLStringPool* const pStringPool
307 , XMLSize_t* indexFailingChild
308 , MemoryManager* const) const
309 {
310
311 SubstitutionGroupComparator comparator(pGrammarResolver, pStringPool);
312
313 if (childCount == 0)
314 {
315 if(fEmptyOk)
316 return true;
317 *indexFailingChild=0;
318 return false;
319 }
320
321 //
322 // Lets loop through the children in the array and move our way
323 // through the states. Note that we use the fElemMap array to map
324 // an element index to a state index.
325 //
326 unsigned int curState = 0;
327 unsigned int loopCount = 0;
328 unsigned int nextState = 0;
329 unsigned int childIndex = 0;
330 for (; childIndex < childCount; childIndex++)
331 {
332 // Get the current element index out
333 QName* curElem = children[childIndex];
334
335 // If this is text in a Schema mixed content model, skip it.
336 if ( fIsMixed &&
337 ( curElem->getURI() == XMLElementDecl::fgPCDataElemId))
338 continue;
339
340 // Look up this child in our element map
341 unsigned int elemIndex = 0;
342 for (; elemIndex < fElemMapSize; elemIndex++)
343 {
344 QName* inElem = fElemMap[elemIndex];
345 ContentSpecNode::NodeTypes type = fElemMapType[elemIndex];
346 if (type == ContentSpecNode::Leaf)
347 {
348 if (comparator.isEquivalentTo(curElem, inElem) )
349 {
350 nextState = fTransTable[curState][elemIndex];
351 if (nextState != XMLContentModel::gInvalidTrans)
352 break;
353 }
354
355 }
356 else if ((type & 0x0f)== ContentSpecNode::Any)
357 {
358 nextState = fTransTable[curState][elemIndex];
359 if (nextState != XMLContentModel::gInvalidTrans)
360 break;
361 }
362 else if ((type & 0x0f) == ContentSpecNode::Any_NS)
363 {
364 if (inElem->getURI() == curElem->getURI())
365 {
366 nextState = fTransTable[curState][elemIndex];
367 if (nextState != XMLContentModel::gInvalidTrans)
368 break;
369 }
370 }
371 else if ((type & 0x0f) == ContentSpecNode::Any_Other)
372 {
373 // Here we assume that empty string has id 1.
374 //
375 unsigned int uriId = curElem->getURI();
376 if (uriId != 1 && uriId != inElem->getURI())
377 {
378 nextState = fTransTable[curState][elemIndex];
379 if (nextState != XMLContentModel::gInvalidTrans)
380 break;
381 }
382 }
383 }//for elemIndex
384
385 // If "nextState" is -1, we found a match, but the transition is invalid
386 if (nextState == XMLContentModel::gInvalidTrans)
387 {
388 *indexFailingChild=childIndex;
389 return false;
390 }
391
392 // If we didn't find it, then obviously not valid
393 if (elemIndex == fElemMapSize)
394 {
395 *indexFailingChild=childIndex;
396 return false;
397 }
398
399 unsigned int nextLoop = 0;
400 if(!handleRepetitions(curElem, curState, loopCount, nextState, nextLoop, elemIndex, &comparator))
401 {
402 *indexFailingChild=childIndex;
403 return false;
404 }
405
406 curState = nextState;
407 loopCount = nextLoop;
408 nextState = 0;
409
410 }//for childIndex
411
412 //
413 // We transitioned all the way through the input list. However, that
414 // does not mean that we ended in a final state. So check whether
415 // our ending state is a final state.
416 //
417 if (!fFinalStateFlags[curState])
418 {
419 *indexFailingChild=childIndex;
420 return false;
421 }
422
423 // verify if we exited before the minOccurs was satisfied
424 if (fCountingStates != 0) {
425 Occurence* o = fCountingStates[curState];
426 if (o != 0) {
427 if (loopCount < (unsigned int)o->minOccurs) {
428 // not enough loops on the current state.
429 *indexFailingChild=childIndex;
430 return false;
431 }
432 }
433 }
434
435 //success
436 return true;
437 }
438
handleRepetitions(const QName * const curElem,unsigned int curState,unsigned int currentLoop,unsigned int & nextState,unsigned int & nextLoop,XMLSize_t elemIndex,SubstitutionGroupComparator * comparator) const439 bool DFAContentModel::handleRepetitions(const QName* const curElem,
440 unsigned int curState,
441 unsigned int currentLoop,
442 unsigned int& nextState,
443 unsigned int& nextLoop,
444 XMLSize_t elemIndex,
445 SubstitutionGroupComparator * comparator) const
446 {
447 nextLoop = 0;
448 if (fCountingStates != 0) {
449 nextLoop = currentLoop;
450 Occurence* o = fCountingStates[curState];
451 if (o != 0) {
452 if (curState == nextState) {
453 if (++nextLoop > (unsigned int)o->maxOccurs && o->maxOccurs != -1) {
454 // It's likely that we looped too many times on the current state
455 // however it's possible that we actually matched another particle
456 // which allows the same name.
457 //
458 // Consider:
459 //
460 // <xs:sequence>
461 // <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/>
462 // <xs:element name="foo" type="xs:string" fixed="bar"/>
463 // </xs:sequence>
464 //
465 // and
466 //
467 // <xs:sequence>
468 // <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/>
469 // <xs:any namespace="##any" processContents="skip"/>
470 // </xs:sequence>
471 //
472 // In the DFA there will be two transitions from the current state which
473 // allow "foo". Note that this is not a UPA violation. The ambiguity of which
474 // transition to take is resolved by the current value of the counter. Since
475 // we've already seen enough instances of the first "foo" perhaps there is
476 // another element declaration or wildcard deeper in the element map which
477 // matches.
478 unsigned int tempNextState = 0;
479
480 while (++elemIndex < fElemMapSize) {
481 QName* inElem = fElemMap[elemIndex];
482 ContentSpecNode::NodeTypes type = fElemMapType[elemIndex];
483 if (type == ContentSpecNode::Leaf)
484 {
485 if(comparator!=0) {
486 if (comparator->isEquivalentTo(curElem, inElem) )
487 {
488 tempNextState = fTransTable[curState][elemIndex];
489 if (tempNextState != XMLContentModel::gInvalidTrans)
490 break;
491 }
492 }
493 else if (fDTD) {
494 if (XMLString::equals(inElem->getRawName(), curElem->getRawName())) {
495 tempNextState = fTransTable[curState][elemIndex];
496 if (tempNextState != XMLContentModel::gInvalidTrans)
497 break;
498 }
499 }
500 else {
501 if ((inElem->getURI() == curElem->getURI()) &&
502 (XMLString::equals(inElem->getLocalPart(), curElem->getLocalPart()))) {
503 tempNextState = fTransTable[curState][elemIndex];
504 if (tempNextState != XMLContentModel::gInvalidTrans)
505 break;
506 }
507 }
508 }
509 else if ((type & 0x0f)== ContentSpecNode::Any)
510 {
511 tempNextState = fTransTable[curState][elemIndex];
512 if (tempNextState != XMLContentModel::gInvalidTrans)
513 break;
514 }
515 else if ((type & 0x0f) == ContentSpecNode::Any_NS)
516 {
517 if (inElem->getURI() == curElem->getURI())
518 {
519 tempNextState = fTransTable[curState][elemIndex];
520 if (tempNextState != XMLContentModel::gInvalidTrans)
521 break;
522 }
523 }
524 else if ((type & 0x0f) == ContentSpecNode::Any_Other)
525 {
526 // Here we assume that empty string has id 1.
527 //
528 unsigned int uriId = curElem->getURI();
529 if (uriId != 1 && uriId != inElem->getURI())
530 {
531 tempNextState = fTransTable[curState][elemIndex];
532 if (tempNextState != XMLContentModel::gInvalidTrans)
533 break;
534 }
535 }
536 }
537
538 // if we still can't find a match, report the error
539 if (elemIndex == fElemMapSize)
540 return false;
541
542 // if we found a match, set the next state and reset the
543 // counter if the next state is a counting state.
544 nextState = tempNextState;
545 Occurence* o = fCountingStates[nextState];
546 if (o != 0) {
547 nextLoop = (elemIndex == XMLSize_t (o->elemIndex)) ? 1 : 0;
548 }
549 }
550 }
551 else if (nextLoop < (unsigned int)o->minOccurs) {
552 // not enough loops on the current state.
553 return false;
554 }
555 else {
556 // Exiting a counting state. If we're entering a new
557 // counting state, reset the counter.
558 o = fCountingStates[nextState];
559 if (o != 0) {
560 nextLoop = (elemIndex == XMLSize_t (o->elemIndex)) ? 1 : 0;
561 }
562 }
563 }
564 else {
565 o = fCountingStates[nextState];
566 if (o != 0) {
567 // Entering a new counting state. Reset the counter.
568 // If we've already seen one instance of the looping
569 // particle set the counter to 1, otherwise set it
570 // to 0.
571 nextLoop = (elemIndex == XMLSize_t (o->elemIndex)) ? 1 : 0;
572 }
573 }
574 }
575 return true;
576 }
577
578 // ---------------------------------------------------------------------------
579 // DFAContentModel: Private helper methods
580 // ---------------------------------------------------------------------------
buildDFA(ContentSpecNode * const curNode)581 void DFAContentModel::buildDFA(ContentSpecNode* const curNode)
582 {
583 unsigned int index;
584
585 //
586 // The first step we need to take is to rewrite the content model using
587 // our CMNode objects, and in the process get rid of any repetition short
588 // cuts, converting them into '*' style repetitions or getting rid of
589 // repetitions altogether.
590 //
591 // The conversions done are:
592 //
593 // x+ -> (x|x*)
594 // x? -> (x|epsilon)
595 //
596 // This is a relatively complex scenario. What is happening is that we
597 // create a top level binary node of which the special EOC value is set
598 // as the right side node. The the left side is set to the rewritten
599 // syntax tree. The source is the original content model info from the
600 // decl pool. The rewrite is done by buildSyntaxTree() which recurses the
601 // decl pool's content of the element and builds a new tree in the
602 // process.
603 //
604 // Note that, during this operation, we set each non-epsilon leaf node's
605 // DFA state position and count the number of such leafs, which is left
606 // in the fLeafCount member.
607 //
608 fLeafCount=countLeafNodes(curNode);
609 fEOCPos = fLeafCount++;
610
611 // We need to build an array of references to the non-epsilon
612 // leaf nodes. We will put them in the array according to their position values
613 //
614 fLeafList = (CMLeaf**) fMemoryManager->allocate(fLeafCount*sizeof(CMLeaf*)); //new CMLeaf*[fLeafCount];
615 fLeafListType = (ContentSpecNode::NodeTypes*) fMemoryManager->allocate
616 (
617 fLeafCount * sizeof(ContentSpecNode::NodeTypes)
618 ); //new ContentSpecNode::NodeTypes[fLeafCount];
619 //
620 // And, moving onward... We now need to build the follow position sets
621 // for all the nodes. So we allocate an array of pointers to state sets,
622 // one for each leaf node (i.e. each significant DFA position.)
623 //
624 fFollowList = (CMStateSet**) fMemoryManager->allocate
625 (
626 fLeafCount * sizeof(CMStateSet*)
627 ); //new CMStateSet*[fLeafCount];
628 for (index = 0; index < fLeafCount; index++)
629 fFollowList[index] = new (fMemoryManager) CMStateSet(fLeafCount, fMemoryManager);
630
631 // The buildSyntaxTree function will recursively iterate over the ContentSpecNode
632 // and build the CMNode hierarchy; it will also put every leaf node in the fLeafList
633 // array, then calculate the first and last position sets of each node. This is
634 // cached away in each of the nodes.
635 //
636 // Along the way we also set the leaf count in each node as the maximum
637 // state count. They must know this in order to create their first/last
638 // position sets.
639 //
640 unsigned int counter=0;
641 CMNode* nodeOrgContent = buildSyntaxTree(curNode, counter);
642 //
643 // Check to see whether this content model can handle an empty content,
644 // which is something we need to optimize by looking now before we
645 // throw away the info that would tell us that.
646 //
647 // If the left node of the head (the top level of the original content)
648 // is nullable, then its true.
649 //
650 fEmptyOk = nodeOrgContent->isNullable();
651
652 //
653 // And handle specially the EOC node, which also must be numbered and
654 // counted as a non-epsilon leaf node. It could not be handled in the
655 // above tree build because it was created before all that started. We
656 // save the EOC position since its used during the DFA building loop.
657 //
658 CMLeaf* nodeEOC = new (fMemoryManager) CMLeaf
659 (
660 new (fMemoryManager) QName
661 (
662 XMLUni::fgZeroLenString
663 , XMLUni::fgZeroLenString
664 , XMLContentModel::gEOCFakeId
665 , fMemoryManager
666 )
667 , fEOCPos
668 , true
669 , fLeafCount
670 , fMemoryManager
671 );
672 fHeadNode = new (fMemoryManager) CMBinaryOp
673 (
674 ContentSpecNode::Sequence
675 , nodeOrgContent
676 , nodeEOC
677 , fLeafCount
678 , fMemoryManager
679 );
680
681 // Put also the final EOC node in the leaf array
682 fLeafList[counter] = new (fMemoryManager) CMLeaf
683 (
684 nodeEOC->getElement()
685 , nodeEOC->getPosition()
686 , fLeafCount
687 , fMemoryManager
688 );
689 fLeafListType[counter] = ContentSpecNode::Leaf;
690
691 //
692 // Now handle our top level. We use our left child's last pos set and our
693 // right child's first pos set, so get them now for convenience.
694 //
695 const CMStateSet& last = nodeOrgContent->getLastPos();
696 const CMStateSet& first = nodeEOC->getFirstPos();
697
698 //
699 // Now, for every position which is in our left child's last set
700 // add all of the states in our right child's first set to the
701 // follow set for that position.
702 //
703 CMStateSetEnumerator enumLast(&last);
704 while(enumLast.hasMoreElements())
705 {
706 XMLSize_t index=enumLast.nextElement();
707 *fFollowList[index] |= first;
708 }
709
710 //
711 // And finally the big push... Now we build the DFA using all the states
712 // and the tree we've built up. First we set up the various data
713 // structures we are going to use while we do this.
714 //
715 // First of all we need an array of unique element ids in our content
716 // model. For each transition table entry, we need a set of contiguous
717 // indices to represent the transitions for a particular input element.
718 // So we need to a zero based range of indexes that map to element types.
719 // This element map provides that mapping.
720 //
721 fElemMap = (QName**) fMemoryManager->allocate
722 (
723 fLeafCount * sizeof(QName*)
724 ); //new QName*[fLeafCount];
725 fElemMapType = (ContentSpecNode::NodeTypes*) fMemoryManager->allocate
726 (
727 fLeafCount * sizeof(ContentSpecNode::NodeTypes)
728 ); //new ContentSpecNode::NodeTypes[fLeafCount];
729 fElemMapSize = 0;
730
731 Occurence** elemOccurenceMap=0;
732 for (unsigned int outIndex = 0; outIndex < fLeafCount; outIndex++)
733 {
734 fElemMap[outIndex] = new (fMemoryManager) QName(fMemoryManager);
735
736 if ( (fLeafListType[outIndex] & 0x0f) != ContentSpecNode::Leaf )
737 if (!fLeafNameTypeVector)
738 fLeafNameTypeVector = new (fMemoryManager) ContentLeafNameTypeVector(fMemoryManager);
739
740 // Get the current leaf's element index
741 CMLeaf* leaf=fLeafList[outIndex];
742 const QName* element = leaf->getElement();
743 const XMLCh* elementRawName = 0;
744 if (fDTD && element)
745 elementRawName = element->getRawName();
746
747 // See if the current leaf node's element index is in the list
748 unsigned int inIndex = 0;
749
750 for (; inIndex < fElemMapSize; inIndex++)
751 {
752 const QName* inElem = fElemMap[inIndex];
753 if (fDTD) {
754 if (XMLString::equals(inElem->getRawName(), elementRawName)) {
755 break;
756 }
757 }
758 else {
759 if ((fElemMapType[inIndex] == fLeafListType[outIndex]) &&
760 (inElem->getURI() == element->getURI()) &&
761 (XMLString::equals(inElem->getLocalPart(), element->getLocalPart()))) {
762 break;
763 }
764 }
765 }
766
767 // If it was not in the list, then add it and bump the map size
768 if (inIndex == fElemMapSize)
769 {
770 fElemMap[fElemMapSize]->setValues(*element);
771 if(leaf->isRepeatableLeaf())
772 {
773 if (elemOccurenceMap == 0) {
774 elemOccurenceMap = (Occurence**)fMemoryManager->allocate(fLeafCount*sizeof(Occurence*));
775 memset(elemOccurenceMap, 0, fLeafCount*sizeof(Occurence*));
776 }
777 elemOccurenceMap[fElemMapSize] = new (fMemoryManager) Occurence(((CMRepeatingLeaf*)leaf)->getMinOccurs(), ((CMRepeatingLeaf*)leaf)->getMaxOccurs(), fElemMapSize);
778 }
779 fElemMapType[fElemMapSize] = fLeafListType[outIndex];
780 ++fElemMapSize;
781 }
782 }
783
784 // set up the fLeafNameTypeVector object if there is one.
785 if (fLeafNameTypeVector) {
786 fLeafNameTypeVector->setValues(fElemMap, fElemMapType, fElemMapSize);
787 }
788
789 /***
790 * Optimization(Jan, 2001); We sort fLeafList according to
791 * elemIndex which is *uniquely* associated to each leaf.
792 * We are *assuming* that each element appears in at least one leaf.
793 **/
794 // don't forget to delete it
795 #ifdef OPTIMIZED_BUT_STILL_LINEAR_SEARCH
796 int *fLeafSorter = (int*) fMemoryManager->allocate
797 (
798 (fLeafCount + fElemMapSize) * sizeof(int)
799 ); //new int[fLeafCount + fElemMapSize];
800 unsigned int fSortCount = 0;
801
802 for (unsigned int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++)
803 {
804 const QName* element = fElemMap[elemIndex];
805 const XMLCh* elementRawName = 0;
806 if (fDTD && element)
807 elementRawName = element->getRawName();
808
809 for (unsigned int leafIndex = 0; leafIndex < fLeafCount; leafIndex++)
810 {
811 const QName* leaf = fLeafList[leafIndex]->getElement();
812 if (fDTD) {
813 if (XMLString::equals(leaf->getRawName(), elementRawName)) {
814 fLeafSorter[fSortCount++] = leafIndex;
815 }
816 }
817 else {
818 if ((fElemMapType[elemIndex] == fLeafListType[leafIndex]) &&
819 (leaf->getURI() == element->getURI()) &&
820 (XMLString::equals(leaf->getLocalPart(), element->getLocalPart()))) {
821 fLeafSorter[fSortCount++] = leafIndex;
822 }
823 }
824 }
825 fLeafSorter[fSortCount++] = -1;
826 }
827 #endif
828
829 // instead of using a single array with -1 to separate elements, use a bidimensional map
830 unsigned int** fLeafSorter = (unsigned int**)fMemoryManager->allocate(fElemMapSize * sizeof(unsigned int*));
831 unsigned int* tmpSorter = (unsigned int*)fMemoryManager->allocate(fLeafCount * sizeof(unsigned int));
832 for (unsigned int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++)
833 {
834 const QName* element = fElemMap[elemIndex];
835 const XMLCh* elementRawName = 0;
836 if (fDTD && element)
837 elementRawName = element->getRawName();
838
839 unsigned int fSortCount=0;
840 for (unsigned int leafIndex = 0; leafIndex < fLeafCount; leafIndex++)
841 {
842 const QName* leaf = fLeafList[leafIndex]->getElement();
843 if (fDTD) {
844 if (XMLString::equals(leaf->getRawName(), elementRawName)) {
845 tmpSorter[fSortCount++] = leafIndex;
846 }
847 }
848 else {
849 if ((fElemMapType[elemIndex] == fLeafListType[leafIndex]) &&
850 (leaf->getURI() == element->getURI()) &&
851 (XMLString::equals(leaf->getLocalPart(), element->getLocalPart()))) {
852 tmpSorter[fSortCount++] = leafIndex;
853 }
854 }
855 }
856
857 fLeafSorter[elemIndex]=(unsigned int*)fMemoryManager->allocate((fSortCount+1) * sizeof(unsigned int));
858 fLeafSorter[elemIndex][0]=fSortCount;
859 for (unsigned int index=0;index<fSortCount;index++)
860 fLeafSorter[elemIndex][index+1]=tmpSorter[index];
861 }
862 fMemoryManager->deallocate(tmpSorter);
863
864 //
865 // Next lets create some arrays, some that that hold transient info
866 // during the DFA build and some that are permament. These are kind of
867 // sticky since we cannot know how big they will get, but we don't want
868 // to use any collection type classes because of performance.
869 //
870 // Basically they will probably be about fLeafCount*2 on average, but can
871 // be as large as 2^(fLeafCount*2), worst case. So we start with
872 // fLeafCount*4 as a middle ground. This will be very unlikely to ever
873 // have to expand though, it if does, the overhead will be somewhat ugly.
874 //
875 unsigned int curArraySize = fLeafCount * 4;
876 CMStateSet** statesToDo = (CMStateSet**)
877 fMemoryManager->allocate
878 (
879 curArraySize * sizeof(CMStateSet*)
880 ); //new const CMStateSet*[curArraySize];
881 fFinalStateFlags = (bool*) fMemoryManager->allocate
882 (
883 curArraySize * sizeof(bool)
884 ); //new bool[curArraySize];
885 fTransTable = (unsigned int**) fMemoryManager->allocate
886 (
887 curArraySize * sizeof(unsigned int*)
888 ); //new unsigned int*[curArraySize];
889
890 //
891 // Ok we start with the initial set as the first pos set of the head node
892 // (which is the seq node that holds the content model and the EOC node.)
893 //
894 CMStateSet* setT = new (fMemoryManager) CMStateSet(fHeadNode->getFirstPos());
895
896 //
897 // Note on memory leak: Bugzilla#2707:
898 // ===================================
899 // The CMBinary, pointed to by fHeadNode, shall be released by
900 // deleted by itself.
901 //
902 // fLeafList[] maintains its **OWN** copy of CMLeaf to avoid double deletion
903 // of CMLeaf.
904 //
905
906 delete fHeadNode;
907
908 //
909 // Init our two state flags. Basically the unmarked state counter is
910 // always chasing the current state counter. When it catches up, that
911 // means we made a pass through that did not add any new states to the
912 // lists, at which time we are done. We could have used a expanding array
913 // of flags which we used to mark off states as we complete them, but
914 // this is easier though less readable maybe.
915 //
916 unsigned int unmarkedState = 0;
917 unsigned int curState = 0;
918
919 //
920 // Init the first transition table entry, and put the initial state
921 // into the states to do list, then bump the current state.
922 //
923 fTransTable[curState] = makeDefStateList();
924 statesToDo[curState] = setT;
925 curState++;
926
927 //
928 // the stateTable is an auxiliary means to fast
929 // identification of new state created (instead
930 // of sequential loop statesToDo to find out),
931 // while the role that statesToDo plays remain unchanged.
932 //
933 RefHashTableOf<XMLInteger, CMStateSetHasher> *stateTable =
934 new (fMemoryManager) RefHashTableOf<XMLInteger, CMStateSetHasher>
935 (
936 curArraySize
937 , true
938 , fMemoryManager
939 );
940 //stateTable->put((CMStateSet*)setT, new (fMemoryManager) XMLInteger(0));
941
942 //
943 // Ok, almost done with the algorithm from hell... We now enter the
944 // loop where we go until the states done counter catches up with
945 // the states to do counter.
946 //
947 CMStateSet* newSet = 0;
948 while (unmarkedState < curState)
949 {
950 //
951 // Get the next unmarked state out of the list of states to do.
952 // And get the associated transition table entry.
953 //
954 setT = statesToDo[unmarkedState];
955 unsigned int* transEntry = fTransTable[unmarkedState];
956
957 // Mark this one final if it contains the EOC state
958 fFinalStateFlags[unmarkedState] = setT->getBit(fEOCPos);
959
960 // Bump up the unmarked state count, marking this state done
961 unmarkedState++;
962
963 #ifdef OPTIMIZED_BUT_STILL_LINEAR_SEARCH
964 // Optimization(Jan, 2001)
965 unsigned int sorterIndex = 0;
966 // Optimization(Jan, 2001)
967 #endif
968
969 // Loop through each possible input symbol in the element map
970 for (unsigned int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++)
971 {
972 //
973 // Build up a set of states which is the union of all of the
974 // follow sets of DFA positions that are in the current state. If
975 // we gave away the new set last time through then create a new
976 // one. Otherwise, zero out the existing one.
977 //
978 if (!newSet)
979 newSet = new (fMemoryManager) CMStateSet
980 (
981 fLeafCount
982 , fMemoryManager
983 );
984 else
985 newSet->zeroBits();
986
987 #ifdef OBSOLETED
988 // unoptimized code
989 for (unsigned int leafIndex = 0; leafIndex < fLeafCount; leafIndex++)
990 {
991 // If this leaf index (DFA position) is in the current set...
992 if (setT->getBit(leafIndex))
993 {
994 //
995 // If this leaf is the current input symbol, then we want
996 // to add its follow list to the set of states to transition
997 // to from the current state.
998 //
999 const QName* leaf = fLeafList[leafIndex]->getElement();
1000 const QName* element = fElemMap[elemIndex];
1001 if (fDTD) {
1002 if (XMLString::equals(leaf->getRawName(), element->getRawName())) {
1003 *newSet |= *fFollowList[leafIndex];
1004 }
1005 }
1006 else {
1007 if ((leaf->getURI() == element->getURI()) &&
1008 (XMLString::equals(leaf->getLocalPart(), element->getLocalPart()))) {
1009 *newSet |= *fFollowList[leafIndex];
1010 }
1011 }
1012 }
1013 } // for leafIndex
1014 #endif
1015
1016 #ifdef OPTIMIZED_BUT_STILL_LINEAR_SEARCH
1017 // Optimization(Jan, 2001)
1018 int leafIndex = fLeafSorter[sorterIndex++];
1019
1020 while (leafIndex != -1)
1021 {
1022 // If this leaf index (DFA position) is in the current set...
1023 if (setT->getBit(leafIndex))
1024 {
1025 //
1026 // If this leaf is the current input symbol, then we
1027 // want to add its follow list to the set of states to
1028 // transition to from the current state.
1029 //
1030 *newSet |= *fFollowList[leafIndex];
1031 }
1032 leafIndex = fLeafSorter[sorterIndex++];
1033 } // while (leafIndex != -1)
1034 #endif
1035
1036 unsigned int* fLeafIndexes=fLeafSorter[elemIndex];
1037 unsigned int fNumItems=fLeafIndexes[0];
1038 if(fNumItems!=0)
1039 {
1040 // The algorithm requires finding the leaf that is present both in the bitfield of the current state, and in the
1041 // list of places where the currently tested item can appear. When this occurs, the follow list of this parent item
1042 // is added to the bitfield representing the next state.
1043 // Both the bitfield and the list of places are sorted, so we can analyze them in two ways; either iterating over the
1044 // parent items, testing the bitfield for the existence of the parent (N times a constant Tb), or by iterating over the
1045 // bitfield (restricted to the range of the sorted list of places), using a binary search to locate the leaf in the
1046 // sorted list of places (M times log(N) testing operations Ts)
1047 // Assuming that the time to test a bit is roughly the same of the time needed to compute the average of two integers,
1048 // plus a couple of comparisons and additions, we compare N agains M*log(N) to decide which algorithm should be faster given
1049 // the two sets
1050 if(fNumItems <= setT->getBitCountInRange(fLeafIndexes[1], fLeafIndexes[fNumItems])*log((float)fNumItems))
1051 {
1052 for(unsigned int i=1; i<=fNumItems; ++i)
1053 if(setT->getBit(fLeafIndexes[i]))
1054 {
1055 //
1056 // If this leaf is the current input symbol, then we
1057 // want to add its follow list to the set of states to
1058 // transition to from the current state.
1059 //
1060 *newSet |= *fFollowList[ fLeafIndexes[i] ];
1061 }
1062 }
1063 else
1064 {
1065 // Further optimization: given that the bitfield enumerator returns the numbers in order,
1066 // every time we raise the lower marker we know it will true also for the next bits, so
1067 // the next binary search will not start from 1 but from this index
1068 unsigned int lowIndex = 1;
1069 // Start the enumerator from the first index in the sorted list of places,
1070 // as nothing before that point will match
1071 CMStateSetEnumerator enumBits(setT, fLeafIndexes[1]);
1072 while(enumBits.hasMoreElements())
1073 {
1074 unsigned int bitIndex=enumBits.nextElement();
1075 // if this leaf is greater than the last index in the sorted list of places,
1076 // nothing can be found from now on, so get out of here
1077 if(bitIndex > fLeafIndexes[fNumItems])
1078 break;
1079
1080 // Check if this leaf index (DFA position) is in the current set
1081 // (using binary search: the indexes are sorted)
1082 unsigned int first=lowIndex,last=fNumItems,i;
1083 while(first<=last)
1084 {
1085 i=(first+last)/2;
1086 if(fLeafIndexes[i]>bitIndex)
1087 last=i-1;
1088 else if(fLeafIndexes[i]<bitIndex)
1089 lowIndex=first=i+1;
1090 else
1091 {
1092 //
1093 // If this leaf is the current input symbol, then we
1094 // want to add its follow list to the set of states to
1095 // transition to from the current state.
1096 //
1097 *newSet |= *fFollowList[bitIndex];
1098 break;
1099 }
1100 }
1101 }
1102 }
1103 }
1104
1105 //
1106 // If this new set is not empty, then see if its in the list
1107 // of states to do. If not, then add it.
1108 //
1109 if (!newSet->isEmpty())
1110 {
1111 //
1112 // Search the 'states to do' list to see if this new
1113 // state set is already in there.
1114 //
1115 /***
1116 unsigned int stateIndex = 0;
1117 for (; stateIndex < curState; stateIndex++)
1118 {
1119 if (*statesToDo[stateIndex] == *newSet)
1120 break;
1121 }
1122 ***/
1123
1124 XMLInteger *stateObj = stateTable->get(newSet);
1125 unsigned int stateIndex = (stateObj == 0 ? curState : stateObj->intValue());
1126
1127 // If we did not find it, then add it
1128 if (stateIndex == curState)
1129 {
1130 //
1131 // Put this new state into the states to do and init
1132 // a new entry at the same index in the transition
1133 // table.
1134 //
1135 statesToDo[curState] = newSet;
1136 fTransTable[curState] = makeDefStateList();
1137 stateTable->put
1138 (
1139 newSet
1140 , new (fMemoryManager) XMLInteger(curState)
1141 );
1142
1143 // We now have a new state to do so bump the count
1144 curState++;
1145
1146 //
1147 // Null out the new set to indicate we adopted it. This
1148 // will cause the creation of a new set on the next time
1149 // around the loop.
1150 //
1151 newSet = 0;
1152 }
1153
1154 //
1155 // Now set this state in the transition table's entry for this
1156 // element (using its index), with the DFA state we will move
1157 // to from the current state when we see this input element.
1158 //
1159 transEntry[elemIndex] = stateIndex;
1160
1161 // Expand the arrays if we're full
1162 if (curState == curArraySize)
1163 {
1164 //
1165 // Yikes, we overflowed the initial array size, so we've
1166 // got to expand all of these arrays. So adjust up the
1167 // size by 50% and allocate new arrays.
1168 //
1169 const unsigned int newSize = (unsigned int)(curArraySize * 1.5);
1170 CMStateSet** newToDo = (CMStateSet**)
1171 fMemoryManager->allocate
1172 (
1173 newSize * sizeof(CMStateSet*)
1174 ); //new const CMStateSet*[newSize];
1175 bool* newFinalFlags = (bool*) fMemoryManager->allocate
1176 (
1177 newSize * sizeof(bool)
1178 ); //new bool[newSize];
1179 unsigned int** newTransTable = (unsigned int**)
1180 fMemoryManager->allocate
1181 (
1182 newSize * sizeof(unsigned int*)
1183 ); //new unsigned int*[newSize];
1184
1185 // Copy over all of the existing content
1186 for (unsigned int expIndex = 0; expIndex < curArraySize; expIndex++)
1187 {
1188 newToDo[expIndex] = statesToDo[expIndex];
1189 newFinalFlags[expIndex] = fFinalStateFlags[expIndex];
1190 newTransTable[expIndex] = fTransTable[expIndex];
1191 }
1192
1193 // Clean up the old stuff
1194 fMemoryManager->deallocate(statesToDo); //delete [] statesToDo;
1195 fMemoryManager->deallocate(fFinalStateFlags); //delete [] fFinalStateFlags;
1196 fMemoryManager->deallocate(fTransTable); //delete [] fTransTable;
1197
1198 // Store the new array size and pointers
1199 curArraySize = newSize;
1200 statesToDo = newToDo;
1201 fFinalStateFlags = newFinalFlags;
1202 fTransTable = newTransTable;
1203 } //if (curState == curArraySize)
1204 } //if (!newSet->isEmpty())
1205 } // for elemIndex
1206 } //while
1207
1208 // Store the current state count in the trans table size
1209 fTransTableSize = curState;
1210
1211 //
1212 // Fill in the occurence information for each looping state
1213 // if we're using counters.
1214 //
1215 if (elemOccurenceMap != 0) {
1216 fCountingStates = (Occurence**)fMemoryManager->allocate(fTransTableSize*sizeof(Occurence));
1217 memset(fCountingStates, 0, fTransTableSize*sizeof(Occurence*));
1218 for (unsigned int i = 0; i < fTransTableSize; ++i) {
1219 unsigned int * transitions = fTransTable[i];
1220 for (unsigned int j = 0; j < fElemMapSize; ++j) {
1221 if (i == transitions[j]) {
1222 Occurence* old=elemOccurenceMap[j];
1223 if(old!=0)
1224 fCountingStates[i] = new (fMemoryManager) Occurence(old->minOccurs, old->maxOccurs, old->elemIndex);
1225 break;
1226 }
1227 }
1228 }
1229 for (unsigned int j = 0; j < fLeafCount; ++j) {
1230 if(elemOccurenceMap[j]!=0)
1231 delete elemOccurenceMap[j];
1232 }
1233 fMemoryManager->deallocate(elemOccurenceMap);
1234 }
1235
1236 // If the last temp set was not stored, then clean it up
1237 if (newSet)
1238 delete newSet;
1239
1240 //
1241 // Now we can clean up all of the temporary data that was needed during
1242 // DFA build.
1243 //
1244
1245 for (index = 0; index < fLeafCount; index++)
1246 delete fFollowList[index];
1247 fMemoryManager->deallocate(fFollowList); //delete [] fFollowList;
1248
1249 //
1250 // removeAll() will delete all data, XMLInteger,
1251 // while the keys are to be deleted by the
1252 // deletion of statesToDo.
1253 //
1254 delete stateTable;
1255
1256 for (index = 0; index < curState; index++)
1257 delete statesToDo[index];
1258 fMemoryManager->deallocate(statesToDo); //delete [] statesToDo;
1259
1260 for (index = 0; index < fLeafCount; index++)
1261 delete fLeafList[index];
1262 fMemoryManager->deallocate(fLeafList); //delete [] fLeafList;
1263
1264 #ifdef OPTIMIZED_BUT_STILL_LINEAR_SEARCH
1265 fMemoryManager->deallocate(fLeafSorter); //delete [] fLeafSorter;
1266 #endif
1267 for (index=0; index < fElemMapSize; index++)
1268 fMemoryManager->deallocate(fLeafSorter[index]);
1269 fMemoryManager->deallocate(fLeafSorter);
1270 }
1271
countLeafNodes(ContentSpecNode * const curNode)1272 unsigned int DFAContentModel::countLeafNodes(ContentSpecNode* const curNode)
1273 {
1274 unsigned int count = 0;
1275
1276 // Get the spec type of the passed node
1277 const ContentSpecNode::NodeTypes curType = curNode->getType();
1278
1279 if ((curType & 0x0f) == ContentSpecNode::Any
1280 || (curType & 0x0f) == ContentSpecNode::Any_Other
1281 || (curType & 0x0f) == ContentSpecNode::Any_NS
1282 || curType == ContentSpecNode::Leaf
1283 || curType == ContentSpecNode::Loop)
1284 {
1285 count++;
1286 }
1287 else
1288 {
1289 //
1290 // Its not a leaf, so we have to recurse its left and maybe right
1291 // nodes. Save both values before we recurse and trash the node.
1292 //
1293 ContentSpecNode* leftNode = curNode->getFirst();
1294 ContentSpecNode* rightNode = curNode->getSecond();
1295
1296 // Detect if we have a deep tree that can be analyzed using a loop instead of recursion
1297 unsigned int nLoopCount=0;
1298 ContentSpecNode* cursor=curNode;
1299 while(cursor->getType()==ContentSpecNode::Sequence && cursor->getFirst() && cursor->getFirst()->getSecond()==rightNode)
1300 {
1301 nLoopCount++;
1302 cursor=cursor->getFirst();
1303 }
1304 if(nLoopCount!=0)
1305 {
1306 count += countLeafNodes(cursor);
1307 for(unsigned int i=0;i<nLoopCount;i++)
1308 count += countLeafNodes(rightNode);
1309 return count;
1310 }
1311 if(leftNode)
1312 count+=countLeafNodes(leftNode);
1313 if(rightNode)
1314 count+=countLeafNodes(rightNode);
1315 }
1316 return count;
1317 }
1318
buildSyntaxTree(ContentSpecNode * const curNode,unsigned int & curIndex)1319 CMNode* DFAContentModel::buildSyntaxTree(ContentSpecNode* const curNode
1320 , unsigned int& curIndex)
1321 {
1322 // Initialize a return node pointer
1323 CMNode* retNode = 0;
1324
1325 // Get the spec type of the passed node
1326 const ContentSpecNode::NodeTypes curType = curNode->getType();
1327
1328 if ((curType & 0x0f) == ContentSpecNode::Any
1329 || (curType & 0x0f) == ContentSpecNode::Any_Other
1330 || (curType & 0x0f) == ContentSpecNode::Any_NS)
1331 {
1332 retNode = new (fMemoryManager) CMAny
1333 (
1334 curType
1335 , curNode->getElement()->getURI()
1336 , curIndex
1337 , fLeafCount
1338 , fMemoryManager
1339 );
1340 fLeafList[curIndex] = new (fMemoryManager) CMLeaf
1341 (
1342 new (fMemoryManager) QName
1343 (
1344 XMLUni::fgZeroLenString
1345 , XMLUni::fgZeroLenString
1346 , curNode->getElement()->getURI()
1347 , fMemoryManager
1348 )
1349 , curIndex
1350 , true
1351 , fLeafCount
1352 , fMemoryManager
1353 );
1354 fLeafListType[curIndex] = curType;
1355 ++curIndex;
1356 }
1357 else if (curType == ContentSpecNode::Leaf)
1358 {
1359 //
1360 // Create a new leaf node, and pass it the current leaf count, which
1361 // is its DFA state position. Bump the leaf count after storing it.
1362 // This makes the positions zero based since we store first and then
1363 // increment.
1364 //
1365 retNode = new (fMemoryManager) CMLeaf
1366 (
1367 curNode->getElement()
1368 , curIndex
1369 , fLeafCount
1370 , fMemoryManager
1371 );
1372 fLeafList[curIndex] = new (fMemoryManager) CMLeaf
1373 (
1374 curNode->getElement()
1375 , curIndex
1376 , fLeafCount
1377 , fMemoryManager
1378 );
1379 fLeafListType[curIndex] = ContentSpecNode::Leaf;
1380 ++curIndex;
1381 }
1382 else if (curType == ContentSpecNode::Loop)
1383 {
1384 //
1385 // Create a new leaf node, and pass it the current leaf count, which
1386 // is its DFA state position. Bump the leaf count after storing it.
1387 // This makes the positions zero based since we store first and then
1388 // increment.
1389 //
1390 retNode = new (fMemoryManager) CMRepeatingLeaf
1391 (
1392 curNode->getFirst()->getElement()
1393 , curNode->getMinOccurs()
1394 , curNode->getMaxOccurs()
1395 , curIndex
1396 , fLeafCount
1397 , fMemoryManager
1398 );
1399 fLeafList[curIndex] = new (fMemoryManager) CMRepeatingLeaf
1400 (
1401 curNode->getFirst()->getElement()
1402 , curNode->getMinOccurs()
1403 , curNode->getMaxOccurs()
1404 , curIndex
1405 , fLeafCount
1406 , fMemoryManager
1407 );
1408 fLeafListType[curIndex] = curNode->getFirst()->getType();
1409 ++curIndex;
1410 }
1411 else
1412 {
1413 //
1414 // Its not a leaf, so we have to recurse its left and maybe right
1415 // nodes. Save both values before we recurse and trash the node.
1416 //
1417 ContentSpecNode* leftNode = curNode->getFirst();
1418 ContentSpecNode* rightNode = curNode->getSecond();
1419
1420 // Detect if we have a deep tree that can be analyzed using a loop instead of recursion
1421 unsigned int nLoopCount=0;
1422 ContentSpecNode* cursor=curNode;
1423 while(cursor->getType()==ContentSpecNode::Sequence && cursor->getFirst() && cursor->getFirst()->getSecond()==rightNode)
1424 {
1425 nLoopCount++;
1426 cursor=cursor->getFirst();
1427 }
1428 if(nLoopCount!=0)
1429 {
1430 retNode = buildSyntaxTree(cursor, curIndex);
1431 for(unsigned int i=0;i<nLoopCount;i++)
1432 {
1433 CMNode* newRight = buildSyntaxTree(rightNode, curIndex);
1434 //
1435 // Now handle our level. We use our left child's last pos set and our
1436 // right child's first pos set, so get them now for convenience.
1437 //
1438 const CMStateSet& last = retNode->getLastPos();
1439 const CMStateSet& first = newRight->getFirstPos();
1440
1441 //
1442 // Now, for every position which is in our left child's last set
1443 // add all of the states in our right child's first set to the
1444 // follow set for that position.
1445 //
1446 CMStateSetEnumerator enumLast(&last);
1447 while(enumLast.hasMoreElements())
1448 {
1449 XMLSize_t index=enumLast.nextElement();
1450 *fFollowList[index] |= first;
1451 }
1452 retNode = new (fMemoryManager) CMBinaryOp
1453 (
1454 ContentSpecNode::Sequence
1455 , retNode
1456 , newRight
1457 , fLeafCount
1458 , fMemoryManager
1459 );
1460 }
1461 return retNode;
1462 }
1463
1464 if (((curType & 0x0f) == ContentSpecNode::Choice)
1465 || ((curType & 0x0f) == ContentSpecNode::Sequence))
1466 {
1467 //
1468 // Recurse on both children, and return a binary op node with the
1469 // two created sub nodes as its children. The node type is the
1470 // same type as the source.
1471 //
1472 CMNode* newLeft = buildSyntaxTree(leftNode, curIndex);
1473 CMNode* newRight = buildSyntaxTree(rightNode, curIndex);
1474 if(((curType & 0x0f) == ContentSpecNode::Sequence))
1475 {
1476 //
1477 // Now handle our level. We use our left child's last pos set and our
1478 // right child's first pos set, so get them now for convenience.
1479 //
1480 const CMStateSet& last = newLeft->getLastPos();
1481 const CMStateSet& first = newRight->getFirstPos();
1482
1483 //
1484 // Now, for every position which is in our left child's last set
1485 // add all of the states in our right child's first set to the
1486 // follow set for that position.
1487 //
1488 CMStateSetEnumerator enumLast(&last);
1489 while(enumLast.hasMoreElements())
1490 {
1491 XMLSize_t index=enumLast.nextElement();
1492 *fFollowList[index] |= first;
1493 }
1494 }
1495 retNode = new (fMemoryManager) CMBinaryOp
1496 (
1497 curType
1498 , newLeft
1499 , newRight
1500 , fLeafCount
1501 , fMemoryManager
1502 );
1503 }
1504 else if (curType == ContentSpecNode::ZeroOrMore
1505 || curType == ContentSpecNode::ZeroOrOne
1506 || curType == ContentSpecNode::OneOrMore)
1507 {
1508 CMNode* newChild = buildSyntaxTree(leftNode, curIndex);
1509 if (curType == ContentSpecNode::ZeroOrMore
1510 || curType == ContentSpecNode::OneOrMore)
1511 {
1512 //
1513 // Now handle our level. We use our own first and last position
1514 // sets, so get them up front.
1515 //
1516 const CMStateSet& first = newChild->getFirstPos();
1517 const CMStateSet& last = newChild->getLastPos();
1518
1519 //
1520 // For every position which is in our last position set, add all
1521 // of our first position states to the follow set for that
1522 // position.
1523 //
1524 CMStateSetEnumerator enumLast(&last);
1525 while(enumLast.hasMoreElements())
1526 {
1527 XMLSize_t index=enumLast.nextElement();
1528 *fFollowList[index] |= first;
1529 }
1530 }
1531 // This one is fine as is, just change to our form
1532 retNode = new (fMemoryManager) CMUnaryOp
1533 (
1534 curType
1535 , newChild
1536 , fLeafCount
1537 , fMemoryManager
1538 );
1539 }
1540 else
1541 {
1542 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::CM_UnknownCMSpecType, fMemoryManager);
1543 }
1544 }
1545 // fault in the first and last pos, then delete it children
1546 retNode->getFirstPos();
1547 retNode->getLastPos();
1548 retNode->orphanChild();
1549 return retNode;
1550 }
1551
1552 //
1553 // gInvalidTrans is used to represent bad transitions in the transition table
1554 // entry for each state. So each entry is initialized to that value. This
1555 // method creates a new entry and initializes it.
1556 //
makeDefStateList() const1557 unsigned int* DFAContentModel::makeDefStateList() const
1558 {
1559 unsigned int* retArray = (unsigned int*) fMemoryManager->allocate
1560 (
1561 fElemMapSize * sizeof(unsigned int)
1562 ); //new unsigned int[fElemMapSize];
1563 for (unsigned int index = 0; index < fElemMapSize; index++)
1564 retArray[index] = XMLContentModel::gInvalidTrans;
1565 return retArray;
1566 }
1567
getContentLeafNameTypeVector() const1568 ContentLeafNameTypeVector* DFAContentModel::getContentLeafNameTypeVector() const
1569 {
1570 //later change it to return the data member
1571 return fLeafNameTypeVector;
1572 }
1573
checkUniqueParticleAttribution(SchemaGrammar * const pGrammar,GrammarResolver * const pGrammarResolver,XMLStringPool * const pStringPool,XMLValidator * const pValidator,unsigned int * const pContentSpecOrgURI,const XMLCh * pComplexTypeName)1574 void DFAContentModel::checkUniqueParticleAttribution (SchemaGrammar* const pGrammar,
1575 GrammarResolver* const pGrammarResolver,
1576 XMLStringPool* const pStringPool,
1577 XMLValidator* const pValidator,
1578 unsigned int* const pContentSpecOrgURI,
1579 const XMLCh* pComplexTypeName /*= 0*/)
1580 {
1581
1582 SubstitutionGroupComparator comparator(pGrammarResolver, pStringPool);
1583
1584 unsigned int i, j, k;
1585
1586 // Rename the URI back
1587 for (i = 0; i < fElemMapSize; i++) {
1588
1589 unsigned int orgURIIndex = fElemMap[i]->getURI();
1590
1591 if ((orgURIIndex != XMLContentModel::gEOCFakeId) &&
1592 (orgURIIndex != XMLContentModel::gEpsilonFakeId) &&
1593 (orgURIIndex != XMLElementDecl::fgInvalidElemId) &&
1594 (orgURIIndex != XMLElementDecl::fgPCDataElemId)) {
1595 fElemMap[i]->setURI(pContentSpecOrgURI[orgURIIndex]);
1596 }
1597 }
1598
1599 // Unique Particle Attribution
1600 // Store the conflict results between any two elements in fElemMap
1601 // 0 - not yet tested, 1 - conflict, (-1) - no conflict
1602 signed char** conflictTable = (signed char**) fMemoryManager->allocate
1603 (
1604 fElemMapSize * sizeof(signed char*)
1605 );
1606
1607 // initialize the conflict table
1608 for (j = 0; j < fElemMapSize; j++) {
1609 conflictTable[j] = (signed char*) fMemoryManager->allocate
1610 (
1611 fElemMapSize * sizeof(signed char)
1612 );
1613 memset(conflictTable[j], 0, fElemMapSize*sizeof(signed char));
1614 }
1615
1616 // for each state, check whether it has overlap transitions
1617 for (i = 0; i < fTransTableSize; i++) {
1618 for (j = 0; j < fElemMapSize; j++) {
1619 for (k = j+1; k < fElemMapSize; k++) {
1620 if (fTransTable[i][j] != XMLContentModel::gInvalidTrans &&
1621 fTransTable[i][k] != XMLContentModel::gInvalidTrans &&
1622 conflictTable[j][k] == 0) {
1623
1624 // If this is text in a Schema mixed content model, skip it.
1625 if ( fIsMixed &&
1626 (( fElemMap[j]->getURI() == XMLElementDecl::fgPCDataElemId) ||
1627 ( fElemMap[k]->getURI() == XMLElementDecl::fgPCDataElemId)))
1628 continue;
1629
1630 if (XercesElementWildcard::conflict(pGrammar,
1631 fElemMapType[j],
1632 fElemMap[j],
1633 fElemMapType[k],
1634 fElemMap[k],
1635 &comparator)) {
1636 if (fCountingStates != 0) {
1637 Occurence* o = fCountingStates[i];
1638 // If "i" is a counting state and exactly one of the transitions
1639 // loops back to "i" then the two particles do not overlap if
1640 // minOccurs == maxOccurs.
1641 if (o != 0 &&
1642 ((fTransTable[i][j] == i) ^ (fTransTable[i][k] == i)) &&
1643 o->minOccurs == o->maxOccurs) {
1644 conflictTable[j][k] = -1;
1645 continue;
1646 }
1647 }
1648 conflictTable[j][k] = 1;
1649
1650 XMLBuffer buf1(1023, fMemoryManager);
1651 if (((fElemMapType[j] & 0x0f) == ContentSpecNode::Any) ||
1652 ((fElemMapType[j] & 0x0f) == ContentSpecNode::Any_NS))
1653 buf1.set(SchemaSymbols::fgATTVAL_TWOPOUNDANY);
1654 else if ((fElemMapType[j] & 0x0f) == ContentSpecNode::Any_Other)
1655 buf1.set(SchemaSymbols::fgATTVAL_TWOPOUNDOTHER);
1656 else
1657 buf1.set(fElemMap[j]->getRawName());
1658
1659 XMLBuffer buf2(1023, fMemoryManager);
1660 if (((fElemMapType[k] & 0x0f) == ContentSpecNode::Any) ||
1661 ((fElemMapType[k] & 0x0f) == ContentSpecNode::Any_NS))
1662 buf2.set(SchemaSymbols::fgATTVAL_TWOPOUNDANY);
1663 else if ((fElemMapType[k] & 0x0f) == ContentSpecNode::Any_Other)
1664 buf2.set(SchemaSymbols::fgATTVAL_TWOPOUNDOTHER);
1665 else
1666 buf2.set(fElemMap[k]->getRawName());
1667
1668 pValidator->emitError(XMLValid::UniqueParticleAttributionFail,
1669 pComplexTypeName,
1670 buf1.getRawBuffer(),
1671 buf2.getRawBuffer());
1672 }
1673 else
1674 conflictTable[j][k] = -1;
1675 }
1676 }
1677 }
1678 }
1679
1680 for (i = 0; i < fElemMapSize; i++)
1681 fMemoryManager->deallocate(conflictTable[i]);
1682 fMemoryManager->deallocate(conflictTable);
1683 }
1684
1685 XERCES_CPP_NAMESPACE_END
1686