1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /*
19 * $Id: IGXMLScanner.cpp 882548 2009-11-20 13:44:14Z borisk $
20 */
21
22 // ---------------------------------------------------------------------------
23 // Includes
24 // ---------------------------------------------------------------------------
25 #include <xercesc/internal/IGXMLScanner.hpp>
26 #include <xercesc/util/RuntimeException.hpp>
27 #include <xercesc/util/UnexpectedEOFException.hpp>
28 #include <xercesc/sax/InputSource.hpp>
29 #include <xercesc/framework/XMLDocumentHandler.hpp>
30 #include <xercesc/framework/XMLEntityHandler.hpp>
31 #include <xercesc/framework/XMLPScanToken.hpp>
32 #include <xercesc/internal/EndOfEntityException.hpp>
33 #include <xercesc/framework/MemoryManager.hpp>
34 #include <xercesc/framework/XMLGrammarPool.hpp>
35 #include <xercesc/framework/XMLDTDDescription.hpp>
36 #include <xercesc/framework/psvi/PSVIElement.hpp>
37 #include <xercesc/framework/psvi/PSVIHandler.hpp>
38 #include <xercesc/framework/psvi/PSVIAttributeList.hpp>
39 #include <xercesc/validators/common/GrammarResolver.hpp>
40 #include <xercesc/validators/DTD/DocTypeHandler.hpp>
41 #include <xercesc/validators/DTD/DTDScanner.hpp>
42 #include <xercesc/validators/DTD/DTDValidator.hpp>
43 #include <xercesc/validators/schema/SchemaValidator.hpp>
44 #include <xercesc/validators/schema/identity/IdentityConstraintHandler.hpp>
45 #include <xercesc/validators/schema/identity/IC_Selector.hpp>
46 #include <xercesc/util/OutOfMemoryException.hpp>
47
48 XERCES_CPP_NAMESPACE_BEGIN
49
50
// Guard type used by the constructors in this file: it is built from the
// scanner and &IGXMLScanner::cleanUp, and release() is called on it once
// commonInit() has finished (or when an out-of-memory error is rethrown).
typedef JanitorMemFunCall<IGXMLScanner> CleanupType;
// Guard type used by scanDocument() and scanNext(): it is built from
// &fReaderMgr and &ReaderMgr::reset, and release() is called on it when the
// reader manager must not be reset on the way out.
typedef JanitorMemFunCall<ReaderMgr> ReaderMgrResetType;
53
54
55 // ---------------------------------------------------------------------------
56 // IGXMLScanner: Constructors and Destructor
57 // ---------------------------------------------------------------------------
IGXMLScanner(XMLValidator * const valToAdopt,GrammarResolver * const grammarResolver,MemoryManager * const manager)58 IGXMLScanner::IGXMLScanner( XMLValidator* const valToAdopt
59 , GrammarResolver* const grammarResolver
60 , MemoryManager* const manager) :
61
62 XMLScanner(valToAdopt, grammarResolver, manager)
63 , fSeeXsi(false)
64 , fGrammarType(Grammar::UnKnown)
65 , fElemStateSize(16)
66 , fElemState(0)
67 , fElemLoopState(0)
68 , fContent(1023, manager)
69 , fRawAttrList(0)
70 , fRawAttrColonListSize(32)
71 , fRawAttrColonList(0)
72 , fDTDValidator(0)
73 , fSchemaValidator(0)
74 , fDTDGrammar(0)
75 , fICHandler(0)
76 , fLocationPairs(0)
77 , fDTDElemNonDeclPool(0)
78 , fSchemaElemNonDeclPool(0)
79 , fElemCount(0)
80 , fAttDefRegistry(0)
81 , fUndeclaredAttrRegistry(0)
82 , fPSVIAttrList(0)
83 , fModel(0)
84 , fPSVIElement(0)
85 , fErrorStack(0)
86 , fSchemaInfoList(0)
87 , fCachedSchemaInfoList (0)
88 {
89 CleanupType cleanup(this, &IGXMLScanner::cleanUp);
90
91 try
92 {
93 commonInit();
94 }
95 catch(const OutOfMemoryException&)
96 {
97 // Don't cleanup when out of memory, since executing the
98 // code can cause problems.
99 cleanup.release();
100
101 throw;
102 }
103
104 cleanup.release();
105 }
106
IGXMLScanner(XMLDocumentHandler * const docHandler,DocTypeHandler * const docTypeHandler,XMLEntityHandler * const entityHandler,XMLErrorReporter * const errHandler,XMLValidator * const valToAdopt,GrammarResolver * const grammarResolver,MemoryManager * const manager)107 IGXMLScanner::IGXMLScanner( XMLDocumentHandler* const docHandler
108 , DocTypeHandler* const docTypeHandler
109 , XMLEntityHandler* const entityHandler
110 , XMLErrorReporter* const errHandler
111 , XMLValidator* const valToAdopt
112 , GrammarResolver* const grammarResolver
113 , MemoryManager* const manager) :
114
115 XMLScanner(docHandler, docTypeHandler, entityHandler, errHandler, valToAdopt, grammarResolver, manager)
116 , fSeeXsi(false)
117 , fGrammarType(Grammar::UnKnown)
118 , fElemStateSize(16)
119 , fElemState(0)
120 , fElemLoopState(0)
121 , fContent(1023, manager)
122 , fRawAttrList(0)
123 , fRawAttrColonListSize(32)
124 , fRawAttrColonList(0)
125 , fDTDValidator(0)
126 , fSchemaValidator(0)
127 , fDTDGrammar(0)
128 , fICHandler(0)
129 , fLocationPairs(0)
130 , fDTDElemNonDeclPool(0)
131 , fSchemaElemNonDeclPool(0)
132 , fElemCount(0)
133 , fAttDefRegistry(0)
134 , fUndeclaredAttrRegistry(0)
135 , fPSVIAttrList(0)
136 , fModel(0)
137 , fPSVIElement(0)
138 , fErrorStack(0)
139 , fSchemaInfoList(0)
140 , fCachedSchemaInfoList (0)
141 {
142 CleanupType cleanup(this, &IGXMLScanner::cleanUp);
143
144 try
145 {
146 commonInit();
147 }
148 catch(const OutOfMemoryException&)
149 {
150 // Don't cleanup when out of memory, since executing the
151 // code can cause problems.
152 cleanup.release();
153
154 throw;
155 }
156
157 cleanup.release();
158 }
159
~IGXMLScanner()160 IGXMLScanner::~IGXMLScanner()
161 {
162 cleanUp();
163 }
164
165 // ---------------------------------------------------------------------------
166 // XMLScanner: Getter methods
167 // ---------------------------------------------------------------------------
getEntityDeclPool()168 NameIdPool<DTDEntityDecl>* IGXMLScanner::getEntityDeclPool()
169 {
170 if(!fDTDGrammar)
171 return 0;
172 return fDTDGrammar->getEntityDeclPool();
173 }
174
getEntityDeclPool() const175 const NameIdPool<DTDEntityDecl>* IGXMLScanner::getEntityDeclPool() const
176 {
177 if(!fDTDGrammar)
178 return 0;
179 return fDTDGrammar->getEntityDeclPool();
180 }
181
182 // ---------------------------------------------------------------------------
183 // IGXMLScanner: Main entry point to scan a document
184 // ---------------------------------------------------------------------------
scanDocument(const InputSource & src)185 void IGXMLScanner::scanDocument(const InputSource& src)
186 {
187 // Bump up the sequence id for this parser instance. This will invalidate
188 // any previous progressive scan tokens.
189 fSequenceId++;
190
191 ReaderMgrResetType resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
192
193 try
194 {
195 // Reset the scanner and its plugged in stuff for a new run. This
196 // resets all the data structures, creates the initial reader and
197 // pushes it on the stack, and sets up the base document path.
198 scanReset(src);
199
200 // If we have a document handler, then call the start document
201 if (fDocHandler)
202 fDocHandler->startDocument();
203
204 // Scan the prolog part, which is everything before the root element
205 // including the DTD subsets.
206 scanProlog();
207
208 // If we got to the end of input, then its not a valid XML file.
209 // Else, go on to scan the content.
210 if (fReaderMgr.atEOF())
211 {
212 emitError(XMLErrs::EmptyMainEntity);
213 }
214 else
215 {
216 // Scan content, and tell it its not an external entity
217 if (scanContent())
218 {
219 // Do post-parse validation if required
220 if (fValidate)
221 {
222 // We handle ID reference semantics at this level since
223 // its required by XML 1.0.
224 checkIDRefs();
225
226 // Then allow the validator to do any extra stuff it wants
227 // fValidator->postParseValidation();
228 }
229
230 // That went ok, so scan for any miscellaneous stuff
231 if (!fReaderMgr.atEOF())
232 scanMiscellaneous();
233 }
234 }
235
236 // If we have a document handler, then call the end document
237 if (fDocHandler)
238 fDocHandler->endDocument();
239
240 //cargill debug:
241 //fGrammarResolver->getXSModel();
242 }
243 // NOTE:
244 //
245 // In all of the error processing below, the emitError() call MUST come
246 // before the flush of the reader mgr, or it will fail because it tries
247 // to find out the position in the XML source of the error.
248 catch(const XMLErrs::Codes)
249 {
250 // This is a 'first failure' exception, so fall through
251 }
252 catch(const XMLValid::Codes)
253 {
254 // This is a 'first fatal error' type exit, so fall through
255 }
256 catch(const XMLException& excToCatch)
257 {
258 // Emit the error and catch any user exception thrown from here. Make
259 // sure in all cases we flush the reader manager.
260 fInException = true;
261 try
262 {
263 if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
264 emitError
265 (
266 XMLErrs::XMLException_Warning
267 , excToCatch.getCode()
268 , excToCatch.getMessage()
269 );
270 else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
271 emitError
272 (
273 XMLErrs::XMLException_Fatal
274 , excToCatch.getCode()
275 , excToCatch.getMessage()
276 );
277 else
278 emitError
279 (
280 XMLErrs::XMLException_Error
281 , excToCatch.getCode()
282 , excToCatch.getMessage()
283 );
284 }
285 catch(const OutOfMemoryException&)
286 {
287 // This is a special case for out-of-memory
288 // conditions, because resetting the ReaderMgr
289 // can be problematic.
290 resetReaderMgr.release();
291
292 throw;
293 }
294 }
295 catch(const OutOfMemoryException&)
296 {
297 // This is a special case for out-of-memory
298 // conditions, because resetting the ReaderMgr
299 // can be problematic.
300 resetReaderMgr.release();
301
302 throw;
303 }
304 }
305
306
scanNext(XMLPScanToken & token)307 bool IGXMLScanner::scanNext(XMLPScanToken& token)
308 {
309 // Make sure this token is still legal
310 if (!isLegalToken(token))
311 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_BadPScanToken, fMemoryManager);
312
313 // Find the next token and remember the reader id
314 XMLSize_t orgReader;
315 XMLTokens curToken;
316
317 ReaderMgrResetType resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
318
319 bool retVal = true;
320
321 try
322 {
323 while (true)
324 {
325 // We have to handle any end of entity exceptions that happen here.
326 // We could be at the end of X nested entities, each of which will
327 // generate an end of entity exception as we try to move forward.
328 try
329 {
330 curToken = senseNextToken(orgReader);
331 break;
332 }
333 catch(const EndOfEntityException& toCatch)
334 {
335 // Send an end of entity reference event
336 if (fDocHandler)
337 fDocHandler->endEntityReference(toCatch.getEntity());
338 }
339 }
340
341 if (curToken == Token_CharData)
342 {
343 scanCharData(fCDataBuf);
344 }
345 else if (curToken == Token_EOF)
346 {
347 if (!fElemStack.isEmpty())
348 {
349 const ElemStack::StackElem* topElem = fElemStack.popTop();
350 emitError
351 (
352 XMLErrs::EndedWithTagsOnStack
353 , topElem->fThisElement->getFullName()
354 );
355 }
356
357 retVal = false;
358 }
359 else
360 {
361 // Its some sort of markup
362 bool gotData = true;
363 switch(curToken)
364 {
365 case Token_CData :
366 // Make sure we are within content
367 if (fElemStack.isEmpty())
368 emitError(XMLErrs::CDATAOutsideOfContent);
369 scanCDSection();
370 break;
371
372 case Token_Comment :
373 scanComment();
374 break;
375
376 case Token_EndTag :
377 scanEndTag(gotData);
378 break;
379
380 case Token_PI :
381 scanPI();
382 break;
383
384 case Token_StartTag :
385 if (fDoNamespaces)
386 scanStartTagNS(gotData);
387 else
388 scanStartTag(gotData);
389 break;
390
391 default :
392 fReaderMgr.skipToChar(chOpenAngle);
393 break;
394 }
395
396 if (orgReader != fReaderMgr.getCurrentReaderNum())
397 emitError(XMLErrs::PartialMarkupInEntity);
398
399 // If we hit the end, then do the miscellaneous part
400 if (!gotData)
401 {
402 // Do post-parse validation if required
403 if (fValidate)
404 {
405 // We handle ID reference semantics at this level since
406 // its required by XML 1.0.
407 checkIDRefs();
408
409 // Then allow the validator to do any extra stuff it wants
410 // fValidator->postParseValidation();
411 }
412
413 // That went ok, so scan for any miscellaneous stuff
414 scanMiscellaneous();
415
416 if (toCheckIdentityConstraint())
417 fICHandler->endDocument();
418
419 if (fDocHandler)
420 fDocHandler->endDocument();
421 }
422 }
423 }
424 // NOTE:
425 //
426 // In all of the error processing below, the emitError() call MUST come
427 // before the flush of the reader mgr, or it will fail because it tries
428 // to find out the position in the XML source of the error.
429 catch(const XMLErrs::Codes)
430 {
431 // This is a 'first failure' exception so return failure
432 retVal = false;
433 }
434 catch(const XMLValid::Codes)
435 {
436 // This is a 'first fatal error' type exit, so return failure
437 retVal = false;
438 }
439 catch(const XMLException& excToCatch)
440 {
441 // Emit the error and catch any user exception thrown from here. Make
442 // sure in all cases we flush the reader manager.
443 fInException = true;
444 try
445 {
446 if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
447 emitError
448 (
449 XMLErrs::XMLException_Warning
450 , excToCatch.getCode()
451 , excToCatch.getMessage()
452 );
453 else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
454 emitError
455 (
456 XMLErrs::XMLException_Fatal
457 , excToCatch.getCode()
458 , excToCatch.getMessage()
459 );
460 else
461 emitError
462 (
463 XMLErrs::XMLException_Error
464 , excToCatch.getCode()
465 , excToCatch.getMessage()
466 );
467 }
468 catch(const OutOfMemoryException&)
469 {
470 // This is a special case for out-of-memory
471 // conditions, because resetting the ReaderMgr
472 // can be problematic.
473 resetReaderMgr.release();
474
475 throw;
476 }
477
478 retVal = false;
479 }
480 catch(const OutOfMemoryException&)
481 {
482 // This is a special case for out-of-memory
483 // conditions, because resetting the ReaderMgr
484 // can be problematic.
485 resetReaderMgr.release();
486
487 throw;
488 }
489
490 // If we are not at the end, release the object that will
491 // reset the ReaderMgr.
492 if (retVal)
493 resetReaderMgr.release();
494
495 return retVal;
496 }
497
498
499
500 // ---------------------------------------------------------------------------
501 // IGXMLScanner: Private helper methods. Most of these are implemented in
502 // IGXMLScanner2.Cpp.
503 // ---------------------------------------------------------------------------
504
505 // This method handles the common initialization, to avoid having to do
506 // it redundantly in multiple constructors.
commonInit()507 void IGXMLScanner::commonInit()
508 {
509
510 // Create the element state array
511 fElemState = (unsigned int*) fMemoryManager->allocate
512 (
513 fElemStateSize * sizeof(unsigned int)
514 ); //new unsigned int[fElemStateSize];
515 fElemLoopState = (unsigned int*) fMemoryManager->allocate
516 (
517 fElemStateSize * sizeof(unsigned int)
518 ); //new unsigned int[fElemStateSize];
519
520 // And we need one for the raw attribute scan. This just stores key/
521 // value string pairs (prior to any processing.)
522 fRawAttrList = new (fMemoryManager) RefVectorOf<KVStringPair>(32, true, fMemoryManager);
523 fRawAttrColonList = (int*) fMemoryManager->allocate
524 (
525 fRawAttrColonListSize * sizeof(int)
526 );
527
528 // Create the Validator and init them
529 fDTDValidator = new (fMemoryManager) DTDValidator();
530 initValidator(fDTDValidator);
531 fSchemaValidator = new (fMemoryManager) SchemaValidator(0, fMemoryManager);
532 initValidator(fSchemaValidator);
533
534 // Create IdentityConstraint info
535 fICHandler = new (fMemoryManager) IdentityConstraintHandler(this, fMemoryManager);
536
537 // Create schemaLocation pair info
538 fLocationPairs = new (fMemoryManager) ValueVectorOf<XMLCh*>(8, fMemoryManager);
539 // create pools for undeclared elements
540 fDTDElemNonDeclPool = new (fMemoryManager) NameIdPool<DTDElementDecl>(29, 128, fMemoryManager);
541 fSchemaElemNonDeclPool = new (fMemoryManager) RefHash3KeysIdPool<SchemaElementDecl>(29, true, 128, fMemoryManager);
542 fAttDefRegistry = new (fMemoryManager) RefHashTableOf<unsigned int, PtrHasher>
543 (
544 131, false, fMemoryManager
545 );
546 fUndeclaredAttrRegistry = new (fMemoryManager) Hash2KeysSetOf<StringHasher>(7, fMemoryManager);
547 fPSVIAttrList = new (fMemoryManager) PSVIAttributeList(fMemoryManager);
548
549 fSchemaInfoList = new (fMemoryManager) RefHash2KeysTableOf<SchemaInfo>(29, fMemoryManager);
550 fCachedSchemaInfoList = new (fMemoryManager) RefHash2KeysTableOf<SchemaInfo>(29, fMemoryManager);
551
552 // use fDTDValidator as the default validator
553 if (!fValidator)
554 fValidator = fDTDValidator;
555 }
556
cleanUp()557 void IGXMLScanner::cleanUp()
558 {
559 fMemoryManager->deallocate(fElemState); //delete [] fElemState;
560 fMemoryManager->deallocate(fElemLoopState); //delete [] fElemLoopState;
561 delete fRawAttrList;
562 fMemoryManager->deallocate(fRawAttrColonList);
563 delete fDTDValidator;
564 delete fSchemaValidator;
565 delete fICHandler;
566 delete fLocationPairs;
567 delete fDTDElemNonDeclPool;
568 delete fSchemaElemNonDeclPool;
569 delete fAttDefRegistry;
570 delete fUndeclaredAttrRegistry;
571 delete fPSVIAttrList;
572 delete fPSVIElement;
573 delete fErrorStack;
574 delete fSchemaInfoList;
575 delete fCachedSchemaInfoList;
576 }
577
578 // ---------------------------------------------------------------------------
579 // IGXMLScanner: Private scanning methods
580 // ---------------------------------------------------------------------------
581
582 // This method is called from scanStartTag() to handle the very raw initial
583 // scan of the attributes. It just fills in the passed collection with
584 // key/value pairs for each attribute. No processing is done on them at all.
585 XMLSize_t
rawAttrScan(const XMLCh * const elemName,RefVectorOf<KVStringPair> & toFill,bool & isEmpty)586 IGXMLScanner::rawAttrScan(const XMLCh* const elemName
587 , RefVectorOf<KVStringPair>& toFill
588 , bool& isEmpty)
589 {
590 // Keep up with how many attributes we've seen so far, and how many
591 // elements are available in the vector. This way we can reuse old
592 // elements until we run out and then expand it.
593 XMLSize_t attCount = 0;
594 XMLSize_t curVecSize = toFill.size();
595
596 // Assume it is not empty
597 isEmpty = false;
598
599 // We loop until we either see a /> or >, handling key/value pairs util
600 // we get there. We place them in the passed vector, which we will expand
601 // as required to hold them.
602 while (true)
603 {
604 // Get the next character, which should be non-space
605 XMLCh nextCh = fReaderMgr.peekNextChar();
606
607 // If the next character is not a slash or closed angle bracket,
608 // then it must be whitespace, since whitespace is required
609 // between the end of the last attribute and the name of the next
610 // one.
611 //
612 if (attCount)
613 {
614 if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
615 {
616 bool bFoundSpace;
617 fReaderMgr.skipPastSpaces(bFoundSpace);
618 if (!bFoundSpace)
619 {
620 // Emit the error but keep on going
621 emitError(XMLErrs::ExpectedWhitespace);
622 }
623 // Ok, peek another char
624 nextCh = fReaderMgr.peekNextChar();
625 }
626 }
627
628 // Ok, here we first check for any of the special case characters.
629 // If its not one, then we do the normal case processing, which
630 // assumes that we've hit an attribute value, Otherwise, we do all
631 // the special case checks.
632 if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
633 {
634 // Assume it's going to be an attribute, so get a name from
635 // the input.
636 int colonPosition;
637 if (!fReaderMgr.getQName(fAttNameBuf, &colonPosition))
638 {
639 if (fAttNameBuf.isEmpty())
640 emitError(XMLErrs::ExpectedAttrName);
641 else
642 emitError(XMLErrs::InvalidAttrName, fAttNameBuf.getRawBuffer());
643 fReaderMgr.skipPastChar(chCloseAngle);
644 return attCount;
645 }
646
647 const XMLCh* curAttNameBuf = fAttNameBuf.getRawBuffer();
648
649 // And next must be an equal sign
650 if (!scanEq())
651 {
652 static const XMLCh tmpList[] =
653 {
654 chSingleQuote, chDoubleQuote, chCloseAngle
655 , chOpenAngle, chForwardSlash, chNull
656 };
657
658 emitError(XMLErrs::ExpectedEqSign);
659
660 // Try to sync back up by skipping forward until we either
661 // hit something meaningful.
662 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
663
664 if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
665 {
666 // Jump back to top for normal processing of these
667 continue;
668 }
669 else if ((chFound == chSingleQuote)
670 || (chFound == chDoubleQuote)
671 || fReaderMgr.getCurrentReader()->isWhitespace(chFound))
672 {
673 // Just fall through assuming that the value is to follow
674 }
675 else if (chFound == chOpenAngle)
676 {
677 // Assume a malformed tag and that new one is starting
678 emitError(XMLErrs::UnterminatedStartTag, elemName);
679 return attCount;
680 }
681 else
682 {
683 // Something went really wrong
684 return attCount;
685 }
686 }
687
688 // Next should be the quoted attribute value. We just do a simple
689 // and stupid scan of this value. The only thing we do here
690 // is to expand entity references.
691 if (!basicAttrValueScan(curAttNameBuf, fAttValueBuf))
692 {
693 static const XMLCh tmpList[] =
694 {
695 chCloseAngle, chOpenAngle, chForwardSlash, chNull
696 };
697
698 emitError(XMLErrs::ExpectedAttrValue);
699
700 // It failed, so lets try to get synced back up. We skip
701 // forward until we find some whitespace or one of the
702 // chars in our list.
703 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
704
705 if ((chFound == chCloseAngle)
706 || (chFound == chForwardSlash)
707 || fReaderMgr.getCurrentReader()->isWhitespace(chFound))
708 {
709 // Just fall through and process this attribute, though
710 // the value will be "".
711 }
712 else if (chFound == chOpenAngle)
713 {
714 // Assume a malformed tag and that new one is starting
715 emitError(XMLErrs::UnterminatedStartTag, elemName);
716 return attCount;
717 }
718 else
719 {
720 // Something went really wrong
721 return attCount;
722 }
723 }
724
725 // And now lets add it to the passed collection. If we have not
726 // filled it up yet, then we use the next element. Else we add
727 // a new one.
728 KVStringPair* curPair = 0;
729 if (attCount >= curVecSize)
730 {
731 curPair = new (fMemoryManager) KVStringPair
732 (
733 curAttNameBuf
734 , fAttNameBuf.getLen()
735 , fAttValueBuf.getRawBuffer()
736 , fAttValueBuf.getLen()
737 , fMemoryManager
738 );
739 toFill.addElement(curPair);
740 }
741 else
742 {
743 curPair = toFill.elementAt(attCount);
744 curPair->set
745 (
746 curAttNameBuf,
747 fAttNameBuf.getLen(),
748 fAttValueBuf.getRawBuffer(),
749 fAttValueBuf.getLen()
750 );
751 }
752
753 if (attCount >= fRawAttrColonListSize) {
754 resizeRawAttrColonList();
755 }
756 // Set the position of the colon and bump the count of attributes we've gotten
757 fRawAttrColonList[attCount++] = colonPosition;
758
759 // And go to the top again for another attribute
760 continue;
761 }
762
763 // It was some special case character so do all of the checks and
764 // deal with it.
765 if (!nextCh)
766 ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
767
768 if (nextCh == chForwardSlash)
769 {
770 fReaderMgr.getNextChar();
771 isEmpty = true;
772 if (!fReaderMgr.skippedChar(chCloseAngle))
773 emitError(XMLErrs::UnterminatedStartTag, elemName);
774 break;
775 }
776 else if (nextCh == chCloseAngle)
777 {
778 fReaderMgr.getNextChar();
779 break;
780 }
781 else if (nextCh == chOpenAngle)
782 {
783 // Check for this one specially, since its going to be common
784 // and it is kind of auto-recovering since we've already hit the
785 // next open bracket, which is what we would have seeked to (and
786 // skipped this whole tag.)
787 emitError(XMLErrs::UnterminatedStartTag, elemName);
788 break;
789 }
790 else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
791 {
792 // Check for this one specially, which is probably a missing
793 // attribute name, e.g. ="value". Just issue expected name
794 // error and eat the quoted string, then jump back to the
795 // top again.
796 emitError(XMLErrs::ExpectedAttrName);
797 fReaderMgr.getNextChar();
798 fReaderMgr.skipQuotedString(nextCh);
799 fReaderMgr.skipPastSpaces();
800 continue;
801 }
802 }
803
804 return attCount;
805 }
806
807
808 // This method will kick off the scanning of the primary content of the
809 // document, i.e. the elements.
scanContent()810 bool IGXMLScanner::scanContent()
811 {
812 // Go into a loop until we hit the end of the root element, or we fall
813 // out because there is no root element.
814 //
815 // We have to do kind of a deeply nested double loop here in order to
816 // avoid doing the setup/teardown of the exception handler on each
817 // round. Doing it this way we only do it when an exception actually
818 // occurs.
819 bool gotData = true;
820 bool inMarkup = false;
821 while (gotData)
822 {
823 try
824 {
825 while (gotData)
826 {
827 // Sense what the next top level token is. According to what
828 // this tells us, we will call something to handle that kind
829 // of thing.
830 XMLSize_t orgReader;
831 const XMLTokens curToken = senseNextToken(orgReader);
832
833 // Handle character data and end of file specially. Char data
834 // is not markup so we don't want to handle it in the loop
835 // below.
836 if (curToken == Token_CharData)
837 {
838 // Scan the character data and call appropriate events. Let
839 // him use our local character data buffer for efficiency.
840 scanCharData(fCDataBuf);
841 continue;
842 }
843 else if (curToken == Token_EOF)
844 {
845 // The element stack better be empty at this point or we
846 // ended prematurely before all elements were closed.
847 if (!fElemStack.isEmpty())
848 {
849 const ElemStack::StackElem* topElem = fElemStack.popTop();
850 emitError
851 (
852 XMLErrs::EndedWithTagsOnStack
853 , topElem->fThisElement->getFullName()
854 );
855 }
856
857 // Its the end of file, so clear the got data flag
858 gotData = false;
859 continue;
860 }
861
862 // We are in some sort of markup now
863 inMarkup = true;
864
865 // According to the token we got, call the appropriate
866 // scanning method.
867 switch(curToken)
868 {
869 case Token_CData :
870 // Make sure we are within content
871 if (fElemStack.isEmpty())
872 emitError(XMLErrs::CDATAOutsideOfContent);
873 scanCDSection();
874 break;
875
876 case Token_Comment :
877 scanComment();
878 break;
879
880 case Token_EndTag :
881 scanEndTag(gotData);
882 break;
883
884 case Token_PI :
885 scanPI();
886 break;
887
888 case Token_StartTag :
889 if (fDoNamespaces)
890 scanStartTagNS(gotData);
891 else
892 scanStartTag(gotData);
893 break;
894
895 default :
896 fReaderMgr.skipToChar(chOpenAngle);
897 break;
898 }
899
900 if (orgReader != fReaderMgr.getCurrentReaderNum())
901 emitError(XMLErrs::PartialMarkupInEntity);
902
903 // And we are back out of markup again
904 inMarkup = false;
905 }
906 }
907 catch(const EndOfEntityException& toCatch)
908 {
909 // If we were in some markup when this happened, then its a
910 // partial markup error.
911 if (inMarkup)
912 emitError(XMLErrs::PartialMarkupInEntity);
913
914 // Send an end of entity reference event
915 if (fDocHandler)
916 fDocHandler->endEntityReference(toCatch.getEntity());
917
918 inMarkup = false;
919 }
920 }
921
922 // It went ok, so return success
923 return true;
924 }
925
926
scanEndTag(bool & gotData)927 void IGXMLScanner::scanEndTag(bool& gotData)
928 {
929 // Assume we will still have data until proven otherwise. It will only
930 // ever be false if this is the end of the root element.
931 gotData = true;
932
933 // Check if the element stack is empty. If so, then this is an unbalanced
934 // element (i.e. more ends than starts, perhaps because of bad text
935 // causing one to be skipped.)
936 if (fElemStack.isEmpty())
937 {
938 emitError(XMLErrs::MoreEndThanStartTags);
939 fReaderMgr.skipPastChar(chCloseAngle);
940 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);
941 }
942
943 // Pop the stack of the element we are supposed to be ending. Remember
944 // that we don't own this. The stack just keeps them and reuses them.
945 unsigned int uriId = (fDoNamespaces)
946 ? fElemStack.getCurrentURI() : fEmptyNamespaceId;
947
948 // these get initialized below
949 const ElemStack::StackElem* topElem = 0;
950 const XMLCh *elemName = 0;
951
952 // Make sure that its the end of the element that we expect
953 // special case for schema validation, whose element decls,
954 // obviously don't contain prefix information
955 if(fGrammarType == Grammar::SchemaGrammarType)
956 {
957 elemName = fElemStack.getCurrentSchemaElemName();
958 topElem = fElemStack.topElement();
959 }
960 else
961 {
962 topElem = fElemStack.topElement();
963 elemName = topElem->fThisElement->getFullName();
964 }
965 if (!fReaderMgr.skippedStringLong(elemName))
966 {
967 emitError
968 (
969 XMLErrs::ExpectedEndOfTagX
970 , elemName
971 );
972 fReaderMgr.skipPastChar(chCloseAngle);
973 fElemStack.popTop();
974 return;
975 }
976
977 // Make sure we are back on the same reader as where we started
978 if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
979 emitError(XMLErrs::PartialTagMarkupError);
980
981 // Skip optional whitespace
982 fReaderMgr.skipPastSpaces();
983
984 // Make sure we find the closing bracket
985 if (!fReaderMgr.skippedChar(chCloseAngle))
986 {
987 emitError
988 (
989 XMLErrs::UnterminatedEndTag
990 , topElem->fThisElement->getFullName()
991 );
992 }
993
994 if (fGrammarType == Grammar::SchemaGrammarType)
995 {
996 // reset error occurred
997 fPSVIElemContext.fErrorOccurred = fErrorStack->pop();
998 if (fValidate && topElem->fThisElement->isDeclared())
999 {
1000 fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
1001 if(!fPSVIElemContext.fCurrentTypeInfo)
1002 fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
1003 else
1004 fPSVIElemContext.fCurrentDV = 0;
1005 if(fPSVIHandler)
1006 {
1007 fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) fValidator)->getNormalizedValue();
1008
1009 if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString))
1010 fPSVIElemContext.fNormalizedValue = 0;
1011 }
1012 }
1013 else
1014 {
1015 fPSVIElemContext.fCurrentDV = 0;
1016 fPSVIElemContext.fCurrentTypeInfo = 0;
1017 fPSVIElemContext.fNormalizedValue = 0;
1018 }
1019 }
1020
1021 // If validation is enabled, then lets pass him the list of children and
1022 // this element and let him validate it.
1023 DatatypeValidator* psviMemberType = 0;
1024 if (fValidate)
1025 {
1026
1027 //
1028 // XML1.0-3rd
1029 // Validity Constraint:
1030 // The declaration matches EMPTY and the element has no content (not even
1031 // entity references, comments, PIs or white space).
1032 //
1033 if ( (fGrammarType == Grammar::DTDGrammarType) &&
1034 (topElem->fCommentOrPISeen) &&
1035 (((DTDElementDecl*) topElem->fThisElement)->getModelType() == DTDElementDecl::Empty))
1036 {
1037 fValidator->emitError
1038 (
1039 XMLValid::EmptyElemHasContent
1040 , topElem->fThisElement->getFullName()
1041 );
1042 }
1043
1044 //
1045 // XML1.0-3rd
1046 // Validity Constraint:
1047 //
1048 // The declaration matches children and the sequence of child elements
1049 // belongs to the language generated by the regular expression in the
1050 // content model, with optional white space, comments and PIs
1051 // (i.e. markup matching production [27] Misc) between the start-tag and
1052 // the first child element, between child elements, or between the last
1053 // child element and the end-tag.
1054 //
1055 // Note that
1056 // a CDATA section containing only white space or
1057 // a reference to an entity whose replacement text is character references
1058 // expanding to white space do not match the nonterminal S, and hence
1059 // cannot appear in these positions; however,
1060 // a reference to an internal entity with a literal value consisting
1061 // of character references expanding to white space does match S,
1062 // since its replacement text is the white space resulting from expansion
1063 // of the character references.
1064 //
1065 if ( (fGrammarType == Grammar::DTDGrammarType) &&
1066 (topElem->fReferenceEscaped) &&
1067 (((DTDElementDecl*) topElem->fThisElement)->getModelType() == DTDElementDecl::Children))
1068 {
1069 fValidator->emitError
1070 (
1071 XMLValid::ElemChildrenHasInvalidWS
1072 , topElem->fThisElement->getFullName()
1073 );
1074 }
1075 XMLSize_t failure;
1076 bool res = fValidator->checkContent
1077 (
1078 topElem->fThisElement
1079 , topElem->fChildren
1080 , topElem->fChildCount
1081 , &failure
1082 );
1083
1084 if (!res)
1085 {
1086 // One of the elements is not valid for the content. NOTE that
1087 // if no children were provided but the content model requires
1088 // them, it comes back with a zero value. But we cannot use that
1089 // to index the child array in this case, and have to put out a
1090 // special message.
1091 if (!topElem->fChildCount)
1092 {
1093 fValidator->emitError
1094 (
1095 XMLValid::EmptyNotValidForContent
1096 , topElem->fThisElement->getFormattedContentModel()
1097 );
1098 }
1099 else if (failure >= topElem->fChildCount)
1100 {
1101 fValidator->emitError
1102 (
1103 XMLValid::NotEnoughElemsForCM
1104 , topElem->fThisElement->getFormattedContentModel()
1105 );
1106 }
1107 else
1108 {
1109 fValidator->emitError
1110 (
1111 XMLValid::ElementNotValidForContent
1112 , topElem->fChildren[failure]->getRawName()
1113 , topElem->fThisElement->getFormattedContentModel()
1114 );
1115 }
1116 }
1117
1118
1119 if (fGrammarType == Grammar::SchemaGrammarType) {
1120 if (((SchemaValidator*) fValidator)->getErrorOccurred())
1121 fPSVIElemContext.fErrorOccurred = true;
1122 else if (fPSVIElemContext.fCurrentDV && fPSVIElemContext.fCurrentDV->getType() == DatatypeValidator::Union)
1123 psviMemberType = fValidationContext->getValidatingMemberType();
1124
1125 if (fPSVIHandler)
1126 {
1127 fPSVIElemContext.fIsSpecified = ((SchemaValidator*) fValidator)->getIsElemSpecified();
1128 if(fPSVIElemContext.fIsSpecified)
1129 fPSVIElemContext.fNormalizedValue = ((SchemaElementDecl *)topElem->fThisElement)->getDefaultValue();
1130 }
1131
1132 // call matchers and de-activate context
1133 if (toCheckIdentityConstraint())
1134 {
1135 fICHandler->deactivateContext
1136 (
1137 (SchemaElementDecl *) topElem->fThisElement
1138 , fContent.getRawBuffer()
1139 , fValidationContext
1140 , fPSVIElemContext.fCurrentDV
1141 );
1142 }
1143
1144 }
1145 }
1146
1147 // QName dv needed topElem to resolve URIs on the checkContent
1148 fElemStack.popTop();
1149
1150 // See if it was the root element, to avoid multiple calls below
1151 const bool isRoot = fElemStack.isEmpty();
1152
1153 if (fGrammarType == Grammar::SchemaGrammarType)
1154 {
1155 if (fPSVIHandler)
1156 {
1157 endElementPSVI(
1158 (SchemaElementDecl*)topElem->fThisElement, psviMemberType);
1159 }
1160 // now we can reset the datatype buffer, since the
1161 // application has had a chance to copy the characters somewhere else
1162 ((SchemaValidator *)fValidator)->clearDatatypeBuffer();
1163 }
1164
1165 // If we have a doc handler, tell it about the end tag
1166 if (fDocHandler)
1167 {
1168 if (fGrammarType == Grammar::SchemaGrammarType) {
1169 if (topElem->fPrefixColonPos != -1)
1170 fPrefixBuf.set(elemName, topElem->fPrefixColonPos);
1171 else
1172 fPrefixBuf.reset();
1173 }
1174 else {
1175 fPrefixBuf.set(topElem->fThisElement->getElementName()->getPrefix());
1176 }
1177 fDocHandler->endElement
1178 (
1179 *topElem->fThisElement
1180 , uriId
1181 , isRoot
1182 , fPrefixBuf.getRawBuffer()
1183 );
1184 }
1185
1186 if (fGrammarType == Grammar::SchemaGrammarType) {
1187 if (!isRoot)
1188 {
1189 // update error information
1190 fErrorStack->push((fErrorStack->size() && fErrorStack->pop()) || fPSVIElemContext.fErrorOccurred);
1191
1192
1193 }
1194 }
1195
1196 // If this was the root, then done with content
1197 gotData = !isRoot;
1198
1199 if (gotData) {
1200 if (fDoNamespaces) {
1201 // Restore the grammar
1202 fGrammar = fElemStack.getCurrentGrammar();
1203 fGrammarType = fGrammar->getGrammarType();
1204 if (fGrammarType == Grammar::SchemaGrammarType && !fValidator->handlesSchema()) {
1205 if (fValidatorFromUser)
1206 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
1207 else {
1208 fValidator = fSchemaValidator;
1209 }
1210 }
1211 else if (fGrammarType == Grammar::DTDGrammarType && !fValidator->handlesDTD()) {
1212 if (fValidatorFromUser)
1213 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager);
1214 else {
1215 fValidator = fDTDValidator;
1216 }
1217 }
1218
1219 fValidator->setGrammar(fGrammar);
1220 }
1221
1222 // Restore the validation flag
1223 fValidate = fElemStack.getValidationFlag();
1224 }
1225 }
1226
1227
1228 // This method handles the high level logic of scanning the DOCType
1229 // declaration. This calls the DTDScanner and kicks off both the scanning of
1230 // the internal subset and the scanning of the external subset, if any.
1231 //
1232 // When we get here the '<!DOCTYPE' part has already been scanned, which is
1233 // what told us that we had a doc type decl to parse.
scanDocTypeDecl()1234 void IGXMLScanner::scanDocTypeDecl()
1235 {
1236 // We have a doc type. So, switch the Grammar.
1237 switchGrammar(XMLUni::fgDTDEntityString);
1238
1239 if (fDocTypeHandler)
1240 fDocTypeHandler->resetDocType();
1241
1242 // There must be some space after DOCTYPE
1243 bool skippedSomething;
1244 fReaderMgr.skipPastSpaces(skippedSomething);
1245 if (!skippedSomething)
1246 {
1247 emitError(XMLErrs::ExpectedWhitespace);
1248
1249 // Just skip the Doctype declaration and return
1250 fReaderMgr.skipPastChar(chCloseAngle);
1251 return;
1252 }
1253
1254 // Get a buffer for the root element
1255 XMLBufBid bbRootName(&fBufMgr);
1256
1257 // Get a name from the input, which should be the name of the root
1258 // element of the upcoming content.
1259 int colonPosition;
1260 bool validName = fDoNamespaces ? fReaderMgr.getQName(bbRootName.getBuffer(), &colonPosition) :
1261 fReaderMgr.getName(bbRootName.getBuffer());
1262 if (!validName)
1263 {
1264 if (bbRootName.isEmpty())
1265 emitError(XMLErrs::NoRootElemInDOCTYPE);
1266 else
1267 emitError(XMLErrs::InvalidRootElemInDOCTYPE, bbRootName.getRawBuffer());
1268 fReaderMgr.skipPastChar(chCloseAngle);
1269 return;
1270 }
1271
1272 // Store the root element name for later check
1273 setRootElemName(bbRootName.getRawBuffer());
1274
1275 // This element obviously is not going to exist in the element decl
1276 // pool yet, but we need to call docTypeDecl. So force it into
1277 // the element decl pool, marked as being there because it was in
1278 // the DOCTYPE. Later, when its declared, the status will be updated.
1279 //
1280 // Only do this if we are not reusing the validator! If we are reusing,
1281 // then look it up instead. It has to exist!
1282 MemoryManager* const rootDeclMgr =
1283 fUseCachedGrammar ? fMemoryManager : fGrammarPoolMemoryManager;
1284
1285 DTDElementDecl* rootDecl = new (rootDeclMgr) DTDElementDecl
1286 (
1287 bbRootName.getRawBuffer()
1288 , fEmptyNamespaceId
1289 , DTDElementDecl::Any
1290 , rootDeclMgr
1291 );
1292
1293 Janitor<DTDElementDecl> rootDeclJanitor(rootDecl);
1294 rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
1295 rootDecl->setExternalElemDeclaration(true);
1296 if(!fUseCachedGrammar)
1297 {
1298 fGrammar->putElemDecl(rootDecl);
1299 rootDeclJanitor.release();
1300 } else
1301 {
1302 // attach this to the undeclared element pool so that it gets deleted
1303 XMLElementDecl* elemDecl = fDTDElemNonDeclPool->getByKey(bbRootName.getRawBuffer());
1304 if (elemDecl)
1305 {
1306 rootDecl->setId(elemDecl->getId());
1307 }
1308 else
1309 {
1310 rootDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)rootDecl));
1311 rootDeclJanitor.release();
1312 }
1313 }
1314
1315 // Skip any spaces after the name
1316 fReaderMgr.skipPastSpaces();
1317
1318 // And now if we are looking at a >, then we are done. It is not
1319 // required to have an internal or external subset, though why you
1320 // would not escapes me.
1321 if (fReaderMgr.skippedChar(chCloseAngle)) {
1322
1323 // If we have a doc type handler and advanced callbacks are enabled,
1324 // call the doctype event.
1325 if (fDocTypeHandler)
1326 fDocTypeHandler->doctypeDecl(*rootDecl, 0, 0, false);
1327 return;
1328 }
1329
1330 // either internal/external subset
1331 if (fValScheme == Val_Auto && !fValidate)
1332 fValidate = true;
1333
1334 bool hasIntSubset = false;
1335 bool hasExtSubset = false;
1336 XMLCh* sysId = 0;
1337 XMLCh* pubId = 0;
1338
1339 DTDScanner dtdScanner
1340 (
1341 (DTDGrammar*) fGrammar
1342 , fDocTypeHandler
1343 , fGrammarPoolMemoryManager
1344 , fMemoryManager
1345 );
1346 dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);
1347
1348 // If the next character is '[' then we have no external subset cause
1349 // there is no system id, just the opening character of the internal
1350 // subset. Else, has to be an id.
1351 //
1352 // Just look at the next char, don't eat it.
1353 if (fReaderMgr.peekNextChar() == chOpenSquare)
1354 {
1355 hasIntSubset = true;
1356 }
1357 else
1358 {
1359 // Indicate we have an external subset
1360 hasExtSubset = true;
1361 fHasNoDTD = false;
1362
1363 // Get buffers for the ids
1364 XMLBufBid bbPubId(&fBufMgr);
1365 XMLBufBid bbSysId(&fBufMgr);
1366
1367 // Get the external subset id
1368 if (!dtdScanner.scanId(bbPubId.getBuffer(), bbSysId.getBuffer(), DTDScanner::IDType_External))
1369 {
1370 fReaderMgr.skipPastChar(chCloseAngle);
1371 return;
1372 }
1373
1374 // Get copies of the ids we got
1375 pubId = XMLString::replicate(bbPubId.getRawBuffer(), fMemoryManager);
1376 sysId = XMLString::replicate(bbSysId.getRawBuffer(), fMemoryManager);
1377
1378 // Skip spaces and check again for the opening of an internal subset
1379 fReaderMgr.skipPastSpaces();
1380
1381 // Just look at the next char, don't eat it.
1382 if (fReaderMgr.peekNextChar() == chOpenSquare) {
1383 hasIntSubset = true;
1384 }
1385 }
1386
1387 // Insure that the ids get cleaned up, if they got allocated
1388 ArrayJanitor<XMLCh> janSysId(sysId, fMemoryManager);
1389 ArrayJanitor<XMLCh> janPubId(pubId, fMemoryManager);
1390
1391 // If we have a doc type handler and advanced callbacks are enabled,
1392 // call the doctype event.
1393 if (fDocTypeHandler)
1394 fDocTypeHandler->doctypeDecl(*rootDecl, pubId, sysId, hasIntSubset, hasExtSubset);
1395
1396 // Ok, if we had an internal subset, we are just past the [ character
1397 // and need to parse that first.
1398 if (hasIntSubset)
1399 {
1400 // Eat the opening square bracket
1401 fReaderMgr.getNextChar();
1402
1403 checkInternalDTD(hasExtSubset, sysId, pubId);
1404
1405 // And try to scan the internal subset. If we fail, try to recover
1406 // by skipping forward tot he close angle and returning.
1407 if (!dtdScanner.scanInternalSubset())
1408 {
1409 fReaderMgr.skipPastChar(chCloseAngle);
1410 return;
1411 }
1412
1413 // Do a sanity check that some expanded PE did not propogate out of
1414 // the doctype. This could happen if it was terminated early by bad
1415 // syntax.
1416 if (fReaderMgr.getReaderDepth() > 1)
1417 {
1418 emitError(XMLErrs::PEPropogated);
1419
1420 // Ask the reader manager to pop back down to the main level
1421 fReaderMgr.cleanStackBackTo(1);
1422 }
1423
1424 fReaderMgr.skipPastSpaces();
1425 }
1426
1427 // And that should leave us at the closing > of the DOCTYPE line
1428 if (!fReaderMgr.skippedChar(chCloseAngle))
1429 {
1430 // Do a special check for the common scenario of an extra ] char at
1431 // the end. This is easy to recover from.
1432 if (fReaderMgr.skippedChar(chCloseSquare)
1433 && fReaderMgr.skippedChar(chCloseAngle))
1434 {
1435 emitError(XMLErrs::ExtraCloseSquare);
1436 }
1437 else
1438 {
1439 emitError(XMLErrs::UnterminatedDOCTYPE);
1440 fReaderMgr.skipPastChar(chCloseAngle);
1441 }
1442 }
1443
1444 // If we had an external subset, then we need to deal with that one
1445 // next. If we are reusing the validator, then don't scan it.
1446 if (hasExtSubset) {
1447
1448 InputSource* srcUsed=0;
1449 Janitor<InputSource> janSrc(srcUsed);
1450 // If we had an internal subset and we're using the cached grammar, it
1451 // means that the ignoreCachedDTD is set, so we ignore the cached
1452 // grammar
1453 if (fUseCachedGrammar && !hasIntSubset)
1454 {
1455 srcUsed = resolveSystemId(sysId, pubId);
1456 if (srcUsed) {
1457 janSrc.reset(srcUsed);
1458 Grammar* grammar = fGrammarResolver->getGrammar(srcUsed->getSystemId());
1459
1460 if (grammar && grammar->getGrammarType() == Grammar::DTDGrammarType) {
1461
1462 fDTDGrammar = (DTDGrammar*) grammar;
1463 fGrammar = fDTDGrammar;
1464 fValidator->setGrammar(fGrammar);
1465 // If we don't report at least the external subset boundaries,
1466 // an advanced document handler cannot know when the DTD end,
1467 // since we've already sent a doctype decl that indicates there's
1468 // there's an external subset.
1469 if (fDocTypeHandler)
1470 {
1471 fDocTypeHandler->startExtSubset();
1472 fDocTypeHandler->endExtSubset();
1473 }
1474
1475 return;
1476 }
1477 }
1478 }
1479
1480 if (fLoadExternalDTD || fValidate)
1481 {
1482 // And now create a reader to read this entity
1483 XMLReader* reader;
1484 if (srcUsed) {
1485 reader = fReaderMgr.createReader
1486 (
1487 *srcUsed
1488 , false
1489 , XMLReader::RefFrom_NonLiteral
1490 , XMLReader::Type_General
1491 , XMLReader::Source_External
1492 , fCalculateSrcOfs
1493 , fLowWaterMark
1494 );
1495 }
1496 else {
1497 reader = fReaderMgr.createReader
1498 (
1499 sysId
1500 , pubId
1501 , false
1502 , XMLReader::RefFrom_NonLiteral
1503 , XMLReader::Type_General
1504 , XMLReader::Source_External
1505 , srcUsed
1506 , fCalculateSrcOfs
1507 , fLowWaterMark
1508 , fDisableDefaultEntityResolution
1509 );
1510 janSrc.reset(srcUsed);
1511 }
1512 // If it failed then throw an exception
1513 if (!reader)
1514 ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Gen_CouldNotOpenDTD, srcUsed ? srcUsed->getSystemId() : sysId, fMemoryManager);
1515
1516 if (fToCacheGrammar) {
1517
1518 unsigned int stringId = fGrammarResolver->getStringPool()->addOrFind(srcUsed->getSystemId());
1519 const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(stringId);
1520
1521 fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
1522 ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr);
1523 fGrammarResolver->putGrammar(fGrammar);
1524 }
1525
1526 // In order to make the processing work consistently, we have to
1527 // make this look like an external entity. So create an entity
1528 // decl and fill it in and push it with the reader, as happens
1529 // with an external entity. Put a janitor on it to insure it gets
1530 // cleaned up. The reader manager does not adopt them.
1531 const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull };
1532 DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager);
1533 declDTD->setSystemId(sysId);
1534 declDTD->setIsExternal(true);
1535 Janitor<DTDEntityDecl> janDecl(declDTD);
1536
1537 // Mark this one as a throw at end
1538 reader->setThrowAtEnd(true);
1539
1540 // And push it onto the stack, with its pseudo name
1541 fReaderMgr.pushReader(reader, declDTD);
1542
1543 // Tell it its not in an include section
1544 dtdScanner.scanExtSubsetDecl(false, true);
1545 }
1546 }
1547 }
1548
scanStartTag(bool & gotData)1549 bool IGXMLScanner::scanStartTag(bool& gotData)
1550 {
1551 // Assume we will still have data until proven otherwise. It will only
1552 // ever be false if this is the root and its empty.
1553 gotData = true;
1554
1555 // Get the QName. In this case, we are not doing namespaces, so we just
1556 // use it as is and don't have to break it into parts.
1557 if (!fReaderMgr.getName(fQNameBuf))
1558 {
1559 emitError(XMLErrs::ExpectedElementName);
1560 fReaderMgr.skipToChar(chOpenAngle);
1561 return false;
1562 }
1563
1564 // Assume it won't be an empty tag
1565 bool isEmpty = false;
1566
1567 // Lets try to look up the element in the validator's element decl pool
1568 // We can pass bogus values for the URI id and the base name. We know that
1569 // this can only be called if we are doing a DTD style validator and that
1570 // he will only look at the QName.
1571 //
1572 // We tell him to fault in a decl if he does not find one.
1573 // Actually, we *don't* tell him to fault in a decl if he does not find one- NG
1574 bool wasAdded = false;
1575 const XMLCh *rawQName = fQNameBuf.getRawBuffer();
1576 XMLElementDecl* elemDecl = fGrammar->getElemDecl
1577 (
1578 fEmptyNamespaceId
1579 , 0
1580 , rawQName
1581 , Grammar::TOP_LEVEL_SCOPE
1582 );
1583 // look for it in the undeclared pool:
1584 if(!elemDecl)
1585 {
1586 elemDecl = fDTDElemNonDeclPool->getByKey(rawQName);
1587 }
1588 if(!elemDecl)
1589 {
1590 // we're assuming this must be a DTD element. DTD's can be
1591 // used with or without namespaces, but schemas cannot be used without
1592 // namespaces.
1593 wasAdded = true;
1594 elemDecl = new (fMemoryManager) DTDElementDecl
1595 (
1596 rawQName
1597 , fEmptyNamespaceId
1598 , DTDElementDecl::Any
1599 , fMemoryManager
1600 );
1601 elemDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)elemDecl));
1602 }
1603
1604 // We do something different here according to whether we found the
1605 // element or not.
1606 if (wasAdded)
1607 {
1608 // If validating then emit an error
1609 if (fValidate)
1610 {
1611 // This is to tell the reuse Validator that this element was
1612 // faulted-in, was not an element in the validator pool originally
1613 elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);
1614
1615 fValidator->emitError
1616 (
1617 XMLValid::ElementNotDefined
1618 , elemDecl->getFullName()
1619 );
1620 }
1621 }
1622 else
1623 {
1624 // If its not marked declared and validating, then emit an error
1625 if (fValidate && !elemDecl->isDeclared())
1626 {
1627 fValidator->emitError
1628 (
1629 XMLValid::ElementNotDefined
1630 , elemDecl->getFullName()
1631 );
1632 }
1633 }
1634
1635 // See if its the root element
1636 const bool isRoot = fElemStack.isEmpty();
1637
1638 // Expand the element stack and add the new element
1639 fElemStack.addLevel(elemDecl, fReaderMgr.getCurrentReaderNum());
1640 fElemStack.setValidationFlag(fValidate);
1641
1642 // Validate the element
1643 if (fValidate)
1644 fValidator->validateElement(elemDecl);
1645
1646 // If this is the first element and we are validating, check the root
1647 // element.
1648 if (isRoot)
1649 {
1650 fRootGrammar = fGrammar;
1651
1652 if (fValidate)
1653 {
1654 // If a DocType exists, then check if it matches the root name there.
1655 if (fRootElemName && !XMLString::equals(fQNameBuf.getRawBuffer(), fRootElemName))
1656 fValidator->emitError(XMLValid::RootElemNotLikeDocType);
1657 }
1658 }
1659 else
1660 {
1661 // If the element stack is not empty, then add this element as a
1662 // child of the previous top element. If its empty, this is the root
1663 // elem and is not the child of anything.
1664 fElemStack.addChild(elemDecl->getElementName(), true);
1665 }
1666
1667 // Skip any whitespace after the name
1668 fReaderMgr.skipPastSpaces();
1669
1670 // We loop until we either see a /> or >, handling attribute/value
1671 // pairs until we get there.
1672 XMLSize_t attCount = 0;
1673 XMLSize_t curAttListSize = fAttrList->size();
1674 wasAdded = false;
1675
1676 fElemCount++;
1677
1678 while (true)
1679 {
1680 // And get the next non-space character
1681 XMLCh nextCh = fReaderMgr.peekNextChar();
1682
1683 // If the next character is not a slash or closed angle bracket,
1684 // then it must be whitespace, since whitespace is required
1685 // between the end of the last attribute and the name of the next
1686 // one.
1687 if (attCount)
1688 {
1689 if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
1690 {
1691 bool bFoundSpace;
1692 fReaderMgr.skipPastSpaces(bFoundSpace);
1693 if (!bFoundSpace)
1694 {
1695 // Emit the error but keep on going
1696 emitError(XMLErrs::ExpectedWhitespace);
1697 }
1698 // Ok, peek another char
1699 nextCh = fReaderMgr.peekNextChar();
1700 }
1701 }
1702
1703 // Ok, here we first check for any of the special case characters.
1704 // If its not one, then we do the normal case processing, which
1705 // assumes that we've hit an attribute value, Otherwise, we do all
1706 // the special case checks.
1707 if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
1708 {
1709 // Assume its going to be an attribute, so get a name from
1710 // the input.
1711 if (!fReaderMgr.getName(fAttNameBuf))
1712 {
1713 emitError(XMLErrs::ExpectedAttrName);
1714 fReaderMgr.skipPastChar(chCloseAngle);
1715 return false;
1716 }
1717
1718 // And next must be an equal sign
1719 if (!scanEq())
1720 {
1721 static const XMLCh tmpList[] =
1722 {
1723 chSingleQuote, chDoubleQuote, chCloseAngle
1724 , chOpenAngle, chForwardSlash, chNull
1725 };
1726
1727 emitError(XMLErrs::ExpectedEqSign);
1728
1729 // Try to sync back up by skipping forward until we either
1730 // hit something meaningful.
1731 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
1732
1733 if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
1734 {
1735 // Jump back to top for normal processing of these
1736 continue;
1737 }
1738 else if ((chFound == chSingleQuote)
1739 || (chFound == chDoubleQuote)
1740 || fReaderMgr.getCurrentReader()->isWhitespace(chFound))
1741 {
1742 // Just fall through assuming that the value is to follow
1743 }
1744 else if (chFound == chOpenAngle)
1745 {
1746 // Assume a malformed tag and that new one is starting
1747 emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
1748 return false;
1749 }
1750 else
1751 {
1752 // Something went really wrong
1753 return false;
1754 }
1755 }
1756 // See if this attribute is declared for this element. If we are
1757 // not validating of course it will not be at first, but we will
1758 // fault it into the pool (to avoid lots of redundant errors.)
1759 XMLCh * namePtr = fAttNameBuf.getRawBuffer();
1760 XMLAttDef* attDef = ((DTDElementDecl *)elemDecl)->getAttDef(namePtr);
1761
1762 // Add this attribute to the attribute list that we use to
1763 // pass them to the handler. We reuse its existing elements
1764 // but expand it as required.
1765 // Note that we want to this first since this will
1766 // make a copy of the namePtr; we can then make use of
1767 // that copy in the hashtable lookup that checks
1768 // for duplicates. This will mean we may have to update
1769 // the type of the XMLAttr later.
1770 XMLAttr* curAtt;
1771 if (attCount >= curAttListSize)
1772 {
1773 curAtt = new (fMemoryManager) XMLAttr
1774 (
1775 0
1776 , namePtr
1777 , XMLUni::fgZeroLenString
1778 , XMLUni::fgZeroLenString
1779 , (attDef)?attDef->getType():XMLAttDef::CData
1780 , true
1781 , fMemoryManager
1782 );
1783 fAttrList->addElement(curAtt);
1784 }
1785 else
1786 {
1787 curAtt = fAttrList->elementAt(attCount);
1788 curAtt->set
1789 (
1790 0
1791 , namePtr
1792 , XMLUni::fgZeroLenString
1793 , XMLUni::fgZeroLenString
1794 , (attDef)?attDef->getType():XMLAttDef::CData
1795 );
1796 curAtt->setSpecified(true);
1797 }
1798 // reset namePtr so it refers to newly-allocated memory
1799 namePtr = (XMLCh *)curAtt->getName();
1800
1801 if (!attDef)
1802 {
1803 // If there is a validation handler, then we are validating
1804 // so emit an error.
1805 if (fValidate)
1806 {
1807 fValidator->emitError
1808 (
1809 XMLValid::AttNotDefinedForElement
1810 , fAttNameBuf.getRawBuffer()
1811 , elemDecl->getFullName()
1812 );
1813 }
1814 if(!fUndeclaredAttrRegistry->putIfNotPresent(namePtr, 0))
1815 {
1816 emitError
1817 (
1818 XMLErrs::AttrAlreadyUsedInSTag
1819 , namePtr
1820 , elemDecl->getFullName()
1821 );
1822 }
1823 }
1824 else
1825 {
1826 // prepare for duplicate detection
1827 unsigned int *curCountPtr = fAttDefRegistry->get(attDef);
1828 if(!curCountPtr)
1829 {
1830 curCountPtr = getNewUIntPtr();
1831 *curCountPtr = fElemCount;
1832 fAttDefRegistry->put(attDef, curCountPtr);
1833 }
1834 else if(*curCountPtr < fElemCount)
1835 *curCountPtr = fElemCount;
1836 else
1837 {
1838 emitError
1839 (
1840 XMLErrs::AttrAlreadyUsedInSTag
1841 , attDef->getFullName()
1842 , elemDecl->getFullName()
1843 );
1844 }
1845 }
1846
1847 // Skip any whitespace before the value and then scan the att
1848 // value. This will come back normalized with entity refs and
1849 // char refs expanded.
1850 fReaderMgr.skipPastSpaces();
1851 if (!scanAttValue(attDef, namePtr, fAttValueBuf))
1852 {
1853 static const XMLCh tmpList[] =
1854 {
1855 chCloseAngle, chOpenAngle, chForwardSlash, chNull
1856 };
1857
1858 emitError(XMLErrs::ExpectedAttrValue);
1859
1860 // It failed, so lets try to get synced back up. We skip
1861 // forward until we find some whitespace or one of the
1862 // chars in our list.
1863 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
1864
1865 if ((chFound == chCloseAngle)
1866 || (chFound == chForwardSlash)
1867 || fReaderMgr.getCurrentReader()->isWhitespace(chFound))
1868 {
1869 // Just fall through and process this attribute, though
1870 // the value will be "".
1871 }
1872 else if (chFound == chOpenAngle)
1873 {
1874 // Assume a malformed tag and that new one is starting
1875 emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
1876 return false;
1877 }
1878 else
1879 {
1880 // Something went really wrong
1881 return false;
1882 }
1883 }
1884 // must set the newly-minted value on the XMLAttr:
1885 curAtt->setValue(fAttValueBuf.getRawBuffer());
1886
1887 // Now that its all stretched out, lets look at its type and
1888 // determine if it has a valid value. It will output any needed
1889 // errors, but we just keep going. We only need to do this if
1890 // we are validating.
1891 if (attDef)
1892 {
1893 // Let the validator pass judgement on the attribute value
1894 if (fValidate)
1895 {
1896 fValidator->validateAttrValue
1897 (
1898 attDef
1899 , fAttValueBuf.getRawBuffer()
1900 , false
1901 , elemDecl
1902 );
1903 }
1904 }
1905
1906 attCount++;
1907 // And jump back to the top of the loop
1908 continue;
1909 }
1910
1911 // It was some special case character so do all of the checks and
1912 // deal with it.
1913 if (!nextCh)
1914 ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
1915
1916 if (nextCh == chForwardSlash)
1917 {
1918 fReaderMgr.getNextChar();
1919 isEmpty = true;
1920 if (!fReaderMgr.skippedChar(chCloseAngle))
1921 emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
1922 break;
1923 }
1924 else if (nextCh == chCloseAngle)
1925 {
1926 fReaderMgr.getNextChar();
1927 break;
1928 }
1929 else if (nextCh == chOpenAngle)
1930 {
1931 // Check for this one specially, since its going to be common
1932 // and it is kind of auto-recovering since we've already hit the
1933 // next open bracket, which is what we would have seeked to (and
1934 // skipped this whole tag.)
1935 emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
1936 break;
1937 }
1938 else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
1939 {
1940 // Check for this one specially, which is probably a missing
1941 // attribute name, e.g. ="value". Just issue expected name
1942 // error and eat the quoted string, then jump back to the
1943 // top again.
1944 emitError(XMLErrs::ExpectedAttrName);
1945 fReaderMgr.getNextChar();
1946 fReaderMgr.skipQuotedString(nextCh);
1947 fReaderMgr.skipPastSpaces();
1948 continue;
1949 }
1950 }
1951
1952 if(attCount)
1953 {
1954 // clean up after ourselves:
1955 // clear the map used to detect duplicate attributes
1956 fUndeclaredAttrRegistry->removeAll();
1957 }
1958
1959 // Ok, so lets get an enumerator for the attributes of this element
1960 // and run through them for well formedness and validity checks. But
1961 // make sure that we had any attributes before we do it, since the list
1962 // would have have gotten faulted in anyway.
1963 if (elemDecl->hasAttDefs())
1964 {
1965 // N.B.: this assumes DTD validation.
1966 XMLAttDefList& attDefList = elemDecl->getAttDefList();
1967 for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++)
1968 {
1969 // Get the current att def, for convenience and its def type
1970 const XMLAttDef& curDef = attDefList.getAttDef(i);
1971 const XMLAttDef::DefAttTypes defType = curDef.getDefaultType();
1972
1973 unsigned int *attCountPtr = fAttDefRegistry->get(&curDef);
1974 if (!attCountPtr || *attCountPtr < fElemCount)
1975 { // did not occur
1976 if (fValidate)
1977 {
1978 // If we are validating and its required, then an error
1979 if (defType == XMLAttDef::Required)
1980 {
1981 fValidator->emitError
1982 (
1983 XMLValid::RequiredAttrNotProvided
1984 , curDef.getFullName()
1985 );
1986 }
1987 else if ((defType == XMLAttDef::Default) ||
1988 (defType == XMLAttDef::Fixed) )
1989 {
1990 if (fStandalone && curDef.isExternal())
1991 {
1992 // XML 1.0 Section 2.9
1993 // Document is standalone, so attributes must not be defaulted.
1994 fValidator->emitError(XMLValid::NoDefAttForStandalone, curDef.getFullName(), elemDecl->getFullName());
1995
1996 }
1997 }
1998 }
1999
2000 // Fault in the value if needed, and bump the att count
2001 if ((defType == XMLAttDef::Default)
2002 || (defType == XMLAttDef::Fixed))
2003 {
2004 // Let the validator pass judgement on the attribute value
2005 if (fValidate)
2006 {
2007 fValidator->validateAttrValue
2008 (
2009 &curDef
2010 , curDef.getValue()
2011 , false
2012 , elemDecl
2013 );
2014 }
2015
2016 XMLAttr* curAtt;
2017 if (attCount >= curAttListSize)
2018 {
2019 curAtt = new (fMemoryManager) XMLAttr
2020 (
2021 0
2022 , curDef.getFullName()
2023 , XMLUni::fgZeroLenString
2024 , curDef.getValue()
2025 , curDef.getType()
2026 , false
2027 , fMemoryManager
2028 );
2029 fAttrList->addElement(curAtt);
2030 curAttListSize++;
2031 }
2032 else
2033 {
2034 curAtt = fAttrList->elementAt(attCount);
2035 curAtt->set
2036 (
2037 0
2038 , curDef.getFullName()
2039 , XMLUni::fgZeroLenString
2040 , curDef.getValue()
2041 , curDef.getType()
2042 );
2043 curAtt->setSpecified(false);
2044 }
2045 attCount++;
2046 }
2047 }
2048 }
2049 }
2050
2051 // If empty, validate content right now if we are validating and then
2052 // pop the element stack top. Else, we have to update the current stack
2053 // top's namespace mapping elements.
2054 if (isEmpty)
2055 {
2056 // If validating, then insure that its legal to have no content
2057 if (fValidate)
2058 {
2059 XMLSize_t failure;
2060 bool res = fValidator->checkContent(elemDecl, 0, 0, &failure);
2061 if (!res)
2062 {
2063 fValidator->emitError
2064 (
2065 XMLValid::ElementNotValidForContent
2066 , elemDecl->getFullName()
2067 , elemDecl->getFormattedContentModel()
2068 );
2069 }
2070 }
2071
2072 // Pop the element stack back off since it'll never be used now
2073 fElemStack.popTop();
2074
2075 // If the elem stack is empty, then it was an empty root
2076 if (isRoot)
2077 gotData = false;
2078 else {
2079 // Restore the validation flag
2080 fValidate = fElemStack.getValidationFlag();
2081 }
2082 }
2083
2084 // If we have a document handler, then tell it about this start tag. We
2085 // don't have any URI id to send along, so send fEmptyNamespaceId. We also do not send
2086 // any prefix since its just one big name if we are not doing namespaces.
2087 if (fDocHandler)
2088 {
2089 fDocHandler->startElement
2090 (
2091 *elemDecl
2092 , fEmptyNamespaceId
2093 , 0
2094 , *fAttrList
2095 , attCount
2096 , isEmpty
2097 , isRoot
2098 );
2099 }
2100
2101 return true;
2102 }
2103
2104
2105 // This method is called to scan a start tag when we are processing
2106 // namespaces. There are two different versions of this method, one for
2107 // namespace aware processing and one for non-namespace aware processing.
2108 //
2109 // This method is called after we've scanned the < of a start tag. So we
2110 // have to get the element name, then scan the attributes, after which
2111 // we are either going to see >, />, or attributes followed by one of those
2112 // sequences.
// Scans the rest of a start tag (the '<' has already been consumed) in
// namespace-aware mode.
//
// Order of work in this function:
//   1. read the element QName and raw-scan the attributes (rawAttrScan also
//      reports whether the tag was an empty-element tag);
//   2. for a non-root element under a schema grammar, capture the parent's
//      content model, leaf name vector and scope BEFORE a new element stack
//      level is added;
//   3. add the stack level and process xmlns/schema attributes (explicit
//      ones, and default/fixed ones declared in the DTD) so the prefix map
//      is current before the QName is resolved to a URI id;
//   4. look the element declaration up in the DTD or schema grammar and, if
//      none is found, create one and store it in the matching non-declared
//      element pool;
//   5. validate the element, build the final attribute list and invoke the
//      identity constraint, document handler and PSVI callbacks;
//   6. if the tag was empty, do the end-of-element work here as well and pop
//      the element stack.
//
// gotData - set to true on entry; set to false only when the scanned element
//           is an empty root element.
//
// Returns false when no valid element name could be read (an error is
// emitted and fReaderMgr.skipToChar(chOpenAngle) is called first); returns
// true in every other case.
scanStartTagNS(bool & gotData)2113 bool IGXMLScanner::scanStartTagNS(bool& gotData)
2114 {
2115 // Assume we will still have data until proven otherwise. It will only
2116 // ever be false if this is the root and its empty.
2117 gotData = true;
2118
2119 // Reset element content buffer
2120 fContent.reset();
2121
2122 // The current position is after the open bracket, so we need to read in
2123 // the element name.
2124 int prefixColonPos;
2125 if (!fReaderMgr.getQName(fQNameBuf, &prefixColonPos))
2126 {
2127 if (fQNameBuf.isEmpty())
2128 emitError(XMLErrs::ExpectedElementName);
2129 else
2130 emitError(XMLErrs::InvalidElementName, fQNameBuf.getRawBuffer());
2131 fReaderMgr.skipToChar(chOpenAngle);
2132 return false;
2133 }
2134
2135 // See if its the root element
2136 const bool isRoot = fElemStack.isEmpty();
2137
2138 // Skip any whitespace after the name
2139 fReaderMgr.skipPastSpaces();
2140
2141 // First we have to do the rawest attribute scan. We don't do any
2142 // normalization of them at all, since we don't know yet what type they
2143 // might be (since we need the element decl in order to do that.)
2144 bool isEmpty;
2145 XMLSize_t attCount = rawAttrScan
2146 (
2147 fQNameBuf.getRawBuffer()
2148 , *fRawAttrList
2149 , isEmpty
2150 );
2151
2152 // save the contentleafname and currentscope before addlevel, for later use
2153 ContentLeafNameTypeVector* cv = 0;
2154 XMLContentModel* cm = 0;
2155 unsigned int currentScope = Grammar::TOP_LEVEL_SCOPE;
2156 bool laxThisOne = false;
2157
2158 if (!isRoot && fGrammarType == Grammar::SchemaGrammarType)
2159 {
2160 // schema validator will have correct type if validating
2161 SchemaElementDecl* tempElement = (SchemaElementDecl*)
2162 fElemStack.topElement()->fThisElement;
2163 SchemaElementDecl::ModelTypes modelType = tempElement->getModelType();
2164 ComplexTypeInfo *currType = 0;
2165
2166 if (fValidate)
2167 {
2168 currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
2169 if (currType)
2170 modelType = (SchemaElementDecl::ModelTypes)currType->getContentType();
2171 else // something must have gone wrong
2172 modelType = SchemaElementDecl::Any;
2173 }
2174 else
2175 {
2176 currType = tempElement->getComplexTypeInfo();
2177 }
2178
2179 if ((modelType == SchemaElementDecl::Mixed_Simple)
2180 || (modelType == SchemaElementDecl::Mixed_Complex)
2181 || (modelType == SchemaElementDecl::Children))
2182 {
// NOTE(review): currType is dereferenced without a null check. In the
// validating branch a null currType forces modelType to Any, so this block
// is not reached; in the non-validating branch it comes straight from
// tempElement->getComplexTypeInfo() -- confirm it cannot be null for
// these model types.
2183 cm = currType->getContentModel();
2184 cv = cm->getContentLeafNameTypeVector();
2185 currentScope = fElemStack.getCurrentScope();
2186 }
2187 else if (modelType == SchemaElementDecl::Any) {
2188 laxThisOne = true;
2189 }
2190 }
2191
2192 // Now, since we might have to update the namespace map for this element,
2193 // but we don't have the element decl yet, we just tell the element stack
2194 // to expand up to get ready.
2195 XMLSize_t elemDepth = fElemStack.addLevel();
2196 fElemStack.setValidationFlag(fValidate);
2197 fElemStack.setPrefixColonPos(prefixColonPos);
2198
2199 // Check if there is any external schema location specified, and if we are at root,
2200 // go through them first before scanning those specified in the instance document
2201 if (isRoot && fDoSchema
2202 && (fExternalSchemaLocation || fExternalNoNamespaceSchemaLocation)) {
2203
2204 if (fExternalSchemaLocation)
2205 parseSchemaLocation(fExternalSchemaLocation, true);
2206 if (fExternalNoNamespaceSchemaLocation)
2207 resolveSchemaGrammar(fExternalNoNamespaceSchemaLocation, XMLUni::fgZeroLenString, true);
2208 }
2209
2210 // Make an initial pass through the list and find any xmlns attributes or
2211 // schema attributes.
2212 if (attCount) {
2213 scanRawAttrListforNameSpaces(attCount);
2214 }
2215
2216 // Also find any default or fixed xmlns attributes in DTD defined for
2217 // this element.
2218 XMLElementDecl* elemDecl = 0;
2219 const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
2220
2221 if (fGrammarType == Grammar::DTDGrammarType) {
2222
2223 if (!fSkipDTDValidation) {
2224 elemDecl = fGrammar->getElemDecl(
2225 fEmptyNamespaceId, 0, qnameRawBuf, Grammar::TOP_LEVEL_SCOPE
2226 );
2227
2228 if (elemDecl) {
2229 if (elemDecl->hasAttDefs()) {
2230 XMLAttDefList& attDefList = elemDecl->getAttDefList();
2231 for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++)
2232 {
2233 // Get the current att def, for convenience and its def type
2234 const XMLAttDef& curDef = attDefList.getAttDef(i);
2235 const XMLAttDef::DefAttTypes defType = curDef.getDefaultType();
2236
2237 // update the NSMap if there are any default/fixed xmlns attributes
2238 if ((defType == XMLAttDef::Default)
2239 || (defType == XMLAttDef::Fixed))
2240 {
2241 const XMLCh* rawPtr = curDef.getFullName();
// Matches either a name whose first 6 characters equal
// XMLUni::fgXMLNSColonString, or a name equal to XMLUni::fgXMLNSString.
2242 if (!XMLString::compareNString(rawPtr, XMLUni::fgXMLNSColonString, 6)
2243 || XMLString::equals(rawPtr, XMLUni::fgXMLNSString))
2244 updateNSMap(rawPtr, curDef.getValue());
2245 }
2246 }
2247 }
2248 }
2249 }
2250
// Fall back to the pool of DTD elements that were used without being declared.
2251 if (!elemDecl) {
2252 elemDecl = fDTDElemNonDeclPool->getByKey(qnameRawBuf);
2253 }
2254 }
2255
2256 // Resolve the qualified name to a URI and name so that we can look up
2257 // the element decl for this element. We have now updated the prefix to
2258 // namespace map so we should get the correct element now.
2259 unsigned int uriId = resolveQNameWithColon(
2260 qnameRawBuf, fPrefixBuf, ElemStack::Mode_Element, prefixColonPos
2261 );
2262
2263 //if schema, check if we should lax or skip the validation of this element
// Remember the validation state in effect for the parent: fValidate may be
// cleared for this element below, but the decision to record this element
// as a child of its parent (near the end of this function) uses this value.
2264 bool parentValidation = fValidate;
2265 if (cv) {
// NOTE(review): &qnameRawBuf[prefixColonPos + 1] is used as the local part;
// presumably prefixColonPos is -1 when the name has no prefix, making this
// the whole name -- confirm against ReaderMgr::getQName.
2266 QName element(fPrefixBuf.getRawBuffer(), &qnameRawBuf[prefixColonPos + 1], uriId, fMemoryManager);
2267 // elementDepth will be > 0, as cv is only constructed if element is not
2268 // root.
2269 laxThisOne = laxElementValidation(&element, cv, cm, elemDepth - 1);
2270 }
2271
2272 // Look up the element now in the grammar. This will get us back a
2273 // generic element decl object. We tell him to fault one in if he does
2274 // not find it.
2275 bool wasAdded = false;
2276 const XMLCh* nameRawBuf = &qnameRawBuf[prefixColonPos + 1];
2277
2278 if (fDoSchema) {
2279
2280 if (fGrammarType == Grammar::DTDGrammarType) {
2281 if (!switchGrammar(getURIText(uriId))) {
2282 fValidator->emitError(
2283 XMLValid::GrammarNotFound, getURIText(uriId)
2284 );
2285 }
2286 }
2287
2288 if (fGrammarType == Grammar::SchemaGrammarType) {
2289 elemDecl = fGrammar->getElemDecl(
2290 uriId, nameRawBuf, qnameRawBuf, currentScope
2291 );
2292
2293 // if not found, then it may be a reference, try TOP_LEVEL_SCOPE
2294 if (!elemDecl) {
2295 bool checkTopLevel = (currentScope != Grammar::TOP_LEVEL_SCOPE);
2296 const XMLCh* original_uriStr = fGrammar->getTargetNamespace();
2297 unsigned int orgGrammarUri = fURIStringPool->getId(original_uriStr);
2298
2299 if (orgGrammarUri != uriId) {
2300 if (switchGrammar(getURIText(uriId))) {
2301 checkTopLevel = true;
2302 }
2303 else {
2304 // the laxElementValidation routine (called above) will
2305 // set fValidate to false for a "skipped" element
2306 if (!laxThisOne && fValidate) {
2307 fValidator->emitError(
2308 XMLValid::GrammarNotFound, getURIText(uriId)
2309 );
2310 }
2311 checkTopLevel = false;
2312 }
2313 }
2314
2315 if (checkTopLevel) {
2316 elemDecl = fGrammar->getElemDecl(
2317 uriId, nameRawBuf, qnameRawBuf, Grammar::TOP_LEVEL_SCOPE
2318 );
2319 }
2320
2321 if (!elemDecl && currentScope != Grammar::TOP_LEVEL_SCOPE) {
2322
2323 if (orgGrammarUri == uriId) {
2324 // still not found in specified uri
2325 // try emptyNamespace see if element should be
2326 // un-qualified.
2327 // Use a temp variable until we decide this is the case
2328 if (uriId != fEmptyNamespaceId) {
2329 XMLElementDecl* tempElemDecl = fGrammar->getElemDecl(
2330 fEmptyNamespaceId, nameRawBuf, qnameRawBuf, currentScope
2331 );
2332
2333 if (tempElemDecl && tempElemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) {
2334 fValidator->emitError(
2335 XMLValid::ElementNotUnQualified, qnameRawBuf
2336 );
2337 elemDecl = tempElemDecl;
2338 }
2339 }
2340 }
2341 // still Not found in specified uri
2342 // go to original Grammar again to see if element needs
2343 // to be fully qualified.
2344 // Use a temp variable until we decide this is the case
2345 else if (uriId == fEmptyNamespaceId) {
2346
2347 if (switchGrammar(original_uriStr)) {
2348 XMLElementDecl* tempElemDecl = fGrammar->getElemDecl(
2349 orgGrammarUri, nameRawBuf, qnameRawBuf, currentScope
2350 );
2351 if (tempElemDecl && tempElemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) {
2352 fValidator->emitError(
2353 XMLValid::ElementNotQualified, qnameRawBuf
2354 );
2355 elemDecl = tempElemDecl;
2356 }
2357 }
2358 else if (!laxThisOne && fValidate) {
2359 fValidator->emitError(
2360 XMLValid::GrammarNotFound,original_uriStr
2361 );
2362 }
2363 }
2364 }
2365
2366 if (!elemDecl) {
2367 // still not found
2368 // switch back to original grammar first if necessary
2369 if (orgGrammarUri != uriId) {
2370 switchGrammar(original_uriStr);
2371 }
2372
2373 // look in the list of undeclared elements, as would have been
2374 // done before we made grammars stateless:
2375 elemDecl = fSchemaElemNonDeclPool->getByKey(
2376 nameRawBuf, uriId, (int)Grammar::TOP_LEVEL_SCOPE
2377 );
2378 }
2379 }
2380 }
2381 }
2382
// Nothing found anywhere: create a declaration with content model 'Any',
// store it in the non-declared pool for the active grammar type and use the
// id returned by the pool as the declaration's id.
2383 if (!elemDecl) {
2384
2385 if (fGrammarType == Grammar::DTDGrammarType) {
2386 elemDecl = new (fMemoryManager) DTDElementDecl(
2387 qnameRawBuf, uriId, DTDElementDecl::Any, fMemoryManager
2388 );
2389 elemDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)elemDecl));
2390 }
2391 else if (fGrammarType == Grammar::SchemaGrammarType) {
2392 elemDecl = new (fMemoryManager) SchemaElementDecl(
2393 fPrefixBuf.getRawBuffer(), nameRawBuf, uriId
2394 , SchemaElementDecl::Any, Grammar::TOP_LEVEL_SCOPE
2395 , fMemoryManager
2396 );
2397 elemDecl->setId(
2398 fSchemaElemNonDeclPool->put((void*)elemDecl->getBaseName()
2399 , uriId, (int)Grammar::TOP_LEVEL_SCOPE, (SchemaElementDecl*)elemDecl)
2400 );
2401 }
2402 wasAdded = true;
2403 }
2404
2405 // this info needed for DOMTypeInfo
2406 fPSVIElemContext.fErrorOccurred = false;
2407
2408 // We do something different here according to whether we found the
2409 // element or not.
2410 bool bXsiTypeSet= (fValidator && fGrammarType == Grammar::SchemaGrammarType)?((SchemaValidator*)fValidator)->getIsXsiTypeSet():false;
2411 if (wasAdded)
2412 {
2413 if (laxThisOne && !bXsiTypeSet) {
2414 fValidate = false;
2415 fElemStack.setValidationFlag(fValidate);
2416 }
2417 else if (fValidate)
2418 {
2419 // If validating then emit an error
2420
2421 // This is to tell the reuse Validator that this element was
2422 // faulted-in, was not an element in the grammar pool originally
2423 elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);
2424
2425 // xsi:type was specified, don't complain about missing definition
2426 if(!bXsiTypeSet)
2427 {
2428 fValidator->emitError
2429 (
2430 XMLValid::ElementNotDefined
2431 , elemDecl->getFullName()
2432 );
2433
2434 if(fGrammarType == Grammar::SchemaGrammarType)
2435 {
2436 fPSVIElemContext.fErrorOccurred = true;
2437 }
2438 }
2439 }
2440 }
2441 else
2442 {
2443 // If its not marked declared and validating, then emit an error
2444 if (!elemDecl->isDeclared()) {
2445 if(elemDecl->getCreateReason() == XMLElementDecl::NoReason) {
2446 if(!bXsiTypeSet && fGrammarType == Grammar::SchemaGrammarType) {
2447 fPSVIElemContext.fErrorOccurred = true;
2448 }
2449 }
2450
2451 if (laxThisOne) {
2452 fValidate = false;
2453 fElemStack.setValidationFlag(fValidate);
2454 }
2455 else if (fValidate && !bXsiTypeSet)
2456 {
2457 fValidator->emitError
2458 (
2459 XMLValid::ElementNotDefined
2460 , elemDecl->getFullName()
2461 );
2462 }
2463 }
2464 }
2465
2466 // Now we can update the element stack to set the current element
2467 // decl. We expanded the stack above, but couldn't store the element
2468 // decl because we didn't know it yet.
2469 fElemStack.setElement(elemDecl, fReaderMgr.getCurrentReaderNum());
2470 fElemStack.setCurrentURI(uriId);
2471
2472 if (isRoot)
2473 {
2474 fRootGrammar = fGrammar;
2475 if (fGrammarType == Grammar::SchemaGrammarType && !fRootElemName)
2476 fRootElemName = XMLString::replicate(qnameRawBuf, fMemoryManager);
2477 }
2478
2479 if (fGrammarType == Grammar::SchemaGrammarType && fPSVIHandler)
2480 {
2481
2482 fPSVIElemContext.fElemDepth++;
2483 if (elemDecl->isDeclared())
2484 {
2485 fPSVIElemContext.fNoneValidationDepth = fPSVIElemContext.fElemDepth;
2486 }
2487 else
2488 {
2489 fPSVIElemContext.fFullValidationDepth = fPSVIElemContext.fElemDepth;
2490
2491 /******
2492 * While we report an error for historical reasons, this should
2493 * actually result in lax assessment - NG.
2494 if (isRoot && fValidate)
2495 fPSVIElemContext.fErrorOccurred = true;
2496 *****/
2497 }
2498 }
2499
2500 // Validate the element
2501 if (fValidate)
2502 {
2503 fValidator->validateElement(elemDecl);
2504 if (fValidator->handlesSchema())
2505 {
2506 if (((SchemaValidator*) fValidator)->getErrorOccurred())
2507 fPSVIElemContext.fErrorOccurred = true;
2508 }
2509 }
2510
2511 if (fGrammarType == Grammar::SchemaGrammarType) {
2512
2513 // squirrel away the element's QName, so that we can do an efficient
2514 // end-tag match
2515 fElemStack.setCurrentSchemaElemName(fQNameBuf.getRawBuffer());
2516
2517 ComplexTypeInfo* typeinfo = (fValidate)
2518 ? ((SchemaValidator*)fValidator)->getCurrentTypeInfo()
2519 : ((SchemaElementDecl*) elemDecl)->getComplexTypeInfo();
2520
2521 if (typeinfo) {
2522 currentScope = typeinfo->getScopeDefined();
2523
2524 // switch grammar if the typeinfo has a different grammar (happens when there is xsi:type)
// The text before the first comma of the type name is passed to
// switchGrammar() as the namespace URI; a comma at index 0 means the
// zero-length URI is used.
2525 XMLCh* typeName = typeinfo->getTypeName();
2526 const int comma = XMLString::indexOf(typeName, chComma);
2527 if (comma > 0) {
2528 XMLBuffer prefixBuf(comma+1, fMemoryManager);
2529 prefixBuf.append(typeName, comma);
2530 const XMLCh* uriStr = prefixBuf.getRawBuffer();
2531
2532 bool errorCondition = !switchGrammar(uriStr) && fValidate;
2533 if (errorCondition && !laxThisOne)
2534 {
2535 fValidator->emitError
2536 (
2537 XMLValid::GrammarNotFound
2538 , prefixBuf.getRawBuffer()
2539 );
2540 }
2541 }
2542 else if (comma == 0) {
2543 bool errorCondition = !switchGrammar(XMLUni::fgZeroLenString) && fValidate;
2544 if (errorCondition && !laxThisOne)
2545 {
2546 fValidator->emitError
2547 (
2548 XMLValid::GrammarNotFound
2549 , XMLUni::fgZeroLenString
2550 );
2551 }
2552 }
2553 }
2554 fElemStack.setCurrentScope(currentScope);
2555
2556 // Set element next state
2557 if (elemDepth >= fElemStateSize) {
2558 resizeElemState();
2559 }
2560
2561 fElemState[elemDepth] = 0;
2562 fElemLoopState[elemDepth] = 0;
2563 }
2564
2565 fElemStack.setCurrentGrammar(fGrammar);
2566
2567 // If this is the first element and we are validating, check the root
2568 // element.
2569 if (isRoot)
2570 {
2571 if (fValidate)
2572 {
2573 // If a DocType exists, then check if it matches the root name there.
2574 if (fRootElemName && !XMLString::equals(qnameRawBuf, fRootElemName))
2575 fValidator->emitError(XMLValid::RootElemNotLikeDocType);
2576 }
2577 }
2578 else if (parentValidation)
2579 {
2580 // If the element stack is not empty, then add this element as a
2581 // child of the previous top element. If its empty, this is the root
2582 // elem and is not the child of anything.
2583 fElemStack.addChild(elemDecl->getElementName(), true);
2584 }
2585
2586 // PSVI handling: even if it turns out there are
2587 // no attributes, we need to reset this list...
2588 if(getPSVIHandler() && fGrammarType == Grammar::SchemaGrammarType )
2589 fPSVIAttrList->reset();
2590
2591 // Now lets get the fAttrList filled in. This involves faulting in any
2592 // defaulted and fixed attributes and normalizing the values of any that
2593 // we got explicitly.
2594 //
2595 // We update the attCount value with the total number of attributes, but
2596 // it goes in with the number of values we got during the raw scan of
2597 // explicitly provided attrs above.
2598 attCount = buildAttList(*fRawAttrList, attCount, elemDecl, *fAttrList);
2599 if(attCount)
2600 {
2601 // clean up after ourselves:
2602 // clear the map used to detect duplicate attributes
2603 fUndeclaredAttrRegistry->removeAll();
2604 }
2605
2606 // activate identity constraints
2607 if (fGrammar &&
2608 fGrammarType == Grammar::SchemaGrammarType &&
2609 toCheckIdentityConstraint())
2610 {
2611 fICHandler->activateIdentityConstraint
2612 (
2613 (SchemaElementDecl*) elemDecl
2614 , (int) elemDepth
2615 , uriId
2616 , fPrefixBuf.getRawBuffer()
2617 , *fAttrList
2618 , attCount
2619 , fValidationContext
2620 );
2621 }
2622
2623 // Since the element may have default values, call start tag now regardless if it is empty or not
2624 // If we have a document handler, then tell it about this start tag
// The literal 'false' below is passed even when isEmpty is true; for an
// empty element the matching endElement() call is made explicitly in the
// isEmpty branch further down.
2625 if (fDocHandler)
2626 {
2627 fDocHandler->startElement
2628 (
2629 *elemDecl
2630 , uriId
2631 , fPrefixBuf.getRawBuffer()
2632 , *fAttrList
2633 , attCount
2634 , false
2635 , isRoot
2636 );
2637 }
2638
2639 // if we have a PSVIHandler, now's the time to call
2640 // its handleAttributesPSVI method:
2641 if(fPSVIHandler && fGrammarType == Grammar::SchemaGrammarType)
2642 {
2643 QName *eName = elemDecl->getElementName();
2644 fPSVIHandler->handleAttributesPSVI
2645 (
2646 eName->getLocalPart()
2647 , fURIStringPool->getValueForId(eName->getURI())
2648 , fPSVIAttrList
2649 );
2650 }
2651
2652 // If empty, validate content right now if we are validating and then
2653 // pop the element stack top. Else, we have to update the current stack
2654 // top's namespace mapping elements.
2655 if (isEmpty)
2656 {
2657 // Pop the element stack back off since it'll never be used now
2658 fElemStack.popTop();
2659
2660 // reset current type info
2661 DatatypeValidator* psviMemberType = 0;
2662 if (fGrammarType == Grammar::SchemaGrammarType)
2663 {
2664 if (fValidate && elemDecl->isDeclared())
2665 {
2666 fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
2667 if(!fPSVIElemContext.fCurrentTypeInfo)
2668 fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
2669 else
2670 fPSVIElemContext.fCurrentDV = 0;
2671 if(fPSVIHandler)
2672 {
2673 fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) fValidator)->getNormalizedValue();
2674
2675 if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString))
2676 fPSVIElemContext.fNormalizedValue = 0;
2677 }
2678 }
2679 else
2680 {
2681 fPSVIElemContext.fCurrentDV = 0;
2682 fPSVIElemContext.fCurrentTypeInfo = 0;
2683 fPSVIElemContext.fNormalizedValue = 0;
2684 }
2685 }
2686
2687 // If validating, then insure that its legal to have no content
2688 if (fValidate)
2689 {
2690 XMLSize_t failure;
2691 bool res = fValidator->checkContent(elemDecl, 0, 0, &failure);
2692 if (!res)
2693 {
2694 fValidator->emitError
2695 (
2696 XMLValid::ElementNotValidForContent
2697 , elemDecl->getFullName()
2698 , elemDecl->getFormattedContentModel()
2699 );
2700 }
2701
2702 if (fGrammarType == Grammar::SchemaGrammarType) {
2703
2704 if (((SchemaValidator*) fValidator)->getErrorOccurred())
2705 {
2706 fPSVIElemContext.fErrorOccurred = true;
2707 }
2708 else
2709 {
2710 if (fPSVIHandler)
2711 {
2712 fPSVIElemContext.fIsSpecified = ((SchemaValidator*) fValidator)->getIsElemSpecified();
2713 if(fPSVIElemContext.fIsSpecified)
2714 fPSVIElemContext.fNormalizedValue = ((SchemaElementDecl *)elemDecl)->getDefaultValue();
2715 }
2716 // note that if we're empty, won't be a current DV
2717 if (fPSVIElemContext.fCurrentDV && fPSVIElemContext.fCurrentDV->getType() == DatatypeValidator::Union)
2718 psviMemberType = fValidationContext->getValidatingMemberType();
2719 }
2720
2721 // call matchers and de-activate context
2722 if (toCheckIdentityConstraint())
2723 {
2724 fICHandler->deactivateContext
2725 (
2726 (SchemaElementDecl *) elemDecl
2727 , fContent.getRawBuffer()
2728 , fValidationContext
2729 , fPSVIElemContext.fCurrentDV
2730 );
2731 }
2732
2733 }
2734 }
2735 else if (fGrammarType == Grammar::SchemaGrammarType) {
2736 ((SchemaValidator*)fValidator)->resetNillable();
2737 }
2738
2739 if (fGrammarType == Grammar::SchemaGrammarType)
2740 {
2741 if (fPSVIHandler)
2742 {
2743 endElementPSVI((SchemaElementDecl*)elemDecl, psviMemberType);
2744 }
2745 }
2746
2747 // If we have a doc handler, tell it about the end tag
2748 if (fDocHandler)
2749 {
2750 fDocHandler->endElement
2751 (
2752 *elemDecl
2753 , uriId
2754 , isRoot
2755 , fPrefixBuf.getRawBuffer()
2756 );
2757 }
2758
2759 // If the elem stack is empty, then it was an empty root
2760 if (isRoot)
2761 gotData = false;
2762 else
2763 {
2764 // Restore the grammar
// The stack was popped above, so these getters now read the entry that is
// on top after the pop (the enclosing element's grammar and flag).
2765 fGrammar = fElemStack.getCurrentGrammar();
2766 fGrammarType = fGrammar->getGrammarType();
2767 if (fGrammarType == Grammar::SchemaGrammarType && !fValidator->handlesSchema()) {
2768 if (fValidatorFromUser)
2769 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
2770 else {
2771 fValidator = fSchemaValidator;
2772 }
2773 }
2774 else if (fGrammarType == Grammar::DTDGrammarType && !fValidator->handlesDTD()) {
2775 if (fValidatorFromUser)
2776 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager);
2777 else {
2778 fValidator = fDTDValidator;
2779 }
2780 }
2781
2782 fValidator->setGrammar(fGrammar);
2783
2784 // Restore the validation flag
2785 fValidate = fElemStack.getValidationFlag();
2786 }
2787 }
2788 else if (fGrammarType == Grammar::SchemaGrammarType)
2789 {
2790 // send a partial element psvi
2791 if (fPSVIHandler)
2792 {
2793
2794 ComplexTypeInfo* curTypeInfo = 0;
2795 DatatypeValidator* curDV = 0;
2796 XSTypeDefinition* typeDef = 0;
2797
2798 if (fValidate && elemDecl->isDeclared())
2799 {
2800 curTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
2801
2802 if (curTypeInfo)
2803 {
2804 typeDef = (XSTypeDefinition*) fModel->getXSObject(curTypeInfo);
2805 }
2806 else
2807 {
2808 curDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
2809
2810 if (curDV)
2811 {
2812 typeDef = (XSTypeDefinition*) fModel->getXSObject(curDV);
2813 }
2814 }
2815 }
2816
2817 fPSVIElement->reset
2818 (
2819 PSVIElement::VALIDITY_NOTKNOWN
2820 , PSVIElement::VALIDATION_NONE
2821 , fRootElemName
2822 , ((SchemaValidator*) fValidator)->getIsElemSpecified()
2823 , (elemDecl->isDeclared()) ? (XSElementDeclaration*) fModel->getXSObject(elemDecl) : 0
2824 , typeDef
2825 , 0 //memberType
2826 , fModel
2827 , ((SchemaElementDecl*)elemDecl)->getDefaultValue()
2828 , 0
2829 , 0
2830 , 0
2831 );
2832
2833
2834 fPSVIHandler->handlePartialElementPSVI
2835 (
2836 elemDecl->getBaseName()
2837 , fURIStringPool->getValueForId(elemDecl->getURI())
2838 , fPSVIElement
2839 );
2840
2841 }
2842
2843 // not empty
// Save this element's error flag for the non-empty schema case.
// NOTE(review): presumably popped again when the end tag is scanned -- confirm.
2844 fErrorStack->push(fPSVIElemContext.fErrorOccurred);
2845 }
2846
2847 return true;
2848 }
2849
2850
2851 // ---------------------------------------------------------------------------
2852 // IGXMLScanner: Helper methods
2853 // ---------------------------------------------------------------------------
resizeElemState()2854 void IGXMLScanner::resizeElemState() {
2855
2856 unsigned int newSize = fElemStateSize * 2;
2857 unsigned int* newElemState = (unsigned int*) fMemoryManager->allocate
2858 (
2859 newSize * sizeof(unsigned int)
2860 ); //new unsigned int[newSize];
2861 unsigned int* newElemLoopState = (unsigned int*) fMemoryManager->allocate
2862 (
2863 newSize * sizeof(unsigned int)
2864 ); //new unsigned int[newSize];
2865
2866 // Copy the existing values
2867 unsigned int index = 0;
2868 for (; index < fElemStateSize; index++)
2869 {
2870 newElemState[index] = fElemState[index];
2871 newElemLoopState[index] = fElemLoopState[index];
2872 }
2873
2874 for (; index < newSize; index++)
2875 newElemLoopState[index] = newElemState[index] = 0;
2876
2877 // Delete the old array and udpate our members
2878 fMemoryManager->deallocate(fElemState); //delete [] fElemState;
2879 fMemoryManager->deallocate(fElemLoopState); //delete [] fElemState;
2880 fElemState = newElemState;
2881 fElemLoopState = newElemLoopState;
2882 fElemStateSize = newSize;
2883 }
2884
resizeRawAttrColonList()2885 void IGXMLScanner::resizeRawAttrColonList() {
2886
2887 unsigned int newSize = fRawAttrColonListSize * 2;
2888 int* newRawAttrColonList = (int*) fMemoryManager->allocate
2889 (
2890 newSize * sizeof(int)
2891 ); //new int[newSize];
2892
2893 // Copy the existing values
2894 unsigned int index = 0;
2895 for (; index < fRawAttrColonListSize; index++)
2896 newRawAttrColonList[index] = fRawAttrColonList[index];
2897
2898 // Delete the old array and udpate our members
2899 fMemoryManager->deallocate(fRawAttrColonList); //delete [] fRawAttrColonList;
2900 fRawAttrColonList = newRawAttrColonList;
2901 fRawAttrColonListSize = newSize;
2902 }
2903
2904 // ---------------------------------------------------------------------------
2905 // IGXMLScanner: Grammar preparsing
2906 // ---------------------------------------------------------------------------
loadGrammar(const InputSource & src,const short grammarType,const bool toCache)2907 Grammar* IGXMLScanner::loadGrammar(const InputSource& src
2908 , const short grammarType
2909 , const bool toCache)
2910 {
2911 Grammar* loadedGrammar = 0;
2912
2913 ReaderMgrResetType resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
2914
2915 try
2916 {
2917 fGrammarResolver->cacheGrammarFromParse(false);
2918 // if the new grammar has to be cached, better use the already cached
2919 // grammars, or the an exception will be thrown when caching an already
2920 // cached grammar
2921 fGrammarResolver->useCachedGrammarInParse(toCache);
2922 fRootGrammar = 0;
2923
2924 if (fValScheme == Val_Auto) {
2925 fValidate = true;
2926 }
2927
2928 // Reset some status flags
2929 fInException = false;
2930 fStandalone = false;
2931 fErrorCount = 0;
2932 fHasNoDTD = true;
2933 fSeeXsi = false;
2934
2935 if (grammarType == Grammar::SchemaGrammarType) {
2936 loadedGrammar = loadXMLSchemaGrammar(src, toCache);
2937 }
2938 else if (grammarType == Grammar::DTDGrammarType) {
2939 loadedGrammar = loadDTDGrammar(src, toCache);
2940 }
2941 }
2942 // NOTE:
2943 //
2944 // In all of the error processing below, the emitError() call MUST come
2945 // before the flush of the reader mgr, or it will fail because it tries
2946 // to find out the position in the XML source of the error.
2947 catch(const XMLErrs::Codes)
2948 {
2949 // This is a 'first fatal error' type exit, so fall through
2950 }
2951 catch(const XMLValid::Codes)
2952 {
2953 // This is a 'first fatal error' type exit, so fall through
2954 }
2955 catch(const XMLException& excToCatch)
2956 {
2957 // Emit the error and catch any user exception thrown from here. Make
2958 // sure in all cases we flush the reader manager.
2959 fInException = true;
2960 try
2961 {
2962 if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
2963 emitError
2964 (
2965 XMLErrs::XMLException_Warning
2966 , excToCatch.getCode()
2967 , excToCatch.getMessage()
2968 );
2969 else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
2970 emitError
2971 (
2972 XMLErrs::XMLException_Fatal
2973 , excToCatch.getCode()
2974 , excToCatch.getMessage()
2975 );
2976 else
2977 emitError
2978 (
2979 XMLErrs::XMLException_Error
2980 , excToCatch.getCode()
2981 , excToCatch.getMessage()
2982 );
2983 }
2984 catch(const OutOfMemoryException&)
2985 {
2986 // This is a special case for out-of-memory
2987 // conditions, because resetting the ReaderMgr
2988 // can be problematic.
2989 resetReaderMgr.release();
2990
2991 throw;
2992 }
2993 }
2994 catch(const OutOfMemoryException&)
2995 {
2996 // This is a special case for out-of-memory
2997 // conditions, because resetting the ReaderMgr
2998 // can be problematic.
2999 resetReaderMgr.release();
3000
3001 throw;
3002 }
3003
3004 return loadedGrammar;
3005 }
3006
resetCachedGrammar()3007 void IGXMLScanner::resetCachedGrammar ()
3008 {
3009 fCachedSchemaInfoList->removeAll ();
3010 }
3011
loadDTDGrammar(const InputSource & src,const bool toCache)3012 Grammar* IGXMLScanner::loadDTDGrammar(const InputSource& src,
3013 const bool toCache)
3014 {
3015 // Reset the validators
3016 fDTDValidator->reset();
3017 if (fValidatorFromUser)
3018 fValidator->reset();
3019
3020 if (!fValidator->handlesDTD()) {
3021 if (fValidatorFromUser && fValidate)
3022 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager);
3023 else {
3024 fValidator = fDTDValidator;
3025 }
3026 }
3027
3028 fDTDGrammar = (DTDGrammar*) fGrammarResolver->getGrammar(XMLUni::fgDTDEntityString);
3029
3030 if (fDTDGrammar) {
3031 fDTDGrammar->reset();
3032 }
3033 else {
3034 fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager);
3035 fGrammarResolver->putGrammar(fDTDGrammar);
3036 }
3037
3038 fGrammar = fDTDGrammar;
3039 fGrammarType = fGrammar->getGrammarType();
3040 fValidator->setGrammar(fGrammar);
3041
3042 // And for all installed handlers, send reset events. This gives them
3043 // a chance to flush any cached data.
3044 if (fDocHandler)
3045 fDocHandler->resetDocument();
3046 if (fEntityHandler)
3047 fEntityHandler->resetEntities();
3048 if (fErrorReporter)
3049 fErrorReporter->resetErrors();
3050
3051 // Clear out the id reference list
3052 resetValidationContext();
3053 // and clear out the darned undeclared DTD element pool...
3054 fDTDElemNonDeclPool->removeAll();
3055
3056 if (toCache) {
3057
3058 unsigned int sysId = fGrammarResolver->getStringPool()->addOrFind(src.getSystemId());
3059 const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(sysId);
3060
3061 fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
3062 ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr);
3063 fGrammarResolver->putGrammar(fGrammar);
3064 }
3065
3066 // Handle the creation of the XML reader object for this input source.
3067 // This will provide us with transcoding and basic lexing services.
3068 XMLReader* newReader = fReaderMgr.createReader
3069 (
3070 src
3071 , false
3072 , XMLReader::RefFrom_NonLiteral
3073 , XMLReader::Type_General
3074 , XMLReader::Source_External
3075 , fCalculateSrcOfs
3076 , fLowWaterMark
3077 );
3078 if (!newReader) {
3079 if (src.getIssueFatalErrorIfNotFound())
3080 ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId(), fMemoryManager);
3081 else
3082 ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId(), fMemoryManager);
3083 }
3084
3085 // In order to make the processing work consistently, we have to
3086 // make this look like an external entity. So create an entity
3087 // decl and fill it in and push it with the reader, as happens
3088 // with an external entity. Put a janitor on it to insure it gets
3089 // cleaned up. The reader manager does not adopt them.
3090 const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull };
3091 DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager);
3092 declDTD->setSystemId(src.getSystemId());
3093 declDTD->setIsExternal(true);
3094 Janitor<DTDEntityDecl> janDecl(declDTD);
3095
3096 // Mark this one as a throw at end
3097 newReader->setThrowAtEnd(true);
3098
3099 // And push it onto the stack, with its pseudo name
3100 fReaderMgr.pushReader(newReader, declDTD);
3101
3102 // If we have a doc type handler and advanced callbacks are enabled,
3103 // call the doctype event.
3104 if (fDocTypeHandler) {
3105
3106 // Create a dummy root
3107 DTDElementDecl* rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl
3108 (
3109 gDTDStr
3110 , fEmptyNamespaceId
3111 , DTDElementDecl::Any
3112 , fGrammarPoolMemoryManager
3113 );
3114 rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
3115 rootDecl->setExternalElemDeclaration(true);
3116 Janitor<DTDElementDecl> janSrc(rootDecl);
3117
3118 fDocTypeHandler->doctypeDecl(*rootDecl, src.getPublicId(), src.getSystemId(), false, true);
3119 }
3120
3121 // Create DTDScanner
3122 DTDScanner dtdScanner
3123 (
3124 (DTDGrammar*) fGrammar
3125 , fDocTypeHandler
3126 , fGrammarPoolMemoryManager
3127 , fMemoryManager
3128 );
3129 dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);
3130
3131 // Tell it its not in an include section
3132 dtdScanner.scanExtSubsetDecl(false, true);
3133
3134 if (fValidate) {
3135 // validate the DTD scan so far
3136 fValidator->preContentValidation(false, true);
3137 }
3138
3139 if (toCache)
3140 fGrammarResolver->cacheGrammars();
3141
3142 return fDTDGrammar;
3143 }
3144
3145 // ---------------------------------------------------------------------------
3146 // IGXMLScanner: Helper methods
3147 // ---------------------------------------------------------------------------
processSchemaLocation(XMLCh * const schemaLoc)3148 void IGXMLScanner::processSchemaLocation(XMLCh* const schemaLoc)
3149 {
3150 XMLCh* locStr = schemaLoc;
3151 XMLReader* curReader = fReaderMgr.getCurrentReader();
3152
3153 fLocationPairs->removeAllElements();
3154 while (*locStr)
3155 {
3156 do {
3157 // Do we have an escaped character ?
3158 if (*locStr == 0xFFFF)
3159 continue;
3160
3161 if (!curReader->isWhitespace(*locStr))
3162 break;
3163
3164 *locStr = chNull;
3165 } while (*++locStr);
3166
3167 if (*locStr) {
3168
3169 fLocationPairs->addElement(locStr);
3170
3171 while (*++locStr) {
3172 // Do we have an escaped character ?
3173 if (*locStr == 0xFFFF)
3174 continue;
3175 if (curReader->isWhitespace(*locStr))
3176 break;
3177 }
3178 }
3179 }
3180 }
3181
endElementPSVI(SchemaElementDecl * const elemDecl,DatatypeValidator * const memberDV)3182 void IGXMLScanner::endElementPSVI(SchemaElementDecl* const elemDecl,
3183 DatatypeValidator* const memberDV)
3184 {
3185 PSVIElement::ASSESSMENT_TYPE validationAttempted;
3186 PSVIElement::VALIDITY_STATE validity = PSVIElement::VALIDITY_NOTKNOWN;
3187
3188 if (fPSVIElemContext.fElemDepth > fPSVIElemContext.fFullValidationDepth)
3189 validationAttempted = PSVIElement::VALIDATION_FULL;
3190 else if (fPSVIElemContext.fElemDepth > fPSVIElemContext.fNoneValidationDepth)
3191 validationAttempted = PSVIElement::VALIDATION_NONE;
3192 else
3193 {
3194 validationAttempted = PSVIElement::VALIDATION_PARTIAL;
3195 fPSVIElemContext.fFullValidationDepth =
3196 fPSVIElemContext.fNoneValidationDepth = fPSVIElemContext.fElemDepth - 1;
3197 }
3198
3199 if (fValidate && elemDecl->isDeclared())
3200 {
3201 validity = (fPSVIElemContext.fErrorOccurred)
3202 ? PSVIElement::VALIDITY_INVALID : PSVIElement::VALIDITY_VALID;
3203 }
3204
3205 XSTypeDefinition* typeDef = 0;
3206 bool isMixed = false;
3207 if (fPSVIElemContext.fCurrentTypeInfo)
3208 {
3209 typeDef = (XSTypeDefinition*) fModel->getXSObject(fPSVIElemContext.fCurrentTypeInfo);
3210 SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes)fPSVIElemContext.fCurrentTypeInfo->getContentType();
3211 isMixed = (modelType == SchemaElementDecl::Mixed_Simple
3212 || modelType == SchemaElementDecl::Mixed_Complex);
3213 }
3214 else if (fPSVIElemContext.fCurrentDV)
3215 typeDef = (XSTypeDefinition*) fModel->getXSObject(fPSVIElemContext.fCurrentDV);
3216
3217 XMLCh* canonicalValue = 0;
3218 if (fPSVIElemContext.fNormalizedValue && !isMixed &&
3219 validity == PSVIElement::VALIDITY_VALID)
3220 {
3221 if (memberDV)
3222 canonicalValue = (XMLCh*) memberDV->getCanonicalRepresentation(fPSVIElemContext.fNormalizedValue, fMemoryManager);
3223 else if (fPSVIElemContext.fCurrentDV)
3224 canonicalValue = (XMLCh*) fPSVIElemContext.fCurrentDV->getCanonicalRepresentation(fPSVIElemContext.fNormalizedValue, fMemoryManager);
3225 }
3226
3227 fPSVIElement->reset
3228 (
3229 validity
3230 , validationAttempted
3231 , fRootElemName
3232 , fPSVIElemContext.fIsSpecified
3233 , (elemDecl->isDeclared())
3234 ? (XSElementDeclaration*) fModel->getXSObject(elemDecl) : 0
3235 , typeDef
3236 , (memberDV) ? (XSSimpleTypeDefinition*) fModel->getXSObject(memberDV) : 0
3237 , fModel
3238 , elemDecl->getDefaultValue()
3239 , fPSVIElemContext.fNormalizedValue
3240 , canonicalValue
3241 );
3242
3243 fPSVIHandler->handleElementPSVI
3244 (
3245 elemDecl->getBaseName()
3246 , fURIStringPool->getValueForId(elemDecl->getURI())
3247 , fPSVIElement
3248 );
3249
3250 // decrease element depth
3251 fPSVIElemContext.fElemDepth--;
3252
3253 }
3254
resetPSVIElemContext()3255 void IGXMLScanner::resetPSVIElemContext()
3256 {
3257 fPSVIElemContext.fIsSpecified = false;
3258 fPSVIElemContext.fErrorOccurred = false;
3259 fPSVIElemContext.fElemDepth = -1;
3260 fPSVIElemContext.fFullValidationDepth = -1;
3261 fPSVIElemContext.fNoneValidationDepth = -1;
3262 fPSVIElemContext.fCurrentDV = 0;
3263 fPSVIElemContext.fCurrentTypeInfo = 0;
3264 fPSVIElemContext.fNormalizedValue = 0;
3265 }
3266
3267 XERCES_CPP_NAMESPACE_END
3268