1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 /*
19  * $Id: IGXMLScanner.cpp 882548 2009-11-20 13:44:14Z borisk $
20  */
21 
22 // ---------------------------------------------------------------------------
23 //  Includes
24 // ---------------------------------------------------------------------------
25 #include <xercesc/internal/IGXMLScanner.hpp>
26 #include <xercesc/util/RuntimeException.hpp>
27 #include <xercesc/util/UnexpectedEOFException.hpp>
28 #include <xercesc/sax/InputSource.hpp>
29 #include <xercesc/framework/XMLDocumentHandler.hpp>
30 #include <xercesc/framework/XMLEntityHandler.hpp>
31 #include <xercesc/framework/XMLPScanToken.hpp>
32 #include <xercesc/internal/EndOfEntityException.hpp>
33 #include <xercesc/framework/MemoryManager.hpp>
34 #include <xercesc/framework/XMLGrammarPool.hpp>
35 #include <xercesc/framework/XMLDTDDescription.hpp>
36 #include <xercesc/framework/psvi/PSVIElement.hpp>
37 #include <xercesc/framework/psvi/PSVIHandler.hpp>
38 #include <xercesc/framework/psvi/PSVIAttributeList.hpp>
39 #include <xercesc/validators/common/GrammarResolver.hpp>
40 #include <xercesc/validators/DTD/DocTypeHandler.hpp>
41 #include <xercesc/validators/DTD/DTDScanner.hpp>
42 #include <xercesc/validators/DTD/DTDValidator.hpp>
43 #include <xercesc/validators/schema/SchemaValidator.hpp>
44 #include <xercesc/validators/schema/identity/IdentityConstraintHandler.hpp>
45 #include <xercesc/validators/schema/identity/IC_Selector.hpp>
46 #include <xercesc/util/OutOfMemoryException.hpp>
47 
48 XERCES_CPP_NAMESPACE_BEGIN
49 
50 
51 typedef JanitorMemFunCall<IGXMLScanner> CleanupType;
52 typedef JanitorMemFunCall<ReaderMgr>    ReaderMgrResetType;
53 
54 
55 // ---------------------------------------------------------------------------
56 //  IGXMLScanner: Constructors and Destructor
57 // ---------------------------------------------------------------------------
IGXMLScanner(XMLValidator * const valToAdopt,GrammarResolver * const grammarResolver,MemoryManager * const manager)58 IGXMLScanner::IGXMLScanner( XMLValidator* const  valToAdopt
59                           , GrammarResolver* const grammarResolver
60                           , MemoryManager* const manager) :
61 
62     XMLScanner(valToAdopt, grammarResolver, manager)
63     , fSeeXsi(false)
64     , fGrammarType(Grammar::UnKnown)
65     , fElemStateSize(16)
66     , fElemState(0)
67     , fElemLoopState(0)
68     , fContent(1023, manager)
69     , fRawAttrList(0)
70     , fRawAttrColonListSize(32)
71     , fRawAttrColonList(0)
72     , fDTDValidator(0)
73     , fSchemaValidator(0)
74     , fDTDGrammar(0)
75     , fICHandler(0)
76     , fLocationPairs(0)
77     , fDTDElemNonDeclPool(0)
78     , fSchemaElemNonDeclPool(0)
79     , fElemCount(0)
80     , fAttDefRegistry(0)
81     , fUndeclaredAttrRegistry(0)
82     , fPSVIAttrList(0)
83     , fModel(0)
84     , fPSVIElement(0)
85     , fErrorStack(0)
86     , fSchemaInfoList(0)
87     , fCachedSchemaInfoList (0)
88 {
89     CleanupType cleanup(this, &IGXMLScanner::cleanUp);
90 
91     try
92     {
93         commonInit();
94     }
95     catch(const OutOfMemoryException&)
96     {
97         // Don't cleanup when out of memory, since executing the
98         // code can cause problems.
99         cleanup.release();
100 
101         throw;
102     }
103 
104     cleanup.release();
105 }
106 
IGXMLScanner(XMLDocumentHandler * const docHandler,DocTypeHandler * const docTypeHandler,XMLEntityHandler * const entityHandler,XMLErrorReporter * const errHandler,XMLValidator * const valToAdopt,GrammarResolver * const grammarResolver,MemoryManager * const manager)107 IGXMLScanner::IGXMLScanner( XMLDocumentHandler* const docHandler
108                           , DocTypeHandler* const     docTypeHandler
109                           , XMLEntityHandler* const   entityHandler
110                           , XMLErrorReporter* const   errHandler
111                           , XMLValidator* const       valToAdopt
112                           , GrammarResolver* const    grammarResolver
113                           , MemoryManager* const      manager) :
114 
115     XMLScanner(docHandler, docTypeHandler, entityHandler, errHandler, valToAdopt, grammarResolver, manager)
116     , fSeeXsi(false)
117     , fGrammarType(Grammar::UnKnown)
118     , fElemStateSize(16)
119     , fElemState(0)
120     , fElemLoopState(0)
121     , fContent(1023, manager)
122     , fRawAttrList(0)
123     , fRawAttrColonListSize(32)
124     , fRawAttrColonList(0)
125     , fDTDValidator(0)
126     , fSchemaValidator(0)
127     , fDTDGrammar(0)
128     , fICHandler(0)
129     , fLocationPairs(0)
130     , fDTDElemNonDeclPool(0)
131     , fSchemaElemNonDeclPool(0)
132     , fElemCount(0)
133     , fAttDefRegistry(0)
134     , fUndeclaredAttrRegistry(0)
135     , fPSVIAttrList(0)
136     , fModel(0)
137     , fPSVIElement(0)
138     , fErrorStack(0)
139     , fSchemaInfoList(0)
140     , fCachedSchemaInfoList (0)
141 {
142     CleanupType cleanup(this, &IGXMLScanner::cleanUp);
143 
144     try
145     {
146         commonInit();
147     }
148     catch(const OutOfMemoryException&)
149     {
150         // Don't cleanup when out of memory, since executing the
151         // code can cause problems.
152         cleanup.release();
153 
154         throw;
155     }
156 
157     cleanup.release();
158 }
159 
~IGXMLScanner()160 IGXMLScanner::~IGXMLScanner()
161 {
162     cleanUp();
163 }
164 
165 // ---------------------------------------------------------------------------
166 //  XMLScanner: Getter methods
167 // ---------------------------------------------------------------------------
getEntityDeclPool()168 NameIdPool<DTDEntityDecl>* IGXMLScanner::getEntityDeclPool()
169 {
170     if(!fDTDGrammar)
171         return 0;
172     return fDTDGrammar->getEntityDeclPool();
173 }
174 
getEntityDeclPool() const175 const NameIdPool<DTDEntityDecl>* IGXMLScanner::getEntityDeclPool() const
176 {
177     if(!fDTDGrammar)
178         return 0;
179     return fDTDGrammar->getEntityDeclPool();
180 }
181 
182 // ---------------------------------------------------------------------------
183 //  IGXMLScanner: Main entry point to scan a document
184 // ---------------------------------------------------------------------------
scanDocument(const InputSource & src)185 void IGXMLScanner::scanDocument(const InputSource& src)
186 {
187     //  Bump up the sequence id for this parser instance. This will invalidate
188     //  any previous progressive scan tokens.
189     fSequenceId++;
190 
191     ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
192 
193     try
194     {
195         //  Reset the scanner and its plugged in stuff for a new run. This
196         //  resets all the data structures, creates the initial reader and
197         //  pushes it on the stack, and sets up the base document path.
198         scanReset(src);
199 
200         // If we have a document handler, then call the start document
201         if (fDocHandler)
202             fDocHandler->startDocument();
203 
204         //  Scan the prolog part, which is everything before the root element
205         //  including the DTD subsets.
206         scanProlog();
207 
208         //  If we got to the end of input, then its not a valid XML file.
209         //  Else, go on to scan the content.
210         if (fReaderMgr.atEOF())
211         {
212             emitError(XMLErrs::EmptyMainEntity);
213         }
214         else
215         {
216             // Scan content, and tell it its not an external entity
217             if (scanContent())
218             {
219                 // Do post-parse validation if required
220                 if (fValidate)
221                 {
222                     //  We handle ID reference semantics at this level since
223                     //  its required by XML 1.0.
224                     checkIDRefs();
225 
226                     // Then allow the validator to do any extra stuff it wants
227 //                    fValidator->postParseValidation();
228                 }
229 
230                 // That went ok, so scan for any miscellaneous stuff
231                 if (!fReaderMgr.atEOF())
232                     scanMiscellaneous();
233             }
234         }
235 
236         // If we have a document handler, then call the end document
237         if (fDocHandler)
238             fDocHandler->endDocument();
239 
240         //cargill debug:
241         //fGrammarResolver->getXSModel();
242     }
243     //  NOTE:
244     //
245     //  In all of the error processing below, the emitError() call MUST come
246     //  before the flush of the reader mgr, or it will fail because it tries
247     //  to find out the position in the XML source of the error.
248     catch(const XMLErrs::Codes)
249     {
250         // This is a 'first failure' exception, so fall through
251     }
252     catch(const XMLValid::Codes)
253     {
254         // This is a 'first fatal error' type exit, so fall through
255     }
256     catch(const XMLException& excToCatch)
257     {
258         //  Emit the error and catch any user exception thrown from here. Make
259         //  sure in all cases we flush the reader manager.
260         fInException = true;
261         try
262         {
263             if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
264                 emitError
265                 (
266                     XMLErrs::XMLException_Warning
267                     , excToCatch.getCode()
268                     , excToCatch.getMessage()
269                 );
270             else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
271                 emitError
272                 (
273                     XMLErrs::XMLException_Fatal
274                     , excToCatch.getCode()
275                     , excToCatch.getMessage()
276                 );
277             else
278                 emitError
279                 (
280                     XMLErrs::XMLException_Error
281                     , excToCatch.getCode()
282                     , excToCatch.getMessage()
283                 );
284         }
285         catch(const OutOfMemoryException&)
286         {
287             // This is a special case for out-of-memory
288             // conditions, because resetting the ReaderMgr
289             // can be problematic.
290             resetReaderMgr.release();
291 
292             throw;
293         }
294     }
295     catch(const OutOfMemoryException&)
296     {
297         // This is a special case for out-of-memory
298         // conditions, because resetting the ReaderMgr
299         // can be problematic.
300         resetReaderMgr.release();
301 
302         throw;
303     }
304 }
305 
306 
scanNext(XMLPScanToken & token)307 bool IGXMLScanner::scanNext(XMLPScanToken& token)
308 {
309     // Make sure this token is still legal
310     if (!isLegalToken(token))
311         ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_BadPScanToken, fMemoryManager);
312 
313     // Find the next token and remember the reader id
314     XMLSize_t orgReader;
315     XMLTokens curToken;
316 
317     ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
318 
319     bool retVal = true;
320 
321     try
322     {
323         while (true)
324         {
325             //  We have to handle any end of entity exceptions that happen here.
326             //  We could be at the end of X nested entities, each of which will
327             //  generate an end of entity exception as we try to move forward.
328             try
329             {
330                 curToken = senseNextToken(orgReader);
331                 break;
332             }
333             catch(const EndOfEntityException& toCatch)
334             {
335                 // Send an end of entity reference event
336                 if (fDocHandler)
337                     fDocHandler->endEntityReference(toCatch.getEntity());
338             }
339         }
340 
341         if (curToken == Token_CharData)
342         {
343             scanCharData(fCDataBuf);
344         }
345         else if (curToken == Token_EOF)
346         {
347             if (!fElemStack.isEmpty())
348             {
349                 const ElemStack::StackElem* topElem = fElemStack.popTop();
350                 emitError
351                 (
352                     XMLErrs::EndedWithTagsOnStack
353                     , topElem->fThisElement->getFullName()
354                 );
355             }
356 
357             retVal = false;
358         }
359         else
360         {
361             // Its some sort of markup
362             bool gotData = true;
363             switch(curToken)
364             {
365                 case Token_CData :
366                     // Make sure we are within content
367                     if (fElemStack.isEmpty())
368                         emitError(XMLErrs::CDATAOutsideOfContent);
369                     scanCDSection();
370                     break;
371 
372                 case Token_Comment :
373                     scanComment();
374                     break;
375 
376                 case Token_EndTag :
377                     scanEndTag(gotData);
378                     break;
379 
380                 case Token_PI :
381                     scanPI();
382                     break;
383 
384                 case Token_StartTag :
385                     if (fDoNamespaces)
386                         scanStartTagNS(gotData);
387                     else
388                         scanStartTag(gotData);
389                     break;
390 
391                 default :
392                     fReaderMgr.skipToChar(chOpenAngle);
393                     break;
394             }
395 
396             if (orgReader != fReaderMgr.getCurrentReaderNum())
397                 emitError(XMLErrs::PartialMarkupInEntity);
398 
399             // If we hit the end, then do the miscellaneous part
400             if (!gotData)
401             {
402                 // Do post-parse validation if required
403                 if (fValidate)
404                 {
405                     //  We handle ID reference semantics at this level since
406                     //  its required by XML 1.0.
407                     checkIDRefs();
408 
409                     // Then allow the validator to do any extra stuff it wants
410 //                    fValidator->postParseValidation();
411                 }
412 
413                 // That went ok, so scan for any miscellaneous stuff
414                 scanMiscellaneous();
415 
416                 if (toCheckIdentityConstraint())
417                     fICHandler->endDocument();
418 
419                 if (fDocHandler)
420                     fDocHandler->endDocument();
421             }
422         }
423     }
424     //  NOTE:
425     //
426     //  In all of the error processing below, the emitError() call MUST come
427     //  before the flush of the reader mgr, or it will fail because it tries
428     //  to find out the position in the XML source of the error.
429     catch(const XMLErrs::Codes)
430     {
431         // This is a 'first failure' exception so return failure
432         retVal = false;
433     }
434     catch(const XMLValid::Codes)
435     {
436         // This is a 'first fatal error' type exit, so return failure
437         retVal = false;
438     }
439     catch(const XMLException& excToCatch)
440     {
441         //  Emit the error and catch any user exception thrown from here. Make
442         //  sure in all cases we flush the reader manager.
443         fInException = true;
444         try
445         {
446             if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
447                 emitError
448                 (
449                     XMLErrs::XMLException_Warning
450                     , excToCatch.getCode()
451                     , excToCatch.getMessage()
452                 );
453             else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
454                 emitError
455                 (
456                     XMLErrs::XMLException_Fatal
457                     , excToCatch.getCode()
458                     , excToCatch.getMessage()
459                 );
460             else
461                 emitError
462                 (
463                     XMLErrs::XMLException_Error
464                     , excToCatch.getCode()
465                     , excToCatch.getMessage()
466                 );
467         }
468         catch(const OutOfMemoryException&)
469         {
470             // This is a special case for out-of-memory
471             // conditions, because resetting the ReaderMgr
472             // can be problematic.
473             resetReaderMgr.release();
474 
475             throw;
476         }
477 
478         retVal = false;
479     }
480     catch(const OutOfMemoryException&)
481     {
482         // This is a special case for out-of-memory
483         // conditions, because resetting the ReaderMgr
484         // can be problematic.
485         resetReaderMgr.release();
486 
487         throw;
488     }
489 
490     // If we are not at the end, release the object that will
491     // reset the ReaderMgr.
492     if (retVal)
493         resetReaderMgr.release();
494 
495     return retVal;
496 }
497 
498 
499 
500 // ---------------------------------------------------------------------------
//  IGXMLScanner: Private helper methods. Most of these are implemented in
//  IGXMLScanner2.cpp.
503 // ---------------------------------------------------------------------------
504 
505 //  This method handles the common initialization, to avoid having to do
506 //  it redundantly in multiple constructors.
commonInit()507 void IGXMLScanner::commonInit()
508 {
509 
510     //  Create the element state array
511     fElemState = (unsigned int*) fMemoryManager->allocate
512     (
513         fElemStateSize * sizeof(unsigned int)
514     ); //new unsigned int[fElemStateSize];
515     fElemLoopState = (unsigned int*) fMemoryManager->allocate
516     (
517         fElemStateSize * sizeof(unsigned int)
518     ); //new unsigned int[fElemStateSize];
519 
520     //  And we need one for the raw attribute scan. This just stores key/
521     //  value string pairs (prior to any processing.)
522     fRawAttrList = new (fMemoryManager) RefVectorOf<KVStringPair>(32, true, fMemoryManager);
523     fRawAttrColonList = (int*) fMemoryManager->allocate
524     (
525         fRawAttrColonListSize * sizeof(int)
526     );
527 
528     //  Create the Validator and init them
529     fDTDValidator = new (fMemoryManager) DTDValidator();
530     initValidator(fDTDValidator);
531     fSchemaValidator = new (fMemoryManager) SchemaValidator(0, fMemoryManager);
532     initValidator(fSchemaValidator);
533 
534     // Create IdentityConstraint info
535     fICHandler = new (fMemoryManager) IdentityConstraintHandler(this, fMemoryManager);
536 
537     // Create schemaLocation pair info
538     fLocationPairs = new (fMemoryManager) ValueVectorOf<XMLCh*>(8, fMemoryManager);
539     // create pools for undeclared elements
540     fDTDElemNonDeclPool = new (fMemoryManager) NameIdPool<DTDElementDecl>(29, 128, fMemoryManager);
541     fSchemaElemNonDeclPool = new (fMemoryManager) RefHash3KeysIdPool<SchemaElementDecl>(29, true, 128, fMemoryManager);
542     fAttDefRegistry = new (fMemoryManager) RefHashTableOf<unsigned int, PtrHasher>
543     (
544         131, false, fMemoryManager
545     );
546     fUndeclaredAttrRegistry = new (fMemoryManager) Hash2KeysSetOf<StringHasher>(7, fMemoryManager);
547     fPSVIAttrList = new (fMemoryManager) PSVIAttributeList(fMemoryManager);
548 
549     fSchemaInfoList = new (fMemoryManager) RefHash2KeysTableOf<SchemaInfo>(29, fMemoryManager);
550     fCachedSchemaInfoList = new (fMemoryManager) RefHash2KeysTableOf<SchemaInfo>(29, fMemoryManager);
551 
552     // use fDTDValidator as the default validator
553     if (!fValidator)
554         fValidator = fDTDValidator;
555 }
556 
cleanUp()557 void IGXMLScanner::cleanUp()
558 {
559     fMemoryManager->deallocate(fElemState); //delete [] fElemState;
560     fMemoryManager->deallocate(fElemLoopState); //delete [] fElemLoopState;
561     delete fRawAttrList;
562     fMemoryManager->deallocate(fRawAttrColonList);
563     delete fDTDValidator;
564     delete fSchemaValidator;
565     delete fICHandler;
566     delete fLocationPairs;
567     delete fDTDElemNonDeclPool;
568     delete fSchemaElemNonDeclPool;
569     delete fAttDefRegistry;
570     delete fUndeclaredAttrRegistry;
571     delete fPSVIAttrList;
572     delete fPSVIElement;
573     delete fErrorStack;
574     delete fSchemaInfoList;
575     delete fCachedSchemaInfoList;
576 }
577 
578 // ---------------------------------------------------------------------------
579 //  IGXMLScanner: Private scanning methods
580 // ---------------------------------------------------------------------------
581 
582 //  This method is called from scanStartTag() to handle the very raw initial
583 //  scan of the attributes. It just fills in the passed collection with
584 //  key/value pairs for each attribute. No processing is done on them at all.
585 XMLSize_t
rawAttrScan(const XMLCh * const elemName,RefVectorOf<KVStringPair> & toFill,bool & isEmpty)586 IGXMLScanner::rawAttrScan(const   XMLCh* const                elemName
587                           ,       RefVectorOf<KVStringPair>&  toFill
588                           ,       bool&                       isEmpty)
589 {
590     //  Keep up with how many attributes we've seen so far, and how many
591     //  elements are available in the vector. This way we can reuse old
592     //  elements until we run out and then expand it.
593     XMLSize_t attCount = 0;
594     XMLSize_t curVecSize = toFill.size();
595 
596     // Assume it is not empty
597     isEmpty = false;
598 
599     //  We loop until we either see a /> or >, handling key/value pairs util
600     //  we get there. We place them in the passed vector, which we will expand
601     //  as required to hold them.
602     while (true)
603     {
604         // Get the next character, which should be non-space
605         XMLCh nextCh = fReaderMgr.peekNextChar();
606 
607         //  If the next character is not a slash or closed angle bracket,
608         //  then it must be whitespace, since whitespace is required
609         //  between the end of the last attribute and the name of the next
610         //  one.
611         //
612         if (attCount)
613         {
614             if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
615             {
616                 bool bFoundSpace;
617                 fReaderMgr.skipPastSpaces(bFoundSpace);
618                 if (!bFoundSpace)
619                 {
620                     // Emit the error but keep on going
621                     emitError(XMLErrs::ExpectedWhitespace);
622                 }
623                 // Ok, peek another char
624                 nextCh = fReaderMgr.peekNextChar();
625             }
626         }
627 
628         //  Ok, here we first check for any of the special case characters.
629         //  If its not one, then we do the normal case processing, which
630         //  assumes that we've hit an attribute value, Otherwise, we do all
631         //  the special case checks.
632         if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
633         {
634             //  Assume it's going to be an attribute, so get a name from
635             //  the input.
636             int colonPosition;
637             if (!fReaderMgr.getQName(fAttNameBuf, &colonPosition))
638             {
639                 if (fAttNameBuf.isEmpty())
640                     emitError(XMLErrs::ExpectedAttrName);
641                 else
642                     emitError(XMLErrs::InvalidAttrName, fAttNameBuf.getRawBuffer());
643                 fReaderMgr.skipPastChar(chCloseAngle);
644                 return attCount;
645             }
646 
647             const XMLCh* curAttNameBuf = fAttNameBuf.getRawBuffer();
648 
649             // And next must be an equal sign
650             if (!scanEq())
651             {
652                 static const XMLCh tmpList[] =
653                 {
654                     chSingleQuote, chDoubleQuote, chCloseAngle
655                     , chOpenAngle, chForwardSlash, chNull
656                 };
657 
658                 emitError(XMLErrs::ExpectedEqSign);
659 
660                 //  Try to sync back up by skipping forward until we either
661                 //  hit something meaningful.
662                 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
663 
664                 if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
665                 {
666                     // Jump back to top for normal processing of these
667                     continue;
668                 }
669                 else if ((chFound == chSingleQuote)
670                       ||  (chFound == chDoubleQuote)
671                       ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
672                 {
673                     // Just fall through assuming that the value is to follow
674                 }
675                 else if (chFound == chOpenAngle)
676                 {
677                     // Assume a malformed tag and that new one is starting
678                     emitError(XMLErrs::UnterminatedStartTag, elemName);
679                     return attCount;
680                 }
681                 else
682                 {
683                     // Something went really wrong
684                     return attCount;
685                 }
686             }
687 
688             //  Next should be the quoted attribute value. We just do a simple
689             //  and stupid scan of this value. The only thing we do here
690             //  is to expand entity references.
691             if (!basicAttrValueScan(curAttNameBuf, fAttValueBuf))
692             {
693                 static const XMLCh tmpList[] =
694                 {
695                     chCloseAngle, chOpenAngle, chForwardSlash, chNull
696                 };
697 
698                 emitError(XMLErrs::ExpectedAttrValue);
699 
700                 //  It failed, so lets try to get synced back up. We skip
701                 //  forward until we find some whitespace or one of the
702                 //  chars in our list.
703                 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
704 
705                 if ((chFound == chCloseAngle)
706                 ||  (chFound == chForwardSlash)
707                 ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
708                 {
709                     //  Just fall through and process this attribute, though
710                     //  the value will be "".
711                 }
712                 else if (chFound == chOpenAngle)
713                 {
714                     // Assume a malformed tag and that new one is starting
715                     emitError(XMLErrs::UnterminatedStartTag, elemName);
716                     return attCount;
717                 }
718                 else
719                 {
720                     // Something went really wrong
721                     return attCount;
722                 }
723             }
724 
725             //  And now lets add it to the passed collection. If we have not
726             //  filled it up yet, then we use the next element. Else we add
727             //  a new one.
728             KVStringPair* curPair = 0;
729             if (attCount >= curVecSize)
730             {
731                 curPair = new (fMemoryManager) KVStringPair
732                 (
733                     curAttNameBuf
734                     , fAttNameBuf.getLen()
735                     , fAttValueBuf.getRawBuffer()
736                     , fAttValueBuf.getLen()
737                     , fMemoryManager
738                 );
739                 toFill.addElement(curPair);
740             }
741              else
742             {
743                 curPair = toFill.elementAt(attCount);
744                 curPair->set
745                 (
746                     curAttNameBuf,
747                     fAttNameBuf.getLen(),
748                     fAttValueBuf.getRawBuffer(),
749                     fAttValueBuf.getLen()
750                 );
751             }
752 
753             if (attCount >= fRawAttrColonListSize) {
754                 resizeRawAttrColonList();
755             }
756             // Set the position of the colon and bump the count of attributes we've gotten
757             fRawAttrColonList[attCount++] = colonPosition;
758 
759             // And go to the top again for another attribute
760             continue;
761         }
762 
763         //  It was some special case character so do all of the checks and
764         //  deal with it.
765         if (!nextCh)
766             ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
767 
768         if (nextCh == chForwardSlash)
769         {
770             fReaderMgr.getNextChar();
771             isEmpty = true;
772             if (!fReaderMgr.skippedChar(chCloseAngle))
773                 emitError(XMLErrs::UnterminatedStartTag, elemName);
774             break;
775         }
776         else if (nextCh == chCloseAngle)
777         {
778             fReaderMgr.getNextChar();
779             break;
780         }
781         else if (nextCh == chOpenAngle)
782         {
783             //  Check for this one specially, since its going to be common
784             //  and it is kind of auto-recovering since we've already hit the
785             //  next open bracket, which is what we would have seeked to (and
786             //  skipped this whole tag.)
787             emitError(XMLErrs::UnterminatedStartTag, elemName);
788             break;
789         }
790         else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
791         {
792             //  Check for this one specially, which is probably a missing
793             //  attribute name, e.g. ="value". Just issue expected name
794             //  error and eat the quoted string, then jump back to the
795             //  top again.
796             emitError(XMLErrs::ExpectedAttrName);
797             fReaderMgr.getNextChar();
798             fReaderMgr.skipQuotedString(nextCh);
799             fReaderMgr.skipPastSpaces();
800             continue;
801         }
802     }
803 
804     return attCount;
805 }
806 
807 
808 //  This method will kick off the scanning of the primary content of the
809 //  document, i.e. the elements.
scanContent()810 bool IGXMLScanner::scanContent()
811 {
812     //  Go into a loop until we hit the end of the root element, or we fall
813     //  out because there is no root element.
814     //
815     //  We have to do kind of a deeply nested double loop here in order to
816     //  avoid doing the setup/teardown of the exception handler on each
817     //  round. Doing it this way we only do it when an exception actually
818     //  occurs.
819     bool gotData = true;
820     bool inMarkup = false;
821     while (gotData)
822     {
823         try
824         {
825             while (gotData)
826             {
827                 //  Sense what the next top level token is. According to what
828                 //  this tells us, we will call something to handle that kind
829                 //  of thing.
830                 XMLSize_t orgReader;
831                 const XMLTokens curToken = senseNextToken(orgReader);
832 
833                 //  Handle character data and end of file specially. Char data
834                 //  is not markup so we don't want to handle it in the loop
835                 //  below.
836                 if (curToken == Token_CharData)
837                 {
838                     //  Scan the character data and call appropriate events. Let
839                     //  him use our local character data buffer for efficiency.
840                     scanCharData(fCDataBuf);
841                     continue;
842                 }
843                 else if (curToken == Token_EOF)
844                 {
845                     //  The element stack better be empty at this point or we
846                     //  ended prematurely before all elements were closed.
847                     if (!fElemStack.isEmpty())
848                     {
849                         const ElemStack::StackElem* topElem = fElemStack.popTop();
850                         emitError
851                         (
852                             XMLErrs::EndedWithTagsOnStack
853                             , topElem->fThisElement->getFullName()
854                         );
855                     }
856 
857                     // Its the end of file, so clear the got data flag
858                     gotData = false;
859                     continue;
860                 }
861 
862                 // We are in some sort of markup now
863                 inMarkup = true;
864 
865                 //  According to the token we got, call the appropriate
866                 //  scanning method.
867                 switch(curToken)
868                 {
869                     case Token_CData :
870                         // Make sure we are within content
871                         if (fElemStack.isEmpty())
872                             emitError(XMLErrs::CDATAOutsideOfContent);
873                         scanCDSection();
874                         break;
875 
876                     case Token_Comment :
877                         scanComment();
878                         break;
879 
880                     case Token_EndTag :
881                         scanEndTag(gotData);
882                         break;
883 
884                     case Token_PI :
885                         scanPI();
886                         break;
887 
888                     case Token_StartTag :
889                         if (fDoNamespaces)
890                             scanStartTagNS(gotData);
891                         else
892                             scanStartTag(gotData);
893                         break;
894 
895                     default :
896                         fReaderMgr.skipToChar(chOpenAngle);
897                         break;
898                 }
899 
900                 if (orgReader != fReaderMgr.getCurrentReaderNum())
901                     emitError(XMLErrs::PartialMarkupInEntity);
902 
903                 // And we are back out of markup again
904                 inMarkup = false;
905             }
906         }
907         catch(const EndOfEntityException& toCatch)
908         {
909             //  If we were in some markup when this happened, then its a
910             //  partial markup error.
911             if (inMarkup)
912                 emitError(XMLErrs::PartialMarkupInEntity);
913 
914             // Send an end of entity reference event
915             if (fDocHandler)
916                 fDocHandler->endEntityReference(toCatch.getEntity());
917 
918             inMarkup = false;
919         }
920     }
921 
922     // It went ok, so return success
923     return true;
924 }
925 
926 
scanEndTag(bool & gotData)927 void IGXMLScanner::scanEndTag(bool& gotData)
928 {
929     //  Assume we will still have data until proven otherwise. It will only
930     //  ever be false if this is the end of the root element.
931     gotData = true;
932 
933     //  Check if the element stack is empty. If so, then this is an unbalanced
934     //  element (i.e. more ends than starts, perhaps because of bad text
935     //  causing one to be skipped.)
936     if (fElemStack.isEmpty())
937     {
938         emitError(XMLErrs::MoreEndThanStartTags);
939         fReaderMgr.skipPastChar(chCloseAngle);
940         ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);
941     }
942 
943     //  Pop the stack of the element we are supposed to be ending. Remember
944     //  that we don't own this. The stack just keeps them and reuses them.
945     unsigned int uriId = (fDoNamespaces)
946         ? fElemStack.getCurrentURI() : fEmptyNamespaceId;
947 
948     // these get initialized below
949     const ElemStack::StackElem* topElem = 0;
950     const XMLCh *elemName = 0;
951 
952     // Make sure that its the end of the element that we expect
953     // special case for schema validation, whose element decls,
954     // obviously don't contain prefix information
955     if(fGrammarType == Grammar::SchemaGrammarType)
956     {
957         elemName = fElemStack.getCurrentSchemaElemName();
958         topElem = fElemStack.topElement();
959     }
960     else
961     {
962         topElem = fElemStack.topElement();
963         elemName = topElem->fThisElement->getFullName();
964     }
965     if (!fReaderMgr.skippedStringLong(elemName))
966     {
967         emitError
968         (
969             XMLErrs::ExpectedEndOfTagX
970             , elemName
971         );
972         fReaderMgr.skipPastChar(chCloseAngle);
973         fElemStack.popTop();
974         return;
975     }
976 
977     // Make sure we are back on the same reader as where we started
978     if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
979         emitError(XMLErrs::PartialTagMarkupError);
980 
981     // Skip optional whitespace
982     fReaderMgr.skipPastSpaces();
983 
984     // Make sure we find the closing bracket
985     if (!fReaderMgr.skippedChar(chCloseAngle))
986     {
987         emitError
988         (
989             XMLErrs::UnterminatedEndTag
990             , topElem->fThisElement->getFullName()
991         );
992     }
993 
994     if (fGrammarType == Grammar::SchemaGrammarType)
995     {
996         // reset error occurred
997         fPSVIElemContext.fErrorOccurred = fErrorStack->pop();
998         if (fValidate && topElem->fThisElement->isDeclared())
999         {
1000             fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
1001             if(!fPSVIElemContext.fCurrentTypeInfo)
1002                 fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
1003             else
1004                 fPSVIElemContext.fCurrentDV = 0;
1005             if(fPSVIHandler)
1006             {
1007                 fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) fValidator)->getNormalizedValue();
1008 
1009                 if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString))
1010                     fPSVIElemContext.fNormalizedValue = 0;
1011             }
1012         }
1013         else
1014         {
1015             fPSVIElemContext.fCurrentDV = 0;
1016             fPSVIElemContext.fCurrentTypeInfo = 0;
1017             fPSVIElemContext.fNormalizedValue = 0;
1018         }
1019     }
1020 
1021     //  If validation is enabled, then lets pass him the list of children and
1022     //  this element and let him validate it.
1023     DatatypeValidator* psviMemberType = 0;
1024     if (fValidate)
1025     {
1026 
1027        //
1028        // XML1.0-3rd
1029        // Validity Constraint:
1030        // The declaration matches EMPTY and the element has no content (not even
1031        // entity references, comments, PIs or white space).
1032        //
1033        if ( (fGrammarType == Grammar::DTDGrammarType) &&
1034             (topElem->fCommentOrPISeen)               &&
1035             (((DTDElementDecl*) topElem->fThisElement)->getModelType() == DTDElementDecl::Empty))
1036        {
1037            fValidator->emitError
1038                (
1039                XMLValid::EmptyElemHasContent
1040                , topElem->fThisElement->getFullName()
1041                );
1042        }
1043 
1044        //
1045        // XML1.0-3rd
1046        // Validity Constraint:
1047        //
1048        // The declaration matches children and the sequence of child elements
1049        // belongs to the language generated by the regular expression in the
1050        // content model, with optional white space, comments and PIs
1051        // (i.e. markup matching production [27] Misc) between the start-tag and
1052        // the first child element, between child elements, or between the last
1053        // child element and the end-tag.
1054        //
1055        // Note that
1056        //    a CDATA section containing only white space or
1057        //    a reference to an entity whose replacement text is character references
1058        //       expanding to white space do not match the nonterminal S, and hence
1059        //       cannot appear in these positions; however,
1060        //    a reference to an internal entity with a literal value consisting
1061        //       of character references expanding to white space does match S,
1062        //       since its replacement text is the white space resulting from expansion
1063        //       of the character references.
1064        //
1065        if ( (fGrammarType == Grammar::DTDGrammarType)  &&
1066             (topElem->fReferenceEscaped)               &&
1067             (((DTDElementDecl*) topElem->fThisElement)->getModelType() == DTDElementDecl::Children))
1068        {
1069            fValidator->emitError
1070                (
1071                XMLValid::ElemChildrenHasInvalidWS
1072                , topElem->fThisElement->getFullName()
1073                );
1074        }
1075         XMLSize_t failure;
1076         bool res = fValidator->checkContent
1077         (
1078             topElem->fThisElement
1079             , topElem->fChildren
1080             , topElem->fChildCount
1081             , &failure
1082         );
1083 
1084         if (!res)
1085         {
1086             //  One of the elements is not valid for the content. NOTE that
1087             //  if no children were provided but the content model requires
1088             //  them, it comes back with a zero value. But we cannot use that
1089             //  to index the child array in this case, and have to put out a
1090             //  special message.
1091             if (!topElem->fChildCount)
1092             {
1093                 fValidator->emitError
1094                 (
1095                     XMLValid::EmptyNotValidForContent
1096                     , topElem->fThisElement->getFormattedContentModel()
1097                 );
1098             }
1099             else if (failure >= topElem->fChildCount)
1100             {
1101                 fValidator->emitError
1102                 (
1103                     XMLValid::NotEnoughElemsForCM
1104                     , topElem->fThisElement->getFormattedContentModel()
1105                 );
1106             }
1107             else
1108             {
1109                 fValidator->emitError
1110                 (
1111                     XMLValid::ElementNotValidForContent
1112                     , topElem->fChildren[failure]->getRawName()
1113                     , topElem->fThisElement->getFormattedContentModel()
1114                 );
1115             }
1116         }
1117 
1118 
1119         if (fGrammarType == Grammar::SchemaGrammarType) {
1120             if (((SchemaValidator*) fValidator)->getErrorOccurred())
1121                 fPSVIElemContext.fErrorOccurred = true;
1122             else if (fPSVIElemContext.fCurrentDV && fPSVIElemContext.fCurrentDV->getType() == DatatypeValidator::Union)
1123                 psviMemberType = fValidationContext->getValidatingMemberType();
1124 
1125             if (fPSVIHandler)
1126             {
1127                 fPSVIElemContext.fIsSpecified = ((SchemaValidator*) fValidator)->getIsElemSpecified();
1128                 if(fPSVIElemContext.fIsSpecified)
1129                     fPSVIElemContext.fNormalizedValue = ((SchemaElementDecl *)topElem->fThisElement)->getDefaultValue();
1130             }
1131 
1132             // call matchers and de-activate context
1133             if (toCheckIdentityConstraint())
1134             {
1135                 fICHandler->deactivateContext
1136                              (
1137                               (SchemaElementDecl *) topElem->fThisElement
1138                             , fContent.getRawBuffer()
1139                             , fValidationContext
1140                             , fPSVIElemContext.fCurrentDV
1141                              );
1142             }
1143 
1144         }
1145     }
1146 
1147     // QName dv needed topElem to resolve URIs on the checkContent
1148     fElemStack.popTop();
1149 
1150     // See if it was the root element, to avoid multiple calls below
1151     const bool isRoot = fElemStack.isEmpty();
1152 
1153     if (fGrammarType == Grammar::SchemaGrammarType)
1154     {
1155         if (fPSVIHandler)
1156         {
1157             endElementPSVI(
1158                 (SchemaElementDecl*)topElem->fThisElement, psviMemberType);
1159         }
1160         // now we can reset the datatype buffer, since the
1161         // application has had a chance to copy the characters somewhere else
1162         ((SchemaValidator *)fValidator)->clearDatatypeBuffer();
1163     }
1164 
1165     // If we have a doc handler, tell it about the end tag
1166     if (fDocHandler)
1167     {
1168         if (fGrammarType == Grammar::SchemaGrammarType) {
1169             if (topElem->fPrefixColonPos != -1)
1170                 fPrefixBuf.set(elemName, topElem->fPrefixColonPos);
1171             else
1172                 fPrefixBuf.reset();
1173         }
1174         else {
1175             fPrefixBuf.set(topElem->fThisElement->getElementName()->getPrefix());
1176         }
1177         fDocHandler->endElement
1178         (
1179             *topElem->fThisElement
1180             , uriId
1181             , isRoot
1182             , fPrefixBuf.getRawBuffer()
1183         );
1184     }
1185 
1186     if (fGrammarType == Grammar::SchemaGrammarType) {
1187         if (!isRoot)
1188         {
1189             // update error information
1190             fErrorStack->push((fErrorStack->size() && fErrorStack->pop()) || fPSVIElemContext.fErrorOccurred);
1191 
1192 
1193         }
1194     }
1195 
1196     // If this was the root, then done with content
1197     gotData = !isRoot;
1198 
1199     if (gotData) {
1200         if (fDoNamespaces) {
1201             // Restore the grammar
1202             fGrammar = fElemStack.getCurrentGrammar();
1203             fGrammarType = fGrammar->getGrammarType();
1204             if (fGrammarType == Grammar::SchemaGrammarType && !fValidator->handlesSchema()) {
1205                 if (fValidatorFromUser)
1206                     ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
1207                 else {
1208                     fValidator = fSchemaValidator;
1209                 }
1210             }
1211             else if (fGrammarType == Grammar::DTDGrammarType && !fValidator->handlesDTD()) {
1212                 if (fValidatorFromUser)
1213                     ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager);
1214                 else {
1215                     fValidator = fDTDValidator;
1216                 }
1217             }
1218 
1219             fValidator->setGrammar(fGrammar);
1220         }
1221 
1222         // Restore the validation flag
1223         fValidate = fElemStack.getValidationFlag();
1224     }
1225 }
1226 
1227 
1228 //  This method handles the high level logic of scanning the DOCType
1229 //  declaration. This calls the DTDScanner and kicks off both the scanning of
1230 //  the internal subset and the scanning of the external subset, if any.
1231 //
1232 //  When we get here the '<!DOCTYPE' part has already been scanned, which is
1233 //  what told us that we had a doc type decl to parse.
scanDocTypeDecl()1234 void IGXMLScanner::scanDocTypeDecl()
1235 {
1236     //  We have a doc type. So, switch the Grammar.
1237     switchGrammar(XMLUni::fgDTDEntityString);
1238 
1239     if (fDocTypeHandler)
1240         fDocTypeHandler->resetDocType();
1241 
1242     // There must be some space after DOCTYPE
1243     bool skippedSomething;
1244     fReaderMgr.skipPastSpaces(skippedSomething);
1245     if (!skippedSomething)
1246     {
1247         emitError(XMLErrs::ExpectedWhitespace);
1248 
1249         // Just skip the Doctype declaration and return
1250         fReaderMgr.skipPastChar(chCloseAngle);
1251         return;
1252     }
1253 
1254     // Get a buffer for the root element
1255     XMLBufBid bbRootName(&fBufMgr);
1256 
1257     //  Get a name from the input, which should be the name of the root
1258     //  element of the upcoming content.
1259     int  colonPosition;
1260     bool validName = fDoNamespaces ? fReaderMgr.getQName(bbRootName.getBuffer(), &colonPosition) :
1261                                      fReaderMgr.getName(bbRootName.getBuffer());
1262     if (!validName)
1263     {
1264         if (bbRootName.isEmpty())
1265             emitError(XMLErrs::NoRootElemInDOCTYPE);
1266         else
1267             emitError(XMLErrs::InvalidRootElemInDOCTYPE, bbRootName.getRawBuffer());
1268         fReaderMgr.skipPastChar(chCloseAngle);
1269         return;
1270     }
1271 
1272     //  Store the root element name for later check
1273     setRootElemName(bbRootName.getRawBuffer());
1274 
1275     //  This element obviously is not going to exist in the element decl
1276     //  pool yet, but we need to call docTypeDecl. So force it into
1277     //  the element decl pool, marked as being there because it was in
1278     //  the DOCTYPE. Later, when its declared, the status will be updated.
1279     //
1280     //  Only do this if we are not reusing the validator! If we are reusing,
1281     //  then look it up instead. It has to exist!
1282     MemoryManager* const  rootDeclMgr =
1283         fUseCachedGrammar ? fMemoryManager : fGrammarPoolMemoryManager;
1284 
1285     DTDElementDecl* rootDecl = new (rootDeclMgr) DTDElementDecl
1286     (
1287         bbRootName.getRawBuffer()
1288         , fEmptyNamespaceId
1289         , DTDElementDecl::Any
1290         , rootDeclMgr
1291     );
1292 
1293     Janitor<DTDElementDecl> rootDeclJanitor(rootDecl);
1294     rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
1295     rootDecl->setExternalElemDeclaration(true);
1296     if(!fUseCachedGrammar)
1297     {
1298         fGrammar->putElemDecl(rootDecl);
1299         rootDeclJanitor.release();
1300     } else
1301     {
1302         // attach this to the undeclared element pool so that it gets deleted
1303         XMLElementDecl* elemDecl = fDTDElemNonDeclPool->getByKey(bbRootName.getRawBuffer());
1304         if (elemDecl)
1305         {
1306             rootDecl->setId(elemDecl->getId());
1307         }
1308         else
1309         {
1310             rootDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)rootDecl));
1311             rootDeclJanitor.release();
1312         }
1313     }
1314 
1315     // Skip any spaces after the name
1316     fReaderMgr.skipPastSpaces();
1317 
1318     //  And now if we are looking at a >, then we are done. It is not
1319     //  required to have an internal or external subset, though why you
1320     //  would not escapes me.
1321     if (fReaderMgr.skippedChar(chCloseAngle)) {
1322 
1323         //  If we have a doc type handler and advanced callbacks are enabled,
1324         //  call the doctype event.
1325         if (fDocTypeHandler)
1326             fDocTypeHandler->doctypeDecl(*rootDecl, 0, 0, false);
1327         return;
1328     }
1329 
1330     // either internal/external subset
1331     if (fValScheme == Val_Auto && !fValidate)
1332         fValidate = true;
1333 
1334     bool    hasIntSubset = false;
1335     bool    hasExtSubset = false;
1336     XMLCh*  sysId = 0;
1337     XMLCh*  pubId = 0;
1338 
1339     DTDScanner dtdScanner
1340     (
1341         (DTDGrammar*) fGrammar
1342         , fDocTypeHandler
1343         , fGrammarPoolMemoryManager
1344         , fMemoryManager
1345     );
1346     dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);
1347 
1348     //  If the next character is '[' then we have no external subset cause
1349     //  there is no system id, just the opening character of the internal
1350     //  subset. Else, has to be an id.
1351     //
1352     // Just look at the next char, don't eat it.
1353     if (fReaderMgr.peekNextChar() == chOpenSquare)
1354     {
1355         hasIntSubset = true;
1356     }
1357     else
1358     {
1359         // Indicate we have an external subset
1360         hasExtSubset = true;
1361         fHasNoDTD = false;
1362 
1363         // Get buffers for the ids
1364         XMLBufBid bbPubId(&fBufMgr);
1365         XMLBufBid bbSysId(&fBufMgr);
1366 
1367         // Get the external subset id
1368         if (!dtdScanner.scanId(bbPubId.getBuffer(), bbSysId.getBuffer(), DTDScanner::IDType_External))
1369         {
1370             fReaderMgr.skipPastChar(chCloseAngle);
1371             return;
1372         }
1373 
1374         // Get copies of the ids we got
1375         pubId = XMLString::replicate(bbPubId.getRawBuffer(), fMemoryManager);
1376         sysId = XMLString::replicate(bbSysId.getRawBuffer(), fMemoryManager);
1377 
1378         // Skip spaces and check again for the opening of an internal subset
1379         fReaderMgr.skipPastSpaces();
1380 
1381         // Just look at the next char, don't eat it.
1382         if (fReaderMgr.peekNextChar() == chOpenSquare) {
1383             hasIntSubset = true;
1384         }
1385     }
1386 
1387     // Insure that the ids get cleaned up, if they got allocated
1388     ArrayJanitor<XMLCh> janSysId(sysId, fMemoryManager);
1389     ArrayJanitor<XMLCh> janPubId(pubId, fMemoryManager);
1390 
1391     //  If we have a doc type handler and advanced callbacks are enabled,
1392     //  call the doctype event.
1393     if (fDocTypeHandler)
1394         fDocTypeHandler->doctypeDecl(*rootDecl, pubId, sysId, hasIntSubset, hasExtSubset);
1395 
1396     //  Ok, if we had an internal subset, we are just past the [ character
1397     //  and need to parse that first.
1398     if (hasIntSubset)
1399     {
1400         // Eat the opening square bracket
1401         fReaderMgr.getNextChar();
1402 
1403         checkInternalDTD(hasExtSubset, sysId, pubId);
1404 
1405         //  And try to scan the internal subset. If we fail, try to recover
1406         //  by skipping forward tot he close angle and returning.
1407         if (!dtdScanner.scanInternalSubset())
1408         {
1409             fReaderMgr.skipPastChar(chCloseAngle);
1410             return;
1411         }
1412 
1413         //  Do a sanity check that some expanded PE did not propogate out of
1414         //  the doctype. This could happen if it was terminated early by bad
1415         //  syntax.
1416         if (fReaderMgr.getReaderDepth() > 1)
1417         {
1418             emitError(XMLErrs::PEPropogated);
1419 
1420             // Ask the reader manager to pop back down to the main level
1421             fReaderMgr.cleanStackBackTo(1);
1422         }
1423 
1424         fReaderMgr.skipPastSpaces();
1425     }
1426 
1427     // And that should leave us at the closing > of the DOCTYPE line
1428     if (!fReaderMgr.skippedChar(chCloseAngle))
1429     {
1430         //  Do a special check for the common scenario of an extra ] char at
1431         //  the end. This is easy to recover from.
1432         if (fReaderMgr.skippedChar(chCloseSquare)
1433         &&  fReaderMgr.skippedChar(chCloseAngle))
1434         {
1435             emitError(XMLErrs::ExtraCloseSquare);
1436         }
1437          else
1438         {
1439             emitError(XMLErrs::UnterminatedDOCTYPE);
1440             fReaderMgr.skipPastChar(chCloseAngle);
1441         }
1442     }
1443 
1444     //  If we had an external subset, then we need to deal with that one
1445     //  next. If we are reusing the validator, then don't scan it.
1446     if (hasExtSubset) {
1447 
1448         InputSource* srcUsed=0;
1449         Janitor<InputSource> janSrc(srcUsed);
1450         // If we had an internal subset and we're using the cached grammar, it
1451         // means that the ignoreCachedDTD is set, so we ignore the cached
1452         // grammar
1453         if (fUseCachedGrammar && !hasIntSubset)
1454         {
1455             srcUsed = resolveSystemId(sysId, pubId);
1456             if (srcUsed) {
1457                 janSrc.reset(srcUsed);
1458                 Grammar* grammar = fGrammarResolver->getGrammar(srcUsed->getSystemId());
1459 
1460                 if (grammar && grammar->getGrammarType() == Grammar::DTDGrammarType) {
1461 
1462                     fDTDGrammar = (DTDGrammar*) grammar;
1463                     fGrammar = fDTDGrammar;
1464                     fValidator->setGrammar(fGrammar);
1465                     // If we don't report at least the external subset boundaries,
1466                     // an advanced document handler cannot know when the DTD end,
1467                     // since we've already sent a doctype decl that indicates there's
1468                     // there's an external subset.
1469                     if (fDocTypeHandler)
1470                     {
1471                         fDocTypeHandler->startExtSubset();
1472                         fDocTypeHandler->endExtSubset();
1473                     }
1474 
1475                     return;
1476                 }
1477             }
1478         }
1479 
1480         if (fLoadExternalDTD || fValidate)
1481         {
1482             // And now create a reader to read this entity
1483             XMLReader* reader;
1484             if (srcUsed) {
1485                 reader = fReaderMgr.createReader
1486                         (
1487                             *srcUsed
1488                             , false
1489                             , XMLReader::RefFrom_NonLiteral
1490                             , XMLReader::Type_General
1491                             , XMLReader::Source_External
1492                             , fCalculateSrcOfs
1493                             , fLowWaterMark
1494                         );
1495             }
1496             else {
1497                 reader = fReaderMgr.createReader
1498                         (
1499                             sysId
1500                             , pubId
1501                             , false
1502                             , XMLReader::RefFrom_NonLiteral
1503                             , XMLReader::Type_General
1504                             , XMLReader::Source_External
1505                             , srcUsed
1506                             , fCalculateSrcOfs
1507                             , fLowWaterMark
1508                             , fDisableDefaultEntityResolution
1509                         );
1510                 janSrc.reset(srcUsed);
1511             }
1512             //  If it failed then throw an exception
1513             if (!reader)
1514                 ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Gen_CouldNotOpenDTD, srcUsed ? srcUsed->getSystemId() : sysId, fMemoryManager);
1515 
1516             if (fToCacheGrammar) {
1517 
1518                 unsigned int stringId = fGrammarResolver->getStringPool()->addOrFind(srcUsed->getSystemId());
1519                 const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(stringId);
1520 
1521                 fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
1522                 ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr);
1523                 fGrammarResolver->putGrammar(fGrammar);
1524             }
1525 
1526             //  In order to make the processing work consistently, we have to
1527             //  make this look like an external entity. So create an entity
1528             //  decl and fill it in and push it with the reader, as happens
1529             //  with an external entity. Put a janitor on it to insure it gets
1530             //  cleaned up. The reader manager does not adopt them.
1531             const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull };
1532             DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager);
1533             declDTD->setSystemId(sysId);
1534             declDTD->setIsExternal(true);
1535             Janitor<DTDEntityDecl> janDecl(declDTD);
1536 
1537             // Mark this one as a throw at end
1538             reader->setThrowAtEnd(true);
1539 
1540             // And push it onto the stack, with its pseudo name
1541             fReaderMgr.pushReader(reader, declDTD);
1542 
1543             // Tell it its not in an include section
1544             dtdScanner.scanExtSubsetDecl(false, true);
1545         }
1546     }
1547 }
1548 
scanStartTag(bool & gotData)1549 bool IGXMLScanner::scanStartTag(bool& gotData)
1550 {
1551     //  Assume we will still have data until proven otherwise. It will only
1552     //  ever be false if this is the root and its empty.
1553     gotData = true;
1554 
1555     //  Get the QName. In this case, we are not doing namespaces, so we just
1556     //  use it as is and don't have to break it into parts.
1557     if (!fReaderMgr.getName(fQNameBuf))
1558     {
1559         emitError(XMLErrs::ExpectedElementName);
1560         fReaderMgr.skipToChar(chOpenAngle);
1561         return false;
1562     }
1563 
1564     // Assume it won't be an empty tag
1565     bool isEmpty = false;
1566 
1567     //  Lets try to look up the element in the validator's element decl pool
1568     //  We can pass bogus values for the URI id and the base name. We know that
1569     //  this can only be called if we are doing a DTD style validator and that
1570     //  he will only look at the QName.
1571     //
1572     //  We tell him to fault in a decl if he does not find one.
1573     //  Actually, we *don't* tell him to fault in a decl if he does not find one- NG
1574     bool wasAdded = false;
1575     const XMLCh *rawQName = fQNameBuf.getRawBuffer();
1576     XMLElementDecl* elemDecl = fGrammar->getElemDecl
1577     (
1578         fEmptyNamespaceId
1579         , 0
1580         , rawQName
1581         , Grammar::TOP_LEVEL_SCOPE
1582     );
1583     // look for it in the undeclared pool:
1584     if(!elemDecl)
1585     {
1586         elemDecl = fDTDElemNonDeclPool->getByKey(rawQName);
1587     }
1588     if(!elemDecl)
1589     {
1590         // we're assuming this must be a DTD element.  DTD's can be
1591         // used with or without namespaces, but schemas cannot be used without
1592         // namespaces.
1593         wasAdded = true;
1594         elemDecl = new (fMemoryManager) DTDElementDecl
1595         (
1596             rawQName
1597             , fEmptyNamespaceId
1598             , DTDElementDecl::Any
1599             , fMemoryManager
1600         );
1601         elemDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)elemDecl));
1602     }
1603 
1604     //  We do something different here according to whether we found the
1605     //  element or not.
1606     if (wasAdded)
1607     {
1608         // If validating then emit an error
1609         if (fValidate)
1610         {
1611             // This is to tell the reuse Validator that this element was
1612             // faulted-in, was not an element in the validator pool originally
1613             elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);
1614 
1615             fValidator->emitError
1616             (
1617                 XMLValid::ElementNotDefined
1618                 , elemDecl->getFullName()
1619             );
1620         }
1621     }
1622     else
1623     {
1624         // If its not marked declared and validating, then emit an error
1625         if (fValidate && !elemDecl->isDeclared())
1626         {
1627             fValidator->emitError
1628             (
1629                 XMLValid::ElementNotDefined
1630                 , elemDecl->getFullName()
1631             );
1632         }
1633     }
1634 
1635     // See if its the root element
1636     const bool isRoot = fElemStack.isEmpty();
1637 
1638     // Expand the element stack and add the new element
1639     fElemStack.addLevel(elemDecl, fReaderMgr.getCurrentReaderNum());
1640     fElemStack.setValidationFlag(fValidate);
1641 
1642     //  Validate the element
1643     if (fValidate)
1644         fValidator->validateElement(elemDecl);
1645 
1646     //  If this is the first element and we are validating, check the root
1647     //  element.
1648     if (isRoot)
1649     {
1650         fRootGrammar = fGrammar;
1651 
1652         if (fValidate)
1653         {
1654             //  If a DocType exists, then check if it matches the root name there.
1655             if (fRootElemName && !XMLString::equals(fQNameBuf.getRawBuffer(), fRootElemName))
1656                 fValidator->emitError(XMLValid::RootElemNotLikeDocType);
1657         }
1658     }
1659     else
1660     {
1661         //  If the element stack is not empty, then add this element as a
1662         //  child of the previous top element. If its empty, this is the root
1663         //  elem and is not the child of anything.
1664         fElemStack.addChild(elemDecl->getElementName(), true);
1665     }
1666 
1667     // Skip any whitespace after the name
1668     fReaderMgr.skipPastSpaces();
1669 
1670     //  We loop until we either see a /> or >, handling attribute/value
1671     //  pairs until we get there.
1672     XMLSize_t    attCount = 0;
1673     XMLSize_t    curAttListSize = fAttrList->size();
1674     wasAdded = false;
1675 
1676     fElemCount++;
1677 
1678     while (true)
1679     {
1680         // And get the next non-space character
1681         XMLCh nextCh = fReaderMgr.peekNextChar();
1682 
1683         //  If the next character is not a slash or closed angle bracket,
1684         //  then it must be whitespace, since whitespace is required
1685         //  between the end of the last attribute and the name of the next
1686         //  one.
1687         if (attCount)
1688         {
1689             if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
1690             {
1691                 bool bFoundSpace;
1692                 fReaderMgr.skipPastSpaces(bFoundSpace);
1693                 if (!bFoundSpace)
1694                 {
1695                     // Emit the error but keep on going
1696                     emitError(XMLErrs::ExpectedWhitespace);
1697                 }
1698                 // Ok, peek another char
1699                 nextCh = fReaderMgr.peekNextChar();
1700             }
1701         }
1702 
1703         //  Ok, here we first check for any of the special case characters.
1704         //  If its not one, then we do the normal case processing, which
1705         //  assumes that we've hit an attribute value, Otherwise, we do all
1706         //  the special case checks.
1707         if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
1708         {
1709             //  Assume its going to be an attribute, so get a name from
1710             //  the input.
1711             if (!fReaderMgr.getName(fAttNameBuf))
1712             {
1713                 emitError(XMLErrs::ExpectedAttrName);
1714                 fReaderMgr.skipPastChar(chCloseAngle);
1715                 return false;
1716             }
1717 
1718             // And next must be an equal sign
1719             if (!scanEq())
1720             {
1721                 static const XMLCh tmpList[] =
1722                 {
1723                     chSingleQuote, chDoubleQuote, chCloseAngle
1724                     , chOpenAngle, chForwardSlash, chNull
1725                 };
1726 
1727                 emitError(XMLErrs::ExpectedEqSign);
1728 
1729                 //  Try to sync back up by skipping forward until we either
1730                 //  hit something meaningful.
1731                 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
1732 
1733                 if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
1734                 {
1735                     // Jump back to top for normal processing of these
1736                     continue;
1737                 }
1738                 else if ((chFound == chSingleQuote)
1739                       ||  (chFound == chDoubleQuote)
1740                       ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
1741                 {
1742                     // Just fall through assuming that the value is to follow
1743                 }
1744                 else if (chFound == chOpenAngle)
1745                 {
1746                     // Assume a malformed tag and that new one is starting
1747                     emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
1748                     return false;
1749                 }
1750                 else
1751                 {
1752                     // Something went really wrong
1753                     return false;
1754                 }
1755             }
1756             //  See if this attribute is declared for this element. If we are
1757             //  not validating of course it will not be at first, but we will
1758             //  fault it into the pool (to avoid lots of redundant errors.)
1759             XMLCh * namePtr = fAttNameBuf.getRawBuffer();
1760             XMLAttDef* attDef = ((DTDElementDecl *)elemDecl)->getAttDef(namePtr);
1761 
1762             //  Add this attribute to the attribute list that we use to
1763             //  pass them to the handler. We reuse its existing elements
1764             //  but expand it as required.
1765             // Note that we want to this first since this will
1766             // make a copy of the namePtr; we can then make use of
1767             // that copy in the hashtable lookup that checks
1768             // for duplicates.  This will mean we may have to update
1769             // the type of the XMLAttr later.
1770             XMLAttr* curAtt;
1771             if (attCount >= curAttListSize)
1772             {
1773                 curAtt = new (fMemoryManager) XMLAttr
1774                 (
1775                     0
1776                     , namePtr
1777                     , XMLUni::fgZeroLenString
1778                     , XMLUni::fgZeroLenString
1779                     , (attDef)?attDef->getType():XMLAttDef::CData
1780                     , true
1781                     , fMemoryManager
1782                 );
1783                 fAttrList->addElement(curAtt);
1784             }
1785             else
1786             {
1787                 curAtt = fAttrList->elementAt(attCount);
1788                 curAtt->set
1789                 (
1790                     0
1791                     , namePtr
1792                     , XMLUni::fgZeroLenString
1793                     , XMLUni::fgZeroLenString
1794                     , (attDef)?attDef->getType():XMLAttDef::CData
1795                 );
1796                 curAtt->setSpecified(true);
1797             }
1798             // reset namePtr so it refers to newly-allocated memory
1799             namePtr = (XMLCh *)curAtt->getName();
1800 
1801             if (!attDef)
1802             {
1803                 //  If there is a validation handler, then we are validating
1804                 //  so emit an error.
1805                 if (fValidate)
1806                 {
1807                     fValidator->emitError
1808                     (
1809                         XMLValid::AttNotDefinedForElement
1810                         , fAttNameBuf.getRawBuffer()
1811                         , elemDecl->getFullName()
1812                     );
1813                 }
1814                 if(!fUndeclaredAttrRegistry->putIfNotPresent(namePtr, 0))
1815                 {
1816                     emitError
1817                     (
1818                         XMLErrs::AttrAlreadyUsedInSTag
1819                         , namePtr
1820                         , elemDecl->getFullName()
1821                      );
1822                 }
1823             }
1824             else
1825             {
1826                 // prepare for duplicate detection
1827                 unsigned int *curCountPtr = fAttDefRegistry->get(attDef);
1828                 if(!curCountPtr)
1829                 {
1830                     curCountPtr = getNewUIntPtr();
1831                     *curCountPtr = fElemCount;
1832                     fAttDefRegistry->put(attDef, curCountPtr);
1833                 }
1834                 else if(*curCountPtr < fElemCount)
1835                     *curCountPtr = fElemCount;
1836                 else
1837                 {
1838                     emitError
1839                     (
1840                         XMLErrs::AttrAlreadyUsedInSTag
1841                         , attDef->getFullName()
1842                         , elemDecl->getFullName()
1843                     );
1844                 }
1845             }
1846 
1847             //  Skip any whitespace before the value and then scan the att
1848             //  value. This will come back normalized with entity refs and
1849             //  char refs expanded.
1850             fReaderMgr.skipPastSpaces();
1851             if (!scanAttValue(attDef, namePtr, fAttValueBuf))
1852             {
1853                 static const XMLCh tmpList[] =
1854                 {
1855                     chCloseAngle, chOpenAngle, chForwardSlash, chNull
1856                 };
1857 
1858                 emitError(XMLErrs::ExpectedAttrValue);
1859 
1860                 //  It failed, so lets try to get synced back up. We skip
1861                 //  forward until we find some whitespace or one of the
1862                 //  chars in our list.
1863                 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
1864 
1865                 if ((chFound == chCloseAngle)
1866                 ||  (chFound == chForwardSlash)
1867                 ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
1868                 {
1869                     //  Just fall through and process this attribute, though
1870                     //  the value will be "".
1871                 }
1872                 else if (chFound == chOpenAngle)
1873                 {
1874                     // Assume a malformed tag and that new one is starting
1875                     emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
1876                     return false;
1877                 }
1878                 else
1879                 {
1880                     // Something went really wrong
1881                     return false;
1882                 }
1883             }
1884             // must set the newly-minted value on the XMLAttr:
1885             curAtt->setValue(fAttValueBuf.getRawBuffer());
1886 
1887             //  Now that its all stretched out, lets look at its type and
1888             //  determine if it has a valid value. It will output any needed
1889             //  errors, but we just keep going. We only need to do this if
1890             //  we are validating.
1891             if (attDef)
1892             {
1893                 // Let the validator pass judgement on the attribute value
1894                 if (fValidate)
1895                 {
1896                     fValidator->validateAttrValue
1897                     (
1898                         attDef
1899                         , fAttValueBuf.getRawBuffer()
1900                         , false
1901                         , elemDecl
1902                     );
1903                 }
1904             }
1905 
1906             attCount++;
1907             // And jump back to the top of the loop
1908             continue;
1909         }
1910 
1911         //  It was some special case character so do all of the checks and
1912         //  deal with it.
1913         if (!nextCh)
1914             ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
1915 
1916         if (nextCh == chForwardSlash)
1917         {
1918             fReaderMgr.getNextChar();
1919             isEmpty = true;
1920             if (!fReaderMgr.skippedChar(chCloseAngle))
1921                 emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
1922             break;
1923         }
1924         else if (nextCh == chCloseAngle)
1925         {
1926             fReaderMgr.getNextChar();
1927             break;
1928         }
1929         else if (nextCh == chOpenAngle)
1930         {
1931             //  Check for this one specially, since its going to be common
1932             //  and it is kind of auto-recovering since we've already hit the
1933             //  next open bracket, which is what we would have seeked to (and
1934             //  skipped this whole tag.)
1935             emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
1936             break;
1937         }
1938         else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
1939         {
1940             //  Check for this one specially, which is probably a missing
1941             //  attribute name, e.g. ="value". Just issue expected name
1942             //  error and eat the quoted string, then jump back to the
1943             //  top again.
1944             emitError(XMLErrs::ExpectedAttrName);
1945             fReaderMgr.getNextChar();
1946             fReaderMgr.skipQuotedString(nextCh);
1947             fReaderMgr.skipPastSpaces();
1948             continue;
1949         }
1950     }
1951 
1952     if(attCount)
1953     {
1954         // clean up after ourselves:
1955         // clear the map used to detect duplicate attributes
1956         fUndeclaredAttrRegistry->removeAll();
1957     }
1958 
1959     //  Ok, so lets get an enumerator for the attributes of this element
1960     //  and run through them for well formedness and validity checks. But
1961     //  make sure that we had any attributes before we do it, since the list
1962     //  would have have gotten faulted in anyway.
1963     if (elemDecl->hasAttDefs())
1964     {
1965         // N.B.:  this assumes DTD validation.
1966         XMLAttDefList& attDefList = elemDecl->getAttDefList();
1967         for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++)
1968         {
1969             // Get the current att def, for convenience and its def type
1970             const XMLAttDef& curDef = attDefList.getAttDef(i);
1971             const XMLAttDef::DefAttTypes defType = curDef.getDefaultType();
1972 
1973             unsigned int *attCountPtr = fAttDefRegistry->get(&curDef);
1974             if (!attCountPtr || *attCountPtr < fElemCount)
1975             { // did not occur
1976                 if (fValidate)
1977                 {
1978                     // If we are validating and its required, then an error
1979                     if (defType == XMLAttDef::Required)
1980                     {
1981                         fValidator->emitError
1982                         (
1983                             XMLValid::RequiredAttrNotProvided
1984                             , curDef.getFullName()
1985                         );
1986                     }
1987                     else if ((defType == XMLAttDef::Default) ||
1988 		                       (defType == XMLAttDef::Fixed)  )
1989                     {
1990                         if (fStandalone && curDef.isExternal())
1991                         {
1992                             // XML 1.0 Section 2.9
1993                             // Document is standalone, so attributes must not be defaulted.
1994                             fValidator->emitError(XMLValid::NoDefAttForStandalone, curDef.getFullName(), elemDecl->getFullName());
1995 
1996                         }
1997                     }
1998                 }
1999 
2000                 // Fault in the value if needed, and bump the att count
2001                 if ((defType == XMLAttDef::Default)
2002                 ||  (defType == XMLAttDef::Fixed))
2003                 {
2004                     // Let the validator pass judgement on the attribute value
2005                     if (fValidate)
2006                     {
2007                         fValidator->validateAttrValue
2008                         (
2009                             &curDef
2010                             , curDef.getValue()
2011                             , false
2012                             , elemDecl
2013                         );
2014                     }
2015 
2016                     XMLAttr* curAtt;
2017                     if (attCount >= curAttListSize)
2018                     {
2019                         curAtt = new (fMemoryManager) XMLAttr
2020                         (
2021                             0
2022                             , curDef.getFullName()
2023                             , XMLUni::fgZeroLenString
2024                             , curDef.getValue()
2025                             , curDef.getType()
2026                             , false
2027                             , fMemoryManager
2028                         );
2029                         fAttrList->addElement(curAtt);
2030                         curAttListSize++;
2031                     }
2032                     else
2033                     {
2034                         curAtt = fAttrList->elementAt(attCount);
2035                         curAtt->set
2036                         (
2037                             0
2038                             , curDef.getFullName()
2039                             , XMLUni::fgZeroLenString
2040                             , curDef.getValue()
2041                             , curDef.getType()
2042                         );
2043                         curAtt->setSpecified(false);
2044                     }
2045                     attCount++;
2046                 }
2047             }
2048         }
2049     }
2050 
2051     //  If empty, validate content right now if we are validating and then
2052     //  pop the element stack top. Else, we have to update the current stack
2053     //  top's namespace mapping elements.
2054     if (isEmpty)
2055     {
2056         // If validating, then insure that its legal to have no content
2057         if (fValidate)
2058         {
2059             XMLSize_t failure;
2060             bool res = fValidator->checkContent(elemDecl, 0, 0, &failure);
2061             if (!res)
2062             {
2063                 fValidator->emitError
2064                 (
2065                     XMLValid::ElementNotValidForContent
2066                     , elemDecl->getFullName()
2067                     , elemDecl->getFormattedContentModel()
2068                 );
2069             }
2070         }
2071 
2072         // Pop the element stack back off since it'll never be used now
2073         fElemStack.popTop();
2074 
2075         // If the elem stack is empty, then it was an empty root
2076         if (isRoot)
2077             gotData = false;
2078         else {
2079             // Restore the validation flag
2080             fValidate = fElemStack.getValidationFlag();
2081         }
2082     }
2083 
2084     //  If we have a document handler, then tell it about this start tag. We
2085     //  don't have any URI id to send along, so send fEmptyNamespaceId. We also do not send
2086     //  any prefix since its just one big name if we are not doing namespaces.
2087     if (fDocHandler)
2088     {
2089         fDocHandler->startElement
2090         (
2091             *elemDecl
2092             , fEmptyNamespaceId
2093             , 0
2094             , *fAttrList
2095             , attCount
2096             , isEmpty
2097             , isRoot
2098         );
2099     }
2100 
2101     return true;
2102 }
2103 
2104 
2105 //  This method is called to scan a start tag when we are processing
2106 //  namespaces. There are two different versions of this method, one for
2107 //  namespace aware processing and one for non-namespace aware processing.
2108 //
2109 //  This method is called after we've scanned the < of a start tag. So we
2110 //  have to get the element name, then scan the attributes, after which
2111 //  we are either going to see >, />, or attributes followed by one of those
2112 //  sequences.
scanStartTagNS(bool & gotData)2113 bool IGXMLScanner::scanStartTagNS(bool& gotData)
2114 {
2115     //  Assume we will still have data until proven otherwise. It will only
2116     //  ever be false if this is the root and its empty.
2117     gotData = true;
2118 
2119     // Reset element content buffer
2120     fContent.reset();
2121 
2122     //  The current position is after the open bracket, so we need to read in
2123     //  in the element name.
2124     int prefixColonPos;
2125     if (!fReaderMgr.getQName(fQNameBuf, &prefixColonPos))
2126     {
2127         if (fQNameBuf.isEmpty())
2128             emitError(XMLErrs::ExpectedElementName);
2129         else
2130             emitError(XMLErrs::InvalidElementName, fQNameBuf.getRawBuffer());
2131         fReaderMgr.skipToChar(chOpenAngle);
2132         return false;
2133     }
2134 
2135     // See if its the root element
2136     const bool isRoot = fElemStack.isEmpty();
2137 
2138     // Skip any whitespace after the name
2139     fReaderMgr.skipPastSpaces();
2140 
2141     //  First we have to do the rawest attribute scan. We don't do any
2142     //  normalization of them at all, since we don't know yet what type they
2143     //  might be (since we need the element decl in order to do that.)
2144     bool isEmpty;
2145     XMLSize_t attCount = rawAttrScan
2146     (
2147         fQNameBuf.getRawBuffer()
2148         , *fRawAttrList
2149         , isEmpty
2150     );
2151 
2152     // save the contentleafname and currentscope before addlevel, for later use
2153     ContentLeafNameTypeVector* cv = 0;
2154     XMLContentModel* cm = 0;
2155     unsigned int currentScope = Grammar::TOP_LEVEL_SCOPE;
2156     bool laxThisOne = false;
2157 
2158     if (!isRoot && fGrammarType == Grammar::SchemaGrammarType)
2159     {
2160         // schema validator will have correct type if validating
2161         SchemaElementDecl* tempElement = (SchemaElementDecl*)
2162             fElemStack.topElement()->fThisElement;
2163         SchemaElementDecl::ModelTypes modelType = tempElement->getModelType();
2164         ComplexTypeInfo *currType = 0;
2165 
2166         if (fValidate)
2167         {
2168             currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
2169             if (currType)
2170                 modelType = (SchemaElementDecl::ModelTypes)currType->getContentType();
2171             else // something must have gone wrong
2172                 modelType = SchemaElementDecl::Any;
2173         }
2174         else
2175         {
2176             currType = tempElement->getComplexTypeInfo();
2177         }
2178 
2179         if ((modelType == SchemaElementDecl::Mixed_Simple)
2180           ||  (modelType == SchemaElementDecl::Mixed_Complex)
2181           ||  (modelType == SchemaElementDecl::Children))
2182         {
2183             cm = currType->getContentModel();
2184             cv = cm->getContentLeafNameTypeVector();
2185             currentScope = fElemStack.getCurrentScope();
2186         }
2187         else if (modelType == SchemaElementDecl::Any) {
2188             laxThisOne = true;
2189         }
2190     }
2191 
2192     //  Now, since we might have to update the namespace map for this element,
2193     //  but we don't have the element decl yet, we just tell the element stack
2194     //  to expand up to get ready.
2195     XMLSize_t elemDepth = fElemStack.addLevel();
2196     fElemStack.setValidationFlag(fValidate);
2197     fElemStack.setPrefixColonPos(prefixColonPos);
2198 
2199     //  Check if there is any external schema location specified, and if we are at root,
2200     //  go through them first before scanning those specified in the instance document
2201     if (isRoot && fDoSchema
2202         && (fExternalSchemaLocation || fExternalNoNamespaceSchemaLocation)) {
2203 
2204         if (fExternalSchemaLocation)
2205             parseSchemaLocation(fExternalSchemaLocation, true);
2206         if (fExternalNoNamespaceSchemaLocation)
2207             resolveSchemaGrammar(fExternalNoNamespaceSchemaLocation, XMLUni::fgZeroLenString, true);
2208     }
2209 
2210     //  Make an initial pass through the list and find any xmlns attributes or
2211     //  schema attributes.
2212     if (attCount) {
2213         scanRawAttrListforNameSpaces(attCount);
2214     }
2215 
2216     //  Also find any default or fixed xmlns attributes in DTD defined for
2217     //  this element.
2218     XMLElementDecl* elemDecl = 0;
2219     const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
2220 
2221     if (fGrammarType == Grammar::DTDGrammarType) {
2222 
2223         if (!fSkipDTDValidation) {
2224             elemDecl = fGrammar->getElemDecl(
2225                 fEmptyNamespaceId, 0, qnameRawBuf, Grammar::TOP_LEVEL_SCOPE
2226             );
2227 
2228             if (elemDecl) {
2229                 if (elemDecl->hasAttDefs()) {
2230                     XMLAttDefList& attDefList = elemDecl->getAttDefList();
2231                     for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++)
2232                     {
2233                         // Get the current att def, for convenience and its def type
2234                         const XMLAttDef& curDef = attDefList.getAttDef(i);
2235                         const XMLAttDef::DefAttTypes defType = curDef.getDefaultType();
2236 
2237                         // update the NSMap if there are any default/fixed xmlns attributes
2238                         if ((defType == XMLAttDef::Default)
2239                         ||  (defType == XMLAttDef::Fixed))
2240                         {
2241                             const XMLCh* rawPtr = curDef.getFullName();
2242                             if (!XMLString::compareNString(rawPtr, XMLUni::fgXMLNSColonString, 6)
2243                             ||  XMLString::equals(rawPtr, XMLUni::fgXMLNSString))
2244                                 updateNSMap(rawPtr, curDef.getValue());
2245                         }
2246                     }
2247                 }
2248             }
2249         }
2250 
2251         if (!elemDecl) {
2252             elemDecl = fDTDElemNonDeclPool->getByKey(qnameRawBuf);
2253         }
2254     }
2255 
2256     //  Resolve the qualified name to a URI and name so that we can look up
2257     //  the element decl for this element. We have now update the prefix to
2258     //  namespace map so we should get the correct element now.
2259     unsigned int uriId = resolveQNameWithColon(
2260         qnameRawBuf, fPrefixBuf, ElemStack::Mode_Element, prefixColonPos
2261     );
2262 
2263     //if schema, check if we should lax or skip the validation of this element
2264     bool parentValidation = fValidate;
2265     if (cv) {
2266         QName element(fPrefixBuf.getRawBuffer(), &qnameRawBuf[prefixColonPos + 1], uriId, fMemoryManager);
2267         // elementDepth will be > 0, as cv is only constructed if element is not
2268         // root.
2269         laxThisOne = laxElementValidation(&element, cv, cm, elemDepth - 1);
2270     }
2271 
2272     //  Look up the element now in the grammar. This will get us back a
2273     //  generic element decl object. We tell him to fault one in if he does
2274     //  not find it.
2275     bool wasAdded = false;
2276     const XMLCh* nameRawBuf = &qnameRawBuf[prefixColonPos + 1];
2277 
2278     if (fDoSchema) {
2279 
2280         if (fGrammarType == Grammar::DTDGrammarType) {
2281             if (!switchGrammar(getURIText(uriId))) {
2282                 fValidator->emitError(
2283                     XMLValid::GrammarNotFound, getURIText(uriId)
2284                 );
2285             }
2286         }
2287 
2288         if (fGrammarType == Grammar::SchemaGrammarType) {
2289             elemDecl = fGrammar->getElemDecl(
2290                 uriId, nameRawBuf, qnameRawBuf, currentScope
2291             );
2292 
2293             // if not found, then it may be a reference, try TOP_LEVEL_SCOPE
2294             if (!elemDecl) {
2295                 bool checkTopLevel = (currentScope != Grammar::TOP_LEVEL_SCOPE);
2296                 const XMLCh* original_uriStr = fGrammar->getTargetNamespace();
2297                 unsigned int orgGrammarUri = fURIStringPool->getId(original_uriStr);
2298 
2299                 if (orgGrammarUri != uriId) {
2300                     if (switchGrammar(getURIText(uriId))) {
2301                         checkTopLevel = true;
2302                     }
2303                     else {
2304                         // the laxElementValidation routine (called above) will
2305                         // set fValidate to false for a "skipped" element
2306                         if (!laxThisOne && fValidate) {
2307                             fValidator->emitError(
2308                                 XMLValid::GrammarNotFound, getURIText(uriId)
2309                             );
2310                         }
2311                         checkTopLevel = false;
2312                     }
2313                 }
2314 
2315                 if (checkTopLevel) {
2316                     elemDecl = fGrammar->getElemDecl(
2317                         uriId, nameRawBuf, qnameRawBuf, Grammar::TOP_LEVEL_SCOPE
2318                     );
2319                 }
2320 
2321                 if (!elemDecl && currentScope != Grammar::TOP_LEVEL_SCOPE) {
2322 
2323                     if (orgGrammarUri == uriId) {
2324                         // still not found in specified uri
2325                         // try emptyNamespace see if element should be
2326                         // un-qualified.
2327                         // Use a temp variable until we decide this is the case
2328                         if (uriId != fEmptyNamespaceId) {
2329                             XMLElementDecl* tempElemDecl = fGrammar->getElemDecl(
2330                                 fEmptyNamespaceId, nameRawBuf, qnameRawBuf, currentScope
2331                             );
2332 
2333                             if (tempElemDecl && tempElemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) {
2334                                 fValidator->emitError(
2335                                     XMLValid::ElementNotUnQualified, qnameRawBuf
2336                                 );
2337                                 elemDecl = tempElemDecl;
2338                             }
2339                         }
2340                     }
2341                     // still Not found in specified uri
2342                     // go to original Grammar again to see if element needs
2343                     // to be fully qualified.
2344                     // Use a temp variable until we decide this is the case
2345                     else if (uriId == fEmptyNamespaceId) {
2346 
2347                         if (switchGrammar(original_uriStr)) {
2348                             XMLElementDecl* tempElemDecl = fGrammar->getElemDecl(
2349                                 orgGrammarUri, nameRawBuf, qnameRawBuf, currentScope
2350                             );
2351                             if (tempElemDecl && tempElemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) {
2352                                 fValidator->emitError(
2353                                     XMLValid::ElementNotQualified, qnameRawBuf
2354                                 );
2355                                 elemDecl = tempElemDecl;
2356                             }
2357                         }
2358                         else if (!laxThisOne && fValidate) {
2359                             fValidator->emitError(
2360                                 XMLValid::GrammarNotFound,original_uriStr
2361                             );
2362                         }
2363                     }
2364                 }
2365 
2366                 if (!elemDecl) {
2367                     // still not found
2368                     // switch back to original grammar first if necessary
2369                     if (orgGrammarUri != uriId) {
2370                         switchGrammar(original_uriStr);
2371                     }
2372 
2373                     // look in the list of undeclared elements, as would have been
2374                     // done before we made grammars stateless:
2375                     elemDecl = fSchemaElemNonDeclPool->getByKey(
2376                         nameRawBuf, uriId, (int)Grammar::TOP_LEVEL_SCOPE
2377                     );
2378                 }
2379             }
2380         }
2381     }
2382 
2383     if (!elemDecl) {
2384 
2385         if (fGrammarType == Grammar::DTDGrammarType) {
2386             elemDecl = new (fMemoryManager) DTDElementDecl(
2387                 qnameRawBuf, uriId, DTDElementDecl::Any, fMemoryManager
2388             );
2389             elemDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)elemDecl));
2390         }
2391         else if (fGrammarType == Grammar::SchemaGrammarType)  {
2392             elemDecl = new (fMemoryManager) SchemaElementDecl(
2393                 fPrefixBuf.getRawBuffer(), nameRawBuf, uriId
2394                 , SchemaElementDecl::Any, Grammar::TOP_LEVEL_SCOPE
2395                 , fMemoryManager
2396             );
2397             elemDecl->setId(
2398                 fSchemaElemNonDeclPool->put((void*)elemDecl->getBaseName()
2399                 , uriId, (int)Grammar::TOP_LEVEL_SCOPE, (SchemaElementDecl*)elemDecl)
2400             );
2401         }
2402         wasAdded = true;
2403     }
2404 
2405     // this info needed for DOMTypeInfo
2406     fPSVIElemContext.fErrorOccurred = false;
2407 
2408     //  We do something different here according to whether we found the
2409     //  element or not.
2410     bool bXsiTypeSet= (fValidator && fGrammarType == Grammar::SchemaGrammarType)?((SchemaValidator*)fValidator)->getIsXsiTypeSet():false;
2411     if (wasAdded)
2412     {
2413         if (laxThisOne && !bXsiTypeSet) {
2414             fValidate = false;
2415             fElemStack.setValidationFlag(fValidate);
2416         }
2417         else if (fValidate)
2418         {
2419             // If validating then emit an error
2420 
2421             // This is to tell the reuse Validator that this element was
2422             // faulted-in, was not an element in the grammar pool originally
2423             elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);
2424 
2425             // xsi:type was specified, don't complain about missing definition
2426             if(!bXsiTypeSet)
2427             {
2428                 fValidator->emitError
2429                 (
2430                     XMLValid::ElementNotDefined
2431                     , elemDecl->getFullName()
2432                 );
2433 
2434                 if(fGrammarType == Grammar::SchemaGrammarType)
2435                 {
2436                     fPSVIElemContext.fErrorOccurred = true;
2437                 }
2438             }
2439         }
2440     }
2441     else
2442     {
2443         // If its not marked declared and validating, then emit an error
2444         if (!elemDecl->isDeclared()) {
2445             if(elemDecl->getCreateReason() == XMLElementDecl::NoReason) {
2446                 if(!bXsiTypeSet && fGrammarType == Grammar::SchemaGrammarType) {
2447                     fPSVIElemContext.fErrorOccurred = true;
2448                 }
2449             }
2450 
2451             if (laxThisOne) {
2452                 fValidate = false;
2453                 fElemStack.setValidationFlag(fValidate);
2454             }
2455             else if (fValidate && !bXsiTypeSet)
2456             {
2457                 fValidator->emitError
2458                 (
2459                     XMLValid::ElementNotDefined
2460                     , elemDecl->getFullName()
2461                 );
2462             }
2463         }
2464     }
2465 
2466     //  Now we can update the element stack to set the current element
2467     //  decl. We expanded the stack above, but couldn't store the element
2468     //  decl because we didn't know it yet.
2469     fElemStack.setElement(elemDecl, fReaderMgr.getCurrentReaderNum());
2470     fElemStack.setCurrentURI(uriId);
2471 
2472     if (isRoot)
2473     {
2474         fRootGrammar = fGrammar;
2475         if (fGrammarType == Grammar::SchemaGrammarType && !fRootElemName)
2476             fRootElemName = XMLString::replicate(qnameRawBuf, fMemoryManager);
2477     }
2478 
2479     if (fGrammarType == Grammar::SchemaGrammarType && fPSVIHandler)
2480     {
2481 
2482         fPSVIElemContext.fElemDepth++;
2483         if (elemDecl->isDeclared())
2484         {
2485             fPSVIElemContext.fNoneValidationDepth = fPSVIElemContext.fElemDepth;
2486         }
2487         else
2488         {
2489             fPSVIElemContext.fFullValidationDepth = fPSVIElemContext.fElemDepth;
2490 
2491             /******
2492              * While we report an error for historical reasons, this should
2493              * actually result in lax assessment - NG.
2494             if (isRoot && fValidate)
2495                 fPSVIElemContext.fErrorOccurred = true;
2496             *****/
2497         }
2498     }
2499 
2500     //  Validate the element
2501     if (fValidate)
2502     {
2503         fValidator->validateElement(elemDecl);
2504         if (fValidator->handlesSchema())
2505         {
2506             if (((SchemaValidator*) fValidator)->getErrorOccurred())
2507                 fPSVIElemContext.fErrorOccurred = true;
2508         }
2509     }
2510 
2511     if (fGrammarType == Grammar::SchemaGrammarType) {
2512 
2513         // squirrel away the element's QName, so that we can do an efficient
2514         // end-tag match
2515         fElemStack.setCurrentSchemaElemName(fQNameBuf.getRawBuffer());
2516 
2517         ComplexTypeInfo* typeinfo = (fValidate)
2518             ? ((SchemaValidator*)fValidator)->getCurrentTypeInfo()
2519             : ((SchemaElementDecl*) elemDecl)->getComplexTypeInfo();
2520 
2521         if (typeinfo) {
2522             currentScope = typeinfo->getScopeDefined();
2523 
2524             // switch grammar if the typeinfo has a different grammar (happens when there is xsi:type)
2525             XMLCh* typeName = typeinfo->getTypeName();
2526             const int comma = XMLString::indexOf(typeName, chComma);
2527             if (comma > 0) {
2528                 XMLBuffer prefixBuf(comma+1, fMemoryManager);
2529                 prefixBuf.append(typeName, comma);
2530                 const XMLCh* uriStr = prefixBuf.getRawBuffer();
2531 
2532                 bool errorCondition = !switchGrammar(uriStr) && fValidate;
2533                 if (errorCondition && !laxThisOne)
2534                 {
2535                     fValidator->emitError
2536                     (
2537                         XMLValid::GrammarNotFound
2538                         , prefixBuf.getRawBuffer()
2539                     );
2540                 }
2541             }
2542             else if (comma == 0) {
2543                 bool errorCondition = !switchGrammar(XMLUni::fgZeroLenString) && fValidate;
2544                 if (errorCondition && !laxThisOne)
2545                 {
2546                     fValidator->emitError
2547                     (
2548                         XMLValid::GrammarNotFound
2549                         , XMLUni::fgZeroLenString
2550                     );
2551                 }
2552             }
2553         }
2554         fElemStack.setCurrentScope(currentScope);
2555 
2556         // Set element next state
2557         if (elemDepth >= fElemStateSize) {
2558             resizeElemState();
2559         }
2560 
2561         fElemState[elemDepth] = 0;
2562         fElemLoopState[elemDepth] = 0;
2563     }
2564 
2565     fElemStack.setCurrentGrammar(fGrammar);
2566 
2567     //  If this is the first element and we are validating, check the root
2568     //  element.
2569     if (isRoot)
2570     {
2571         if (fValidate)
2572         {
2573             //  If a DocType exists, then check if it matches the root name there.
2574             if (fRootElemName && !XMLString::equals(qnameRawBuf, fRootElemName))
2575                 fValidator->emitError(XMLValid::RootElemNotLikeDocType);
2576         }
2577     }
2578     else if (parentValidation)
2579     {
2580         //  If the element stack is not empty, then add this element as a
2581         //  child of the previous top element. If its empty, this is the root
2582         //  elem and is not the child of anything.
2583         fElemStack.addChild(elemDecl->getElementName(), true);
2584     }
2585 
2586     // PSVI handling:  even if it turns out there are
2587     // no attributes, we need to reset this list...
2588     if(getPSVIHandler() && fGrammarType == Grammar::SchemaGrammarType )
2589         fPSVIAttrList->reset();
2590 
2591     //  Now lets get the fAttrList filled in. This involves faulting in any
2592     //  defaulted and fixed attributes and normalizing the values of any that
2593     //  we got explicitly.
2594     //
2595     //  We update the attCount value with the total number of attributes, but
2596     //  it goes in with the number of values we got during the raw scan of
2597     //  explictly provided attrs above.
2598     attCount = buildAttList(*fRawAttrList, attCount, elemDecl, *fAttrList);
2599     if(attCount)
2600     {
2601         // clean up after ourselves:
2602         // clear the map used to detect duplicate attributes
2603         fUndeclaredAttrRegistry->removeAll();
2604     }
2605 
2606     // activate identity constraints
2607     if (fGrammar  &&
2608         fGrammarType == Grammar::SchemaGrammarType &&
2609         toCheckIdentityConstraint())
2610     {
2611         fICHandler->activateIdentityConstraint
2612                         (
2613                           (SchemaElementDecl*) elemDecl
2614                         , (int) elemDepth
2615                         , uriId
2616                         , fPrefixBuf.getRawBuffer()
2617                         , *fAttrList
2618                         , attCount
2619                         , fValidationContext
2620                         );
2621     }
2622 
2623     // Since the element may have default values, call start tag now regardless if it is empty or not
2624     // If we have a document handler, then tell it about this start tag
2625     if (fDocHandler)
2626     {
2627         fDocHandler->startElement
2628         (
2629             *elemDecl
2630             , uriId
2631             , fPrefixBuf.getRawBuffer()
2632             , *fAttrList
2633             , attCount
2634             , false
2635             , isRoot
2636         );
2637     }
2638 
2639     // if we have a PSVIHandler, now's the time to call
2640     // its handleAttributesPSVI method:
2641     if(fPSVIHandler && fGrammarType == Grammar::SchemaGrammarType)
2642     {
2643         QName *eName = elemDecl->getElementName();
2644         fPSVIHandler->handleAttributesPSVI
2645         (
2646             eName->getLocalPart()
2647             , fURIStringPool->getValueForId(eName->getURI())
2648             , fPSVIAttrList
2649         );
2650     }
2651 
2652     //  If empty, validate content right now if we are validating and then
2653     //  pop the element stack top. Else, we have to update the current stack
2654     //  top's namespace mapping elements.
2655     if (isEmpty)
2656     {
2657         // Pop the element stack back off since it'll never be used now
2658         fElemStack.popTop();
2659 
2660         // reset current type info
2661         DatatypeValidator* psviMemberType = 0;
2662         if (fGrammarType == Grammar::SchemaGrammarType)
2663         {
2664             if (fValidate && elemDecl->isDeclared())
2665             {
2666                 fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
2667                 if(!fPSVIElemContext.fCurrentTypeInfo)
2668                     fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
2669                 else
2670                     fPSVIElemContext.fCurrentDV = 0;
2671                 if(fPSVIHandler)
2672                 {
2673                     fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) fValidator)->getNormalizedValue();
2674 
2675                     if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString))
2676                         fPSVIElemContext.fNormalizedValue = 0;
2677                 }
2678             }
2679             else
2680             {
2681                 fPSVIElemContext.fCurrentDV = 0;
2682                 fPSVIElemContext.fCurrentTypeInfo = 0;
2683                 fPSVIElemContext.fNormalizedValue = 0;
2684             }
2685         }
2686 
2687         // If validating, then insure that its legal to have no content
2688         if (fValidate)
2689         {
2690             XMLSize_t failure;
2691             bool res = fValidator->checkContent(elemDecl, 0, 0, &failure);
2692             if (!res)
2693             {
2694                 fValidator->emitError
2695                 (
2696                     XMLValid::ElementNotValidForContent
2697                     , elemDecl->getFullName()
2698                     , elemDecl->getFormattedContentModel()
2699                 );
2700             }
2701 
2702             if (fGrammarType == Grammar::SchemaGrammarType) {
2703 
2704                 if (((SchemaValidator*) fValidator)->getErrorOccurred())
2705                 {
2706                     fPSVIElemContext.fErrorOccurred = true;
2707                 }
2708                 else
2709                 {
2710                     if (fPSVIHandler)
2711                     {
2712                         fPSVIElemContext.fIsSpecified = ((SchemaValidator*) fValidator)->getIsElemSpecified();
2713                         if(fPSVIElemContext.fIsSpecified)
2714                             fPSVIElemContext.fNormalizedValue = ((SchemaElementDecl *)elemDecl)->getDefaultValue();
2715                     }
2716                     // note that if we're empty, won't be a current DV
2717                     if (fPSVIElemContext.fCurrentDV && fPSVIElemContext.fCurrentDV->getType() == DatatypeValidator::Union)
2718                         psviMemberType = fValidationContext->getValidatingMemberType();
2719                 }
2720 
2721                 // call matchers and de-activate context
2722                 if (toCheckIdentityConstraint())
2723                 {
2724                     fICHandler->deactivateContext
2725                                    (
2726                                     (SchemaElementDecl *) elemDecl
2727                                   , fContent.getRawBuffer()
2728                                   , fValidationContext
2729                                   , fPSVIElemContext.fCurrentDV
2730                                    );
2731                 }
2732 
2733             }
2734         }
2735         else if (fGrammarType == Grammar::SchemaGrammarType) {
2736             ((SchemaValidator*)fValidator)->resetNillable();
2737         }
2738 
2739         if (fGrammarType == Grammar::SchemaGrammarType)
2740         {
2741             if (fPSVIHandler)
2742             {
2743                 endElementPSVI((SchemaElementDecl*)elemDecl, psviMemberType);
2744             }
2745         }
2746 
2747         // If we have a doc handler, tell it about the end tag
2748         if (fDocHandler)
2749         {
2750             fDocHandler->endElement
2751             (
2752                 *elemDecl
2753                 , uriId
2754                 , isRoot
2755                 , fPrefixBuf.getRawBuffer()
2756             );
2757         }
2758 
2759         // If the elem stack is empty, then it was an empty root
2760         if (isRoot)
2761             gotData = false;
2762         else
2763         {
2764             // Restore the grammar
2765             fGrammar = fElemStack.getCurrentGrammar();
2766             fGrammarType = fGrammar->getGrammarType();
2767             if (fGrammarType == Grammar::SchemaGrammarType && !fValidator->handlesSchema()) {
2768                 if (fValidatorFromUser)
2769                     ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
2770                 else {
2771                     fValidator = fSchemaValidator;
2772                 }
2773             }
2774             else if (fGrammarType == Grammar::DTDGrammarType && !fValidator->handlesDTD()) {
2775                 if (fValidatorFromUser)
2776                     ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager);
2777                 else {
2778                     fValidator = fDTDValidator;
2779                 }
2780             }
2781 
2782             fValidator->setGrammar(fGrammar);
2783 
2784             // Restore the validation flag
2785             fValidate = fElemStack.getValidationFlag();
2786         }
2787     }
2788     else if (fGrammarType == Grammar::SchemaGrammarType)
2789     {
2790         // send a partial element psvi
2791         if (fPSVIHandler)
2792         {
2793 
2794             ComplexTypeInfo*   curTypeInfo = 0;
2795             DatatypeValidator* curDV = 0;
2796             XSTypeDefinition*  typeDef = 0;
2797 
2798             if (fValidate && elemDecl->isDeclared())
2799             {
2800                 curTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
2801 
2802                 if (curTypeInfo)
2803                 {
2804                     typeDef = (XSTypeDefinition*) fModel->getXSObject(curTypeInfo);
2805                 }
2806                 else
2807                 {
2808                     curDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
2809 
2810                     if (curDV)
2811                     {
2812                         typeDef = (XSTypeDefinition*) fModel->getXSObject(curDV);
2813                     }
2814                 }
2815             }
2816 
2817             fPSVIElement->reset
2818                 (
2819                   PSVIElement::VALIDITY_NOTKNOWN
2820                 , PSVIElement::VALIDATION_NONE
2821                 , fRootElemName
2822                 , ((SchemaValidator*) fValidator)->getIsElemSpecified()
2823                 , (elemDecl->isDeclared()) ? (XSElementDeclaration*) fModel->getXSObject(elemDecl) : 0
2824                 , typeDef
2825                 , 0 //memberType
2826                 , fModel
2827                 , ((SchemaElementDecl*)elemDecl)->getDefaultValue()
2828                 , 0
2829                 , 0
2830                 , 0
2831                 );
2832 
2833 
2834             fPSVIHandler->handlePartialElementPSVI
2835                 (
2836                   elemDecl->getBaseName()
2837                 , fURIStringPool->getValueForId(elemDecl->getURI())
2838                 , fPSVIElement
2839                 );
2840 
2841         }
2842 
2843         // not empty
2844         fErrorStack->push(fPSVIElemContext.fErrorOccurred);
2845     }
2846 
2847     return true;
2848 }
2849 
2850 
2851 // ---------------------------------------------------------------------------
//  IGXMLScanner: Helper methods
2853 // ---------------------------------------------------------------------------
resizeElemState()2854 void IGXMLScanner::resizeElemState() {
2855 
2856     unsigned int newSize = fElemStateSize * 2;
2857     unsigned int* newElemState = (unsigned int*) fMemoryManager->allocate
2858     (
2859         newSize * sizeof(unsigned int)
2860     ); //new unsigned int[newSize];
2861     unsigned int* newElemLoopState = (unsigned int*) fMemoryManager->allocate
2862     (
2863         newSize * sizeof(unsigned int)
2864     ); //new unsigned int[newSize];
2865 
2866     // Copy the existing values
2867     unsigned int index = 0;
2868     for (; index < fElemStateSize; index++)
2869     {
2870         newElemState[index] = fElemState[index];
2871         newElemLoopState[index] = fElemLoopState[index];
2872     }
2873 
2874     for (; index < newSize; index++)
2875         newElemLoopState[index] = newElemState[index] = 0;
2876 
2877     // Delete the old array and udpate our members
2878     fMemoryManager->deallocate(fElemState); //delete [] fElemState;
2879     fMemoryManager->deallocate(fElemLoopState); //delete [] fElemState;
2880     fElemState = newElemState;
2881     fElemLoopState = newElemLoopState;
2882     fElemStateSize = newSize;
2883 }
2884 
resizeRawAttrColonList()2885 void IGXMLScanner::resizeRawAttrColonList() {
2886 
2887     unsigned int newSize = fRawAttrColonListSize * 2;
2888     int* newRawAttrColonList = (int*) fMemoryManager->allocate
2889     (
2890         newSize * sizeof(int)
2891     ); //new int[newSize];
2892 
2893     // Copy the existing values
2894     unsigned int index = 0;
2895     for (; index < fRawAttrColonListSize; index++)
2896         newRawAttrColonList[index] = fRawAttrColonList[index];
2897 
2898     // Delete the old array and udpate our members
2899     fMemoryManager->deallocate(fRawAttrColonList); //delete [] fRawAttrColonList;
2900     fRawAttrColonList = newRawAttrColonList;
2901     fRawAttrColonListSize = newSize;
2902 }
2903 
2904 // ---------------------------------------------------------------------------
2905 //  IGXMLScanner: Grammar preparsing
2906 // ---------------------------------------------------------------------------
loadGrammar(const InputSource & src,const short grammarType,const bool toCache)2907 Grammar* IGXMLScanner::loadGrammar(const   InputSource& src
2908                                    , const short        grammarType
2909                                    , const bool         toCache)
2910 {
2911     Grammar* loadedGrammar = 0;
2912 
2913     ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
2914 
2915     try
2916     {
2917         fGrammarResolver->cacheGrammarFromParse(false);
2918 		// if the new grammar has to be cached, better use the already cached
2919 		// grammars, or the an exception will be thrown when caching an already
2920 		// cached grammar
2921         fGrammarResolver->useCachedGrammarInParse(toCache);
2922         fRootGrammar = 0;
2923 
2924         if (fValScheme == Val_Auto) {
2925             fValidate = true;
2926         }
2927 
2928         // Reset some status flags
2929         fInException = false;
2930         fStandalone = false;
2931         fErrorCount = 0;
2932         fHasNoDTD = true;
2933         fSeeXsi = false;
2934 
2935         if (grammarType == Grammar::SchemaGrammarType) {
2936             loadedGrammar = loadXMLSchemaGrammar(src, toCache);
2937         }
2938         else if (grammarType == Grammar::DTDGrammarType) {
2939             loadedGrammar = loadDTDGrammar(src, toCache);
2940         }
2941     }
2942     //  NOTE:
2943     //
2944     //  In all of the error processing below, the emitError() call MUST come
2945     //  before the flush of the reader mgr, or it will fail because it tries
2946     //  to find out the position in the XML source of the error.
2947     catch(const XMLErrs::Codes)
2948     {
2949         // This is a 'first fatal error' type exit, so fall through
2950     }
2951     catch(const XMLValid::Codes)
2952     {
2953         // This is a 'first fatal error' type exit, so fall through
2954     }
2955     catch(const XMLException& excToCatch)
2956     {
2957         //  Emit the error and catch any user exception thrown from here. Make
2958         //  sure in all cases we flush the reader manager.
2959         fInException = true;
2960         try
2961         {
2962             if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
2963                 emitError
2964                 (
2965                     XMLErrs::XMLException_Warning
2966                     , excToCatch.getCode()
2967                     , excToCatch.getMessage()
2968                 );
2969             else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
2970                 emitError
2971                 (
2972                     XMLErrs::XMLException_Fatal
2973                     , excToCatch.getCode()
2974                     , excToCatch.getMessage()
2975                 );
2976             else
2977                 emitError
2978                 (
2979                     XMLErrs::XMLException_Error
2980                     , excToCatch.getCode()
2981                     , excToCatch.getMessage()
2982                 );
2983         }
2984         catch(const OutOfMemoryException&)
2985         {
2986             // This is a special case for out-of-memory
2987             // conditions, because resetting the ReaderMgr
2988             // can be problematic.
2989             resetReaderMgr.release();
2990 
2991             throw;
2992         }
2993     }
2994     catch(const OutOfMemoryException&)
2995     {
2996         // This is a special case for out-of-memory
2997         // conditions, because resetting the ReaderMgr
2998         // can be problematic.
2999         resetReaderMgr.release();
3000 
3001         throw;
3002     }
3003 
3004     return loadedGrammar;
3005 }
3006 
resetCachedGrammar()3007 void IGXMLScanner::resetCachedGrammar ()
3008 {
3009   fCachedSchemaInfoList->removeAll ();
3010 }
3011 
loadDTDGrammar(const InputSource & src,const bool toCache)3012 Grammar* IGXMLScanner::loadDTDGrammar(const InputSource& src,
3013                                       const bool toCache)
3014 {
3015     // Reset the validators
3016     fDTDValidator->reset();
3017     if (fValidatorFromUser)
3018         fValidator->reset();
3019 
3020     if (!fValidator->handlesDTD()) {
3021         if (fValidatorFromUser && fValidate)
3022             ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager);
3023         else {
3024             fValidator = fDTDValidator;
3025         }
3026     }
3027 
3028     fDTDGrammar = (DTDGrammar*) fGrammarResolver->getGrammar(XMLUni::fgDTDEntityString);
3029 
3030     if (fDTDGrammar) {
3031         fDTDGrammar->reset();
3032     }
3033     else {
3034         fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager);
3035         fGrammarResolver->putGrammar(fDTDGrammar);
3036     }
3037 
3038     fGrammar = fDTDGrammar;
3039     fGrammarType = fGrammar->getGrammarType();
3040     fValidator->setGrammar(fGrammar);
3041 
3042     //  And for all installed handlers, send reset events. This gives them
3043     //  a chance to flush any cached data.
3044     if (fDocHandler)
3045         fDocHandler->resetDocument();
3046     if (fEntityHandler)
3047         fEntityHandler->resetEntities();
3048     if (fErrorReporter)
3049         fErrorReporter->resetErrors();
3050 
3051     // Clear out the id reference list
3052     resetValidationContext();
3053     // and clear out the darned undeclared DTD element pool...
3054     fDTDElemNonDeclPool->removeAll();
3055 
3056     if (toCache) {
3057 
3058         unsigned int sysId = fGrammarResolver->getStringPool()->addOrFind(src.getSystemId());
3059         const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(sysId);
3060 
3061         fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
3062         ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr);
3063         fGrammarResolver->putGrammar(fGrammar);
3064     }
3065 
3066     //  Handle the creation of the XML reader object for this input source.
3067     //  This will provide us with transcoding and basic lexing services.
3068     XMLReader* newReader = fReaderMgr.createReader
3069     (
3070         src
3071         , false
3072         , XMLReader::RefFrom_NonLiteral
3073         , XMLReader::Type_General
3074         , XMLReader::Source_External
3075         , fCalculateSrcOfs
3076         , fLowWaterMark
3077     );
3078     if (!newReader) {
3079         if (src.getIssueFatalErrorIfNotFound())
3080             ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId(), fMemoryManager);
3081         else
3082             ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId(), fMemoryManager);
3083     }
3084 
3085     //  In order to make the processing work consistently, we have to
3086     //  make this look like an external entity. So create an entity
3087     //  decl and fill it in and push it with the reader, as happens
3088     //  with an external entity. Put a janitor on it to insure it gets
3089     //  cleaned up. The reader manager does not adopt them.
3090     const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull };
3091     DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager);
3092     declDTD->setSystemId(src.getSystemId());
3093     declDTD->setIsExternal(true);
3094     Janitor<DTDEntityDecl> janDecl(declDTD);
3095 
3096     // Mark this one as a throw at end
3097     newReader->setThrowAtEnd(true);
3098 
3099     // And push it onto the stack, with its pseudo name
3100     fReaderMgr.pushReader(newReader, declDTD);
3101 
3102     //  If we have a doc type handler and advanced callbacks are enabled,
3103     //  call the doctype event.
3104     if (fDocTypeHandler) {
3105 
3106         // Create a dummy root
3107         DTDElementDecl* rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl
3108         (
3109             gDTDStr
3110             , fEmptyNamespaceId
3111             , DTDElementDecl::Any
3112             , fGrammarPoolMemoryManager
3113         );
3114         rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
3115         rootDecl->setExternalElemDeclaration(true);
3116         Janitor<DTDElementDecl> janSrc(rootDecl);
3117 
3118         fDocTypeHandler->doctypeDecl(*rootDecl, src.getPublicId(), src.getSystemId(), false, true);
3119     }
3120 
3121     // Create DTDScanner
3122     DTDScanner dtdScanner
3123     (
3124         (DTDGrammar*) fGrammar
3125         , fDocTypeHandler
3126         , fGrammarPoolMemoryManager
3127         , fMemoryManager
3128     );
3129     dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);
3130 
3131     // Tell it its not in an include section
3132     dtdScanner.scanExtSubsetDecl(false, true);
3133 
3134     if (fValidate) {
3135         //  validate the DTD scan so far
3136         fValidator->preContentValidation(false, true);
3137     }
3138 
3139     if (toCache)
3140         fGrammarResolver->cacheGrammars();
3141 
3142     return fDTDGrammar;
3143 }
3144 
3145 // ---------------------------------------------------------------------------
3146 //  IGXMLScanner: Helper methods
3147 // ---------------------------------------------------------------------------
processSchemaLocation(XMLCh * const schemaLoc)3148 void IGXMLScanner::processSchemaLocation(XMLCh* const schemaLoc)
3149 {
3150     XMLCh* locStr = schemaLoc;
3151     XMLReader* curReader = fReaderMgr.getCurrentReader();
3152 
3153     fLocationPairs->removeAllElements();
3154     while (*locStr)
3155     {
3156         do {
3157             // Do we have an escaped character ?
3158             if (*locStr == 0xFFFF)
3159                 continue;
3160 
3161             if (!curReader->isWhitespace(*locStr))
3162                break;
3163 
3164             *locStr = chNull;
3165         } while (*++locStr);
3166 
3167         if (*locStr) {
3168 
3169             fLocationPairs->addElement(locStr);
3170 
3171             while (*++locStr) {
3172                 // Do we have an escaped character ?
3173                 if (*locStr == 0xFFFF)
3174                     continue;
3175                 if (curReader->isWhitespace(*locStr))
3176                     break;
3177             }
3178         }
3179     }
3180 }
3181 
endElementPSVI(SchemaElementDecl * const elemDecl,DatatypeValidator * const memberDV)3182 void IGXMLScanner::endElementPSVI(SchemaElementDecl* const elemDecl,
3183                                   DatatypeValidator* const memberDV)
3184 {
3185     PSVIElement::ASSESSMENT_TYPE validationAttempted;
3186     PSVIElement::VALIDITY_STATE validity = PSVIElement::VALIDITY_NOTKNOWN;
3187 
3188     if (fPSVIElemContext.fElemDepth > fPSVIElemContext.fFullValidationDepth)
3189         validationAttempted = PSVIElement::VALIDATION_FULL;
3190     else if (fPSVIElemContext.fElemDepth > fPSVIElemContext.fNoneValidationDepth)
3191         validationAttempted = PSVIElement::VALIDATION_NONE;
3192     else
3193     {
3194         validationAttempted  = PSVIElement::VALIDATION_PARTIAL;
3195 		fPSVIElemContext.fFullValidationDepth =
3196             fPSVIElemContext.fNoneValidationDepth = fPSVIElemContext.fElemDepth - 1;
3197     }
3198 
3199     if (fValidate && elemDecl->isDeclared())
3200     {
3201         validity = (fPSVIElemContext.fErrorOccurred)
3202             ? PSVIElement::VALIDITY_INVALID : PSVIElement::VALIDITY_VALID;
3203     }
3204 
3205     XSTypeDefinition* typeDef = 0;
3206     bool isMixed = false;
3207     if (fPSVIElemContext.fCurrentTypeInfo)
3208     {
3209         typeDef = (XSTypeDefinition*) fModel->getXSObject(fPSVIElemContext.fCurrentTypeInfo);
3210         SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes)fPSVIElemContext.fCurrentTypeInfo->getContentType();
3211         isMixed = (modelType == SchemaElementDecl::Mixed_Simple
3212                 || modelType == SchemaElementDecl::Mixed_Complex);
3213     }
3214     else if (fPSVIElemContext.fCurrentDV)
3215         typeDef = (XSTypeDefinition*) fModel->getXSObject(fPSVIElemContext.fCurrentDV);
3216 
3217     XMLCh* canonicalValue = 0;
3218     if (fPSVIElemContext.fNormalizedValue && !isMixed &&
3219             validity == PSVIElement::VALIDITY_VALID)
3220     {
3221         if (memberDV)
3222             canonicalValue = (XMLCh*) memberDV->getCanonicalRepresentation(fPSVIElemContext.fNormalizedValue, fMemoryManager);
3223         else if (fPSVIElemContext.fCurrentDV)
3224             canonicalValue = (XMLCh*) fPSVIElemContext.fCurrentDV->getCanonicalRepresentation(fPSVIElemContext.fNormalizedValue, fMemoryManager);
3225     }
3226 
3227     fPSVIElement->reset
3228     (
3229         validity
3230         , validationAttempted
3231         , fRootElemName
3232         , fPSVIElemContext.fIsSpecified
3233         , (elemDecl->isDeclared())
3234             ? (XSElementDeclaration*) fModel->getXSObject(elemDecl) : 0
3235         , typeDef
3236         , (memberDV) ? (XSSimpleTypeDefinition*) fModel->getXSObject(memberDV) : 0
3237         , fModel
3238         , elemDecl->getDefaultValue()
3239         , fPSVIElemContext.fNormalizedValue
3240         , canonicalValue
3241     );
3242 
3243     fPSVIHandler->handleElementPSVI
3244     (
3245         elemDecl->getBaseName()
3246         , fURIStringPool->getValueForId(elemDecl->getURI())
3247         , fPSVIElement
3248     );
3249 
3250     // decrease element depth
3251     fPSVIElemContext.fElemDepth--;
3252 
3253 }
3254 
resetPSVIElemContext()3255 void IGXMLScanner::resetPSVIElemContext()
3256 {
3257     fPSVIElemContext.fIsSpecified = false;
3258     fPSVIElemContext.fErrorOccurred = false;
3259     fPSVIElemContext.fElemDepth = -1;
3260     fPSVIElemContext.fFullValidationDepth = -1;
3261     fPSVIElemContext.fNoneValidationDepth = -1;
3262     fPSVIElemContext.fCurrentDV = 0;
3263     fPSVIElemContext.fCurrentTypeInfo = 0;
3264     fPSVIElemContext.fNormalizedValue = 0;
3265 }
3266 
3267 XERCES_CPP_NAMESPACE_END
3268