1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 /*
19  * $Id: SGXMLScanner.cpp 925236 2010-03-19 14:29:47Z borisk $
20  */
21 
22 
23 // ---------------------------------------------------------------------------
24 //  Includes
25 // ---------------------------------------------------------------------------
26 #include <xercesc/internal/SGXMLScanner.hpp>
27 #include <xercesc/util/RuntimeException.hpp>
28 #include <xercesc/util/UnexpectedEOFException.hpp>
29 #include <xercesc/util/XMLUri.hpp>
30 #include <xercesc/framework/LocalFileInputSource.hpp>
31 #include <xercesc/framework/URLInputSource.hpp>
32 #include <xercesc/framework/XMLDocumentHandler.hpp>
33 #include <xercesc/framework/XMLEntityHandler.hpp>
34 #include <xercesc/framework/XMLPScanToken.hpp>
35 #include <xercesc/framework/MemoryManager.hpp>
36 #include <xercesc/framework/XMLGrammarPool.hpp>
37 #include <xercesc/framework/psvi/PSVIElement.hpp>
38 #include <xercesc/framework/psvi/PSVIHandler.hpp>
39 #include <xercesc/framework/psvi/PSVIAttributeList.hpp>
40 #include <xercesc/framework/psvi/XSAnnotation.hpp>
41 #include <xercesc/internal/EndOfEntityException.hpp>
42 #include <xercesc/validators/common/ContentLeafNameTypeVector.hpp>
43 #include <xercesc/validators/schema/SchemaValidator.hpp>
44 #include <xercesc/validators/schema/TraverseSchema.hpp>
45 #include <xercesc/validators/schema/XSDDOMParser.hpp>
46 #include <xercesc/validators/schema/SubstitutionGroupComparator.hpp>
47 #include <xercesc/validators/schema/XMLSchemaDescriptionImpl.hpp>
48 #include <xercesc/validators/schema/identity/IdentityConstraintHandler.hpp>
49 #include <xercesc/validators/schema/identity/IC_Selector.hpp>
50 #include <xercesc/validators/schema/identity/ValueStore.hpp>
51 #include <xercesc/util/OutOfMemoryException.hpp>
52 #include <xercesc/util/XMLStringTokenizer.hpp>
53 
54 XERCES_CPP_NAMESPACE_BEGIN
55 
//  Forward declaration only; the definition is not visible in this part of
//  the file.
inline XMLAttDefList& getAttDefList(ComplexTypeInfo* currType, XMLElementDecl* elemDecl);


//  Janitor aliases used below. CleanupType is bound to
//  SGXMLScanner::cleanUp() by the constructors, and ReaderMgrResetType is
//  bound to ReaderMgr::reset() by scanDocument() and scanNext(). Both call
//  sites invoke release() on the janitor when the bound call must not happen.
typedef JanitorMemFunCall<SGXMLScanner> CleanupType;
typedef JanitorMemFunCall<ReaderMgr>    ReaderMgrResetType;
61 
62 
63 // ---------------------------------------------------------------------------
64 //  SGXMLScanner: Constructors and Destructor
65 // ---------------------------------------------------------------------------
SGXMLScanner(XMLValidator * const valToAdopt,GrammarResolver * const grammarResolver,MemoryManager * const manager)66 SGXMLScanner::SGXMLScanner( XMLValidator* const valToAdopt
67                           , GrammarResolver* const grammarResolver
68                           , MemoryManager* const manager) :
69 
70     XMLScanner(valToAdopt, grammarResolver, manager)
71     , fSeeXsi(false)
72     , fGrammarType(Grammar::UnKnown)
73     , fElemStateSize(16)
74     , fElemState(0)
75     , fElemLoopState(0)
76     , fContent(1023, manager)
77     , fEntityTable(0)
78     , fRawAttrList(0)
79     , fRawAttrColonListSize(32)
80     , fRawAttrColonList(0)
81     , fSchemaGrammar(0)
82     , fSchemaValidator(0)
83     , fICHandler(0)
84     , fElemNonDeclPool(0)
85     , fElemCount(0)
86     , fAttDefRegistry(0)
87     , fUndeclaredAttrRegistry(0)
88     , fPSVIAttrList(0)
89     , fModel(0)
90     , fPSVIElement(0)
91     , fErrorStack(0)
92     , fSchemaInfoList(0)
93     , fCachedSchemaInfoList(0)
94 {
95     CleanupType cleanup(this, &SGXMLScanner::cleanUp);
96 
97     try
98     {
99          commonInit();
100     }
101     catch(const OutOfMemoryException&)
102     {
103         // Don't cleanup when out of memory, since executing the
104         // code can cause problems.
105         cleanup.release();
106 
107         throw;
108     }
109 
110     cleanup.release();
111 }
112 
SGXMLScanner(XMLDocumentHandler * const docHandler,DocTypeHandler * const docTypeHandler,XMLEntityHandler * const entityHandler,XMLErrorReporter * const errHandler,XMLValidator * const valToAdopt,GrammarResolver * const grammarResolver,MemoryManager * const manager)113 SGXMLScanner::SGXMLScanner( XMLDocumentHandler* const docHandler
114                           , DocTypeHandler* const     docTypeHandler
115                           , XMLEntityHandler* const   entityHandler
116                           , XMLErrorReporter* const   errHandler
117                           , XMLValidator* const       valToAdopt
118                           , GrammarResolver* const    grammarResolver
119                           , MemoryManager* const      manager) :
120 
121     XMLScanner(docHandler, docTypeHandler, entityHandler, errHandler, valToAdopt, grammarResolver, manager)
122     , fSeeXsi(false)
123     , fGrammarType(Grammar::UnKnown)
124     , fElemStateSize(16)
125     , fElemState(0)
126     , fElemLoopState(0)
127     , fContent(1023, manager)
128     , fEntityTable(0)
129     , fRawAttrList(0)
130     , fRawAttrColonListSize(32)
131     , fRawAttrColonList(0)
132     , fSchemaGrammar(0)
133     , fSchemaValidator(0)
134     , fICHandler(0)
135     , fElemNonDeclPool(0)
136     , fElemCount(0)
137     , fAttDefRegistry(0)
138     , fUndeclaredAttrRegistry(0)
139     , fPSVIAttrList(0)
140     , fModel(0)
141     , fPSVIElement(0)
142     , fErrorStack(0)
143     , fSchemaInfoList(0)
144     , fCachedSchemaInfoList(0)
145 {
146     CleanupType cleanup(this, &SGXMLScanner::cleanUp);
147 
148     try
149     {
150         commonInit();
151     }
152     catch(const OutOfMemoryException&)
153     {
154         // Don't cleanup when out of memory, since executing the
155         // code can cause problems.
156         cleanup.release();
157 
158         throw;
159     }
160 
161     cleanup.release();
162 }
163 
~SGXMLScanner()164 SGXMLScanner::~SGXMLScanner()
165 {
166     cleanUp();
167 }
168 
169 // ---------------------------------------------------------------------------
//  SGXMLScanner: Getter methods
171 // ---------------------------------------------------------------------------
getEntityDeclPool()172 NameIdPool<DTDEntityDecl>* SGXMLScanner::getEntityDeclPool()
173 {
174     return 0;
175 }
176 
getEntityDeclPool() const177 const NameIdPool<DTDEntityDecl>* SGXMLScanner::getEntityDeclPool() const
178 {
179     return 0;
180 }
181 
182 // ---------------------------------------------------------------------------
183 //  SGXMLScanner: Main entry point to scan a document
184 // ---------------------------------------------------------------------------
scanDocument(const InputSource & src)185 void SGXMLScanner::scanDocument(const InputSource& src)
186 {
187     //  Bump up the sequence id for this parser instance. This will invalidate
188     //  any previous progressive scan tokens.
189     fSequenceId++;
190 
191     ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
192 
193     try
194     {
195         //  Reset the scanner and its plugged in stuff for a new run. This
196         //  resets all the data structures, creates the initial reader and
197         //  pushes it on the stack, and sets up the base document path.
198         scanReset(src);
199 
200         // If we have a document handler, then call the start document
201         if (fDocHandler)
202             fDocHandler->startDocument();
203 
204         //  Scan the prolog part, which is everything before the root element
205         //  including the DTD subsets.
206         scanProlog();
207 
208         //  If we got to the end of input, then its not a valid XML file.
209         //  Else, go on to scan the content.
210         if (fReaderMgr.atEOF())
211         {
212             emitError(XMLErrs::EmptyMainEntity);
213         }
214         else
215         {
216             // Scan content, and tell it its not an external entity
217             if (scanContent())
218             {
219                 // Do post-parse validation if required
220                 if (fValidate)
221                 {
222                     //  We handle ID reference semantics at this level since
223                     //  its required by XML 1.0.
224                     checkIDRefs();
225 
226                     // Then allow the validator to do any extra stuff it wants
227 //                    fValidator->postParseValidation();
228                 }
229 
230                 // That went ok, so scan for any miscellaneous stuff
231                 if (!fReaderMgr.atEOF())
232                     scanMiscellaneous();
233             }
234         }
235 
236         // If we have a document handler, then call the end document
237         if (fDocHandler)
238             fDocHandler->endDocument();
239     }
240     //  NOTE:
241     //
242     //  In all of the error processing below, the emitError() call MUST come
243     //  before the flush of the reader mgr, or it will fail because it tries
244     //  to find out the position in the XML source of the error.
245     catch(const XMLErrs::Codes)
246     {
247         // This is a 'first failure' exception, so fall through
248     }
249     catch(const XMLValid::Codes)
250     {
251         // This is a 'first fatal error' type exit, so fall through
252     }
253     catch(const XMLException& excToCatch)
254     {
255         //  Emit the error and catch any user exception thrown from here. Make
256         //  sure in all cases we flush the reader manager.
257         fInException = true;
258         try
259         {
260             if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
261                 emitError
262                 (
263                     XMLErrs::XMLException_Warning
264                     , excToCatch.getCode()
265                     , excToCatch.getMessage()
266                 );
267             else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
268                 emitError
269                 (
270                     XMLErrs::XMLException_Fatal
271                     , excToCatch.getCode()
272                     , excToCatch.getMessage()
273                 );
274             else
275                 emitError
276                 (
277                     XMLErrs::XMLException_Error
278                     , excToCatch.getCode()
279                     , excToCatch.getMessage()
280                 );
281         }
282         catch(const OutOfMemoryException&)
283         {
284             // This is a special case for out-of-memory
285             // conditions, because resetting the ReaderMgr
286             // can be problematic.
287             resetReaderMgr.release();
288 
289             throw;
290         }
291     }
292     catch(const OutOfMemoryException&)
293     {
294         // This is a special case for out-of-memory
295         // conditions, because resetting the ReaderMgr
296         // can be problematic.
297         resetReaderMgr.release();
298 
299         throw;
300     }
301 }
302 
303 
scanNext(XMLPScanToken & token)304 bool SGXMLScanner::scanNext(XMLPScanToken& token)
305 {
306     // Make sure this token is still legal
307     if (!isLegalToken(token))
308         ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_BadPScanToken, fMemoryManager);
309 
310     // Find the next token and remember the reader id
311     XMLSize_t orgReader;
312     XMLTokens curToken;
313 
314     ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
315 
316     bool retVal = true;
317 
318     try
319     {
320         while (true)
321         {
322             //  We have to handle any end of entity exceptions that happen here.
323             //  We could be at the end of X nested entities, each of which will
324             //  generate an end of entity exception as we try to move forward.
325             try
326             {
327                 curToken = senseNextToken(orgReader);
328                 break;
329             }
330             catch(const EndOfEntityException& toCatch)
331             {
332                 // Send an end of entity reference event
333                 if (fDocHandler)
334                     fDocHandler->endEntityReference(toCatch.getEntity());
335             }
336         }
337 
338         if (curToken == Token_CharData)
339         {
340             scanCharData(fCDataBuf);
341         }
342         else if (curToken == Token_EOF)
343         {
344             if (!fElemStack.isEmpty())
345             {
346                 const ElemStack::StackElem* topElem = fElemStack.popTop();
347                 emitError
348                 (
349                     XMLErrs::EndedWithTagsOnStack
350                     , topElem->fThisElement->getFullName()
351                 );
352             }
353 
354             retVal = false;
355         }
356         else
357         {
358             // Its some sort of markup
359             bool gotData = true;
360             switch(curToken)
361             {
362                 case Token_CData :
363                     // Make sure we are within content
364                     if (fElemStack.isEmpty())
365                         emitError(XMLErrs::CDATAOutsideOfContent);
366                     scanCDSection();
367                     break;
368 
369                 case Token_Comment :
370                     scanComment();
371                     break;
372 
373                 case Token_EndTag :
374                     scanEndTag(gotData);
375                     break;
376 
377                 case Token_PI :
378                     scanPI();
379                     break;
380 
381                 case Token_StartTag :
382                     scanStartTag(gotData);
383                     break;
384 
385                 default :
386                     fReaderMgr.skipToChar(chOpenAngle);
387                     break;
388             }
389 
390             if (orgReader != fReaderMgr.getCurrentReaderNum())
391                 emitError(XMLErrs::PartialMarkupInEntity);
392 
393             // If we hit the end, then do the miscellaneous part
394             if (!gotData)
395             {
396                 // Do post-parse validation if required
397                 if (fValidate)
398                 {
399                     //  We handle ID reference semantics at this level since
400                     //  its required by XML 1.0.
401                     checkIDRefs();
402 
403                     // Then allow the validator to do any extra stuff it wants
404 //                    fValidator->postParseValidation();
405                 }
406 
407                 // That went ok, so scan for any miscellaneous stuff
408                 scanMiscellaneous();
409 
410                 if (toCheckIdentityConstraint())
411                     fICHandler->endDocument();
412 
413                 if (fDocHandler)
414                     fDocHandler->endDocument();
415             }
416         }
417     }
418     //  NOTE:
419     //
420     //  In all of the error processing below, the emitError() call MUST come
421     //  before the flush of the reader mgr, or it will fail because it tries
422     //  to find out the position in the XML source of the error.
423     catch(const XMLErrs::Codes)
424     {
425         // This is a 'first failure' exception, so return failure
426         retVal = false;
427     }
428     catch(const XMLValid::Codes)
429     {
430         // This is a 'first fatal error' type exit, so return failure
431         retVal = false;
432     }
433     catch(const XMLException& excToCatch)
434     {
435         //  Emit the error and catch any user exception thrown from here. Make
436         //  sure in all cases we flush the reader manager.
437         fInException = true;
438         try
439         {
440             if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
441                 emitError
442                 (
443                     XMLErrs::XMLException_Warning
444                     , excToCatch.getCode()
445                     , excToCatch.getMessage()
446                 );
447             else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
448                 emitError
449                 (
450                     XMLErrs::XMLException_Fatal
451                     , excToCatch.getCode()
452                     , excToCatch.getMessage()
453                 );
454             else
455                 emitError
456                 (
457                     XMLErrs::XMLException_Error
458                     , excToCatch.getCode()
459                     , excToCatch.getMessage()
460                 );
461         }
462         catch(const OutOfMemoryException&)
463         {
464             // This is a special case for out-of-memory
465             // conditions, because resetting the ReaderMgr
466             // can be problematic.
467             resetReaderMgr.release();
468 
469             throw;
470         }
471 
472         retVal = false;
473     }
474     catch(const OutOfMemoryException&)
475     {
476         // This is a special case for out-of-memory
477         // conditions, because resetting the ReaderMgr
478         // can be problematic.
479         resetReaderMgr.release();
480 
481         throw;
482     }
483 
484     // If we are not at the end, release the object that will
485     // reset the ReaderMgr.
486     if (retVal)
487         resetReaderMgr.release();
488 
489     return retVal;
490 }
491 
492 // ---------------------------------------------------------------------------
493 //  SGXMLScanner: Private scanning methods
494 // ---------------------------------------------------------------------------
495 
//  This method is called from scanStartTag() to handle the very raw initial
//  scan of the attributes. It just fills in the passed collection with
//  key/value pairs for each attribute. No processing is done on them at all.
//
//  Parameters:
//    elemName  Name of the element being scanned; used here only as the
//              text of UnterminatedStartTag errors.
//    toFill    Receives one name/value pair per attribute. Existing elements
//              are overwritten in place and new ones are appended once the
//              attribute count reaches the vector's size on entry.
//    isEmpty   Set to false on entry, and to true when a '/' is consumed at
//              the end of the tag.
//
//  Returns the number of attributes stored. The colon position reported by
//  getQName() for attribute i is stored in fRawAttrColonList[i].
//
//  Throws UnexpectedEOFException when a null character is met while
//  handling the special start tag characters.
XMLSize_t
SGXMLScanner::rawAttrScan(const   XMLCh* const                elemName
                          ,       RefVectorOf<KVStringPair>&  toFill
                          ,       bool&                       isEmpty)
{
    //  Keep up with how many attributes we've seen so far, and how many
    //  elements are available in the vector. This way we can reuse old
    //  elements until we run out and then expand it.
    XMLSize_t attCount = 0;
    XMLSize_t curVecSize = toFill.size();

    // Assume it is not empty
    isEmpty = false;

    //  We loop until we either see a /> or >, handling key/value pairs until
    //  we get there. We place them in the passed vector, which we will expand
    //  as required to hold them.
    while (true)
    {
        // Get the next character, which should be non-space
        XMLCh nextCh = fReaderMgr.peekNextChar();

        //  If the next character is not a slash or closed angle bracket,
        //  then it must be whitespace, since whitespace is required
        //  between the end of the last attribute and the name of the next
        //  one. This only applies once at least one attribute has been seen.
        //
        if (attCount)
        {
            if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
            {
                if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
                {
                    // Ok, skip by them and get another char
                    fReaderMgr.getNextChar();
                    fReaderMgr.skipPastSpaces();
                    nextCh = fReaderMgr.peekNextChar();
                }
                 else
                {
                    // Emit the error but keep on going
                    emitError(XMLErrs::ExpectedWhitespace);
                }
            }
        }

        //  Ok, here we first check for any of the special case characters.
        //  If it's not one, then we do the normal case processing, which
        //  assumes that we've hit an attribute name. Otherwise, we do all
        //  the special case checks.
        if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
        {
            //  Assume it's going to be an attribute, so get a name from
            //  the input.
            int colonPosition;
            if (!fReaderMgr.getQName(fAttNameBuf, &colonPosition))
            {
                //  No usable name: report it, skip past the next '>' and
                //  give up on this tag.
                if (fAttNameBuf.isEmpty())
                    emitError(XMLErrs::ExpectedAttrName);
                else
                    emitError(XMLErrs::InvalidAttrName, fAttNameBuf.getRawBuffer());
                fReaderMgr.skipPastChar(chCloseAngle);
                return attCount;
            }

            const XMLCh* curAttNameBuf = fAttNameBuf.getRawBuffer();

            // And next must be an equal sign
            if (!scanEq())
            {
                //  Characters (besides whitespace) at which the skip below
                //  stops.
                static const XMLCh tmpList[] =
                {
                    chSingleQuote, chDoubleQuote, chCloseAngle
                    , chOpenAngle, chForwardSlash, chNull
                };

                emitError(XMLErrs::ExpectedEqSign);

                //  Try to sync back up by skipping forward until we hit
                //  something meaningful.
                const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);

                if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
                {
                    //  Jump back to top for normal processing of these. The
                    //  attribute scanned so far is not stored.
                    continue;
                }
                else if ((chFound == chSingleQuote)
                      ||  (chFound == chDoubleQuote)
                      ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
                {
                    // Just fall through assuming that the value is to follow
                }
                else if (chFound == chOpenAngle)
                {
                    // Assume a malformed tag and that new one is starting
                    emitError(XMLErrs::UnterminatedStartTag, elemName);
                    return attCount;
                }
                else
                {
                    // Something went really wrong
                    return attCount;
                }
            }

            //  Next should be the quoted attribute value. We just do a simple
            //  and stupid scan of this value. The only thing we do here
            //  is to expand entity references.
            if (!basicAttrValueScan(curAttNameBuf, fAttValueBuf))
            {
                //  Characters (besides whitespace) at which the skip below
                //  stops.
                static const XMLCh tmpList[] =
                {
                    chCloseAngle, chOpenAngle, chForwardSlash, chNull
                };

                emitError(XMLErrs::ExpectedAttrValue);

                //  It failed, so let's try to get synced back up. We skip
                //  forward until we find some whitespace or one of the
                //  chars in our list.
                const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);

                if ((chFound == chCloseAngle)
                ||  (chFound == chForwardSlash)
                ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
                {
                    //  Just fall through and process this attribute, though
                    //  the value will be "".
                }
                else if (chFound == chOpenAngle)
                {
                    // Assume a malformed tag and that new one is starting
                    emitError(XMLErrs::UnterminatedStartTag, elemName);
                    return attCount;
                }
                else
                {
                    // Something went really wrong
                    return attCount;
                }
            }

            //  And now let's add it to the passed collection. If we have not
            //  filled it up yet, then we use the next element. Else we add
            //  a new one.
            KVStringPair* curPair = 0;
            if (attCount >= curVecSize)
            {
                curPair = new (fMemoryManager) KVStringPair
                (
                    curAttNameBuf
                    , fAttNameBuf.getLen()
                    , fAttValueBuf.getRawBuffer()
                    , fAttValueBuf.getLen()
                    , fMemoryManager
                );
                toFill.addElement(curPair);
            }
             else
            {
                curPair = toFill.elementAt(attCount);
                curPair->set
                (
                    curAttNameBuf
                    , fAttNameBuf.getLen()
                    , fAttValueBuf.getRawBuffer()
                    , fAttValueBuf.getLen()
                );
            }
            //  Record the colon position for this attribute, resizing the
            //  colon list first when the index is past its current size.
            if (attCount >= fRawAttrColonListSize) {
                resizeRawAttrColonList();
            }
            fRawAttrColonList[attCount] = colonPosition;

            // And bump the count of attributes we've gotten
            attCount++;

            // And go to the top again for another attribute
            continue;
        }

        //  It was some special case character so do all of the checks and
        //  deal with it.
        if (!nextCh)
            ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);

        if (nextCh == chForwardSlash)
        {
            //  Empty element tag. The element is flagged as empty even if
            //  the closing '>' turns out to be missing.
            fReaderMgr.getNextChar();
            isEmpty = true;
            if (!fReaderMgr.skippedChar(chCloseAngle))
                emitError(XMLErrs::UnterminatedStartTag, elemName);
            break;
        }
        else if (nextCh == chCloseAngle)
        {
            // Normal end of the start tag
            fReaderMgr.getNextChar();
            break;
        }
        else if (nextCh == chOpenAngle)
        {
            //  Check for this one specially, since it's going to be common
            //  and it is kind of auto-recovering since we've already hit the
            //  next open bracket, which is what we would have seeked to (and
            //  skipped this whole tag.)
            emitError(XMLErrs::UnterminatedStartTag, elemName);
            break;
        }
        else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
        {
            //  Check for this one specially, which is probably a missing
            //  attribute name, e.g. ="value". Just issue expected name
            //  error and eat the quoted string, then jump back to the
            //  top again.
            emitError(XMLErrs::ExpectedAttrName);
            fReaderMgr.getNextChar();
            fReaderMgr.skipQuotedString(nextCh);
            fReaderMgr.skipPastSpaces();
            continue;
        }
    }

    return attCount;
}
724 
725 
726 //  This method will kick off the scanning of the primary content of the
727 //  document, i.e. the elements.
scanContent()728 bool SGXMLScanner::scanContent()
729 {
730     //  Go into a loop until we hit the end of the root element, or we fall
731     //  out because there is no root element.
732     //
733     //  We have to do kind of a deeply nested double loop here in order to
734     //  avoid doing the setup/teardown of the exception handler on each
735     //  round. Doing it this way we only do it when an exception actually
736     //  occurs.
737     bool gotData = true;
738     bool inMarkup = false;
739     while (gotData)
740     {
741         try
742         {
743             while (gotData)
744             {
745                 //  Sense what the next top level token is. According to what
746                 //  this tells us, we will call something to handle that kind
747                 //  of thing.
748                 XMLSize_t orgReader;
749                 const XMLTokens curToken = senseNextToken(orgReader);
750 
751                 //  Handle character data and end of file specially. Char data
752                 //  is not markup so we don't want to handle it in the loop
753                 //  below.
754                 if (curToken == Token_CharData)
755                 {
756                     //  Scan the character data and call appropriate events. Let
757                     //  him use our local character data buffer for efficiency.
758                     scanCharData(fCDataBuf);
759                     continue;
760                 }
761                 else if (curToken == Token_EOF)
762                 {
763                     //  The element stack better be empty at this point or we
764                     //  ended prematurely before all elements were closed.
765                     if (!fElemStack.isEmpty())
766                     {
767                         const ElemStack::StackElem* topElem = fElemStack.popTop();
768                         emitError
769                         (
770                             XMLErrs::EndedWithTagsOnStack
771                             , topElem->fThisElement->getFullName()
772                         );
773                     }
774 
775                     // Its the end of file, so clear the got data flag
776                     gotData = false;
777                     continue;
778                 }
779 
780                 // We are in some sort of markup now
781                 inMarkup = true;
782 
783                 //  According to the token we got, call the appropriate
784                 //  scanning method.
785                 switch(curToken)
786                 {
787                     case Token_CData :
788                         // Make sure we are within content
789                         if (fElemStack.isEmpty())
790                             emitError(XMLErrs::CDATAOutsideOfContent);
791                         scanCDSection();
792                         break;
793 
794                     case Token_Comment :
795                         scanComment();
796                         break;
797 
798                     case Token_EndTag :
799                         scanEndTag(gotData);
800                         break;
801 
802                     case Token_PI :
803                         scanPI();
804                         break;
805 
806                     case Token_StartTag :
807                         scanStartTag(gotData);
808                         break;
809 
810                     default :
811                         fReaderMgr.skipToChar(chOpenAngle);
812                         break;
813                 }
814 
815                 if (orgReader != fReaderMgr.getCurrentReaderNum())
816                     emitError(XMLErrs::PartialMarkupInEntity);
817 
818                 // And we are back out of markup again
819                 inMarkup = false;
820             }
821         }
822         catch(const EndOfEntityException& toCatch)
823         {
824             //  If we were in some markup when this happened, then its a
825             //  partial markup error.
826             if (inMarkup)
827                 emitError(XMLErrs::PartialMarkupInEntity);
828 
829             // Send an end of entity reference event
830             if (fDocHandler)
831                 fDocHandler->endEntityReference(toCatch.getEntity());
832 
833             inMarkup = false;
834         }
835     }
836 
837     // It went ok, so return success
838     return true;
839 }
840 
841 
scanEndTag(bool & gotData)842 void SGXMLScanner::scanEndTag(bool& gotData)
843 {
844     //  Assume we will still have data until proven otherwise. It will only
845     //  ever be false if this is the end of the root element.
846     gotData = true;
847 
848     //  Check if the element stack is empty. If so, then this is an unbalanced
849     //  element (i.e. more ends than starts, perhaps because of bad text
850     //  causing one to be skipped.)
851     if (fElemStack.isEmpty())
852     {
853         emitError(XMLErrs::MoreEndThanStartTags);
854         fReaderMgr.skipPastChar(chCloseAngle);
855         ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);
856     }
857 
858     //  Pop the stack of the element we are supposed to be ending. Remember
859     //  that we don't own this. The stack just keeps them and reuses them.
860     unsigned int uriId = (fDoNamespaces)
861         ? fElemStack.getCurrentURI() : fEmptyNamespaceId;
862 
863     // Make sure that its the end of the element that we expect
864     const XMLCh *elemName = fElemStack.getCurrentSchemaElemName();
865     const ElemStack::StackElem* topElem = fElemStack.topElement();
866     if (!fReaderMgr.skippedStringLong(elemName))
867     {
868         emitError
869         (
870             XMLErrs::ExpectedEndOfTagX
871             , elemName
872         );
873         fReaderMgr.skipPastChar(chCloseAngle);
874         fElemStack.popTop();
875         return;
876     }
877 
878     fPSVIElemContext.fErrorOccurred = fErrorStack->pop();
879 
880     // Make sure we are back on the same reader as where we started
881     if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
882         emitError(XMLErrs::PartialTagMarkupError);
883 
884     // Skip optional whitespace
885     fReaderMgr.skipPastSpaces();
886 
887     // Make sure we find the closing bracket
888     if (!fReaderMgr.skippedChar(chCloseAngle))
889     {
890         emitError
891         (
892             XMLErrs::UnterminatedEndTag
893             , topElem->fThisElement->getFullName()
894         );
895     }
896 
897     if (fValidate && topElem->fThisElement->isDeclared())
898     {
899         fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
900         if(!fPSVIElemContext.fCurrentTypeInfo)
901             fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
902         else
903             fPSVIElemContext.fCurrentDV = 0;
904         if (fPSVIHandler)
905         {
906             fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) fValidator)->getNormalizedValue();
907 
908             if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString))
909                 fPSVIElemContext.fNormalizedValue = 0;
910 
911         }
912     }
913     else
914     {
915         fPSVIElemContext.fCurrentDV = 0;
916         fPSVIElemContext.fCurrentTypeInfo = 0;
917         fPSVIElemContext.fNormalizedValue = 0;
918     }
919 
920     //  If validation is enabled, then lets pass him the list of children and
921     //  this element and let him validate it.
922     DatatypeValidator* psviMemberType = 0;
923     if (fValidate)
924     {
925         XMLSize_t failure;
926         bool res = fValidator->checkContent
927         (
928             topElem->fThisElement
929             , topElem->fChildren
930             , topElem->fChildCount
931             , &failure
932         );
933 
934         if (!res)
935         {
936             //  One of the elements is not valid for the content. NOTE that
937             //  if no children were provided but the content model requires
938             //  them, it comes back with a zero value. But we cannot use that
939             //  to index the child array in this case, and have to put out a
940             //  special message.
941             if (!topElem->fChildCount)
942             {
943                 fValidator->emitError
944                 (
945                     XMLValid::EmptyNotValidForContent
946                     , topElem->fThisElement->getFormattedContentModel()
947                 );
948             }
949             else if (failure >= topElem->fChildCount)
950             {
951                 fValidator->emitError
952                 (
953                     XMLValid::NotEnoughElemsForCM
954                     , topElem->fThisElement->getFormattedContentModel()
955                 );
956             }
957             else
958             {
959                 fValidator->emitError
960                 (
961                     XMLValid::ElementNotValidForContent
962                     , topElem->fChildren[failure]->getRawName()
963                     , topElem->fThisElement->getFormattedContentModel()
964                 );
965             }
966 
967         }
968 
969         // update PSVI info
970         if (((SchemaValidator*) fValidator)->getErrorOccurred())
971             fPSVIElemContext.fErrorOccurred = true;
972         else if (fPSVIElemContext.fCurrentDV && fPSVIElemContext.fCurrentDV->getType() == DatatypeValidator::Union)
973             psviMemberType = fValidationContext->getValidatingMemberType();
974         if (fPSVIHandler)
975         {
976             fPSVIElemContext.fIsSpecified = ((SchemaValidator*) fValidator)->getIsElemSpecified();
977             if(fPSVIElemContext.fIsSpecified)
978                 fPSVIElemContext.fNormalizedValue = ((SchemaElementDecl *)topElem->fThisElement)->getDefaultValue();
979         }
980 
981         // call matchers and de-activate context
982         if (toCheckIdentityConstraint())
983         {
984             fICHandler->deactivateContext
985                         (
986                          (SchemaElementDecl *) topElem->fThisElement
987                        , fContent.getRawBuffer()
988                        , fValidationContext
989                        , fPSVIElemContext.fCurrentDV
990                         );
991         }
992 
993     }
994 
995     // QName dv needed topElem to resolve URIs on the checkContent
996     fElemStack.popTop();
997 
998     // See if it was the root element, to avoid multiple calls below
999     const bool isRoot = fElemStack.isEmpty();
1000 
1001     if (fPSVIHandler)
1002     {
1003         endElementPSVI
1004         (
1005             (SchemaElementDecl*)topElem->fThisElement, psviMemberType
1006         );
1007     }
1008     // now we can reset the datatype buffer, since the
1009     // application has had a chance to copy the characters somewhere else
1010     ((SchemaValidator *)fValidator)->clearDatatypeBuffer();
1011 
1012     // If we have a doc handler, tell it about the end tag
1013     if (fDocHandler)
1014     {
1015         if (fGrammarType == Grammar::SchemaGrammarType) {
1016             if (topElem->fPrefixColonPos != -1)
1017                 fPrefixBuf.set(elemName, topElem->fPrefixColonPos);
1018             else
1019                 fPrefixBuf.reset();
1020         }
1021         else {
1022             fPrefixBuf.set(topElem->fThisElement->getElementName()->getPrefix());
1023         }
1024         fDocHandler->endElement
1025         (
1026             *topElem->fThisElement
1027             , uriId
1028             , isRoot
1029             , fPrefixBuf.getRawBuffer()
1030         );
1031     }
1032 
1033     if (!isRoot)
1034     {
1035         // update error information
1036         fErrorStack->push((fErrorStack->size() && fErrorStack->pop()) || fPSVIElemContext.fErrorOccurred);
1037     }
1038 
1039     // If this was the root, then done with content
1040     gotData = !isRoot;
1041 
1042     if (gotData) {
1043 
1044         // Restore the grammar
1045         fGrammar = fElemStack.getCurrentGrammar();
1046         fGrammarType = fGrammar->getGrammarType();
1047         fValidator->setGrammar(fGrammar);
1048 
1049         // Restore the validation flag
1050         fValidate = fElemStack.getValidationFlag();
1051     }
1052 }
1053 
1054 
1055 //  This method handles the high level logic of scanning the DOCType
1056 //  declaration. This calls the DTDScanner and kicks off both the scanning of
1057 //  the internal subset and the scanning of the external subset, if any.
1058 //
1059 //  When we get here the '<!DOCTYPE' part has already been scanned, which is
1060 //  what told us that we had a doc type decl to parse.
scanDocTypeDecl()1061 void SGXMLScanner::scanDocTypeDecl()
1062 {
1063     // Just skips over it
1064     // REVISIT: Should we issue a warning
1065     static const XMLCh doctypeIE[] =
1066     {
1067             chOpenSquare, chCloseAngle, chNull
1068     };
1069     XMLCh nextCh = fReaderMgr.skipUntilIn(doctypeIE);
1070 
1071     if (nextCh == chOpenSquare)
1072         fReaderMgr.skipPastChar(chCloseSquare);
1073 
1074     fReaderMgr.skipPastChar(chCloseAngle);
1075 }
1076 
1077 //  This method is called to scan a start tag when we are processing
1078 //  namespaces. This method is called after we've scanned the < of a
1079 //  start tag. So we have to get the element name, then scan the attributes,
1080 //  after which we are either going to see >, />, or attributes followed
1081 //  by one of those sequences.
scanStartTag(bool & gotData)1082 bool SGXMLScanner::scanStartTag(bool& gotData)
1083 {
1084     //  Assume we will still have data until proven otherwise. It will only
1085     //  ever be false if this is the root and its empty.
1086     gotData = true;
1087 
1088     // Reset element content
1089     fContent.reset();
1090 
1091     //  The current position is after the open bracket, so we need to read in
1092     //  in the element name.
1093     int prefixColonPos;
1094     if (!fReaderMgr.getQName(fQNameBuf, &prefixColonPos))
1095     {
1096         if (fQNameBuf.isEmpty())
1097             emitError(XMLErrs::ExpectedElementName);
1098         else
1099             emitError(XMLErrs::InvalidElementName, fQNameBuf.getRawBuffer());
1100         fReaderMgr.skipToChar(chOpenAngle);
1101         return false;
1102     }
1103 
1104     // See if its the root element
1105     const bool isRoot = fElemStack.isEmpty();
1106 
1107     // Skip any whitespace after the name
1108     fReaderMgr.skipPastSpaces();
1109 
1110     //  First we have to do the rawest attribute scan. We don't do any
1111     //  normalization of them at all, since we don't know yet what type they
1112     //  might be (since we need the element decl in order to do that.)
1113     const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
1114     bool isEmpty;
1115     XMLSize_t attCount = rawAttrScan
1116     (
1117         qnameRawBuf
1118         , *fRawAttrList
1119         , isEmpty
1120     );
1121 
1122     // save the contentleafname and currentscope before addlevel, for later use
1123     ContentLeafNameTypeVector* cv = 0;
1124     XMLContentModel* cm = 0;
1125     unsigned int currentScope = Grammar::TOP_LEVEL_SCOPE;
1126     bool laxThisOne = false;
1127     if (!isRoot)
1128     {
1129         // schema validator will have correct type if validating
1130         SchemaElementDecl* tempElement = (SchemaElementDecl*)
1131             fElemStack.topElement()->fThisElement;
1132         SchemaElementDecl::ModelTypes modelType = tempElement->getModelType();
1133         ComplexTypeInfo *currType = 0;
1134 
1135         if (fValidate)
1136         {
1137             currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
1138             if (currType)
1139                 modelType = (SchemaElementDecl::ModelTypes)currType->getContentType();
1140             else // something must have gone wrong
1141                 modelType = SchemaElementDecl::Any;
1142         }
1143         else
1144         {
1145             currType = tempElement->getComplexTypeInfo();
1146         }
1147 
1148         if ((modelType == SchemaElementDecl::Mixed_Simple)
1149           ||  (modelType == SchemaElementDecl::Mixed_Complex)
1150           ||  (modelType == SchemaElementDecl::Children))
1151         {
1152             cm = currType->getContentModel();
1153             cv = cm->getContentLeafNameTypeVector();
1154             currentScope = fElemStack.getCurrentScope();
1155         }
1156         else if (modelType == SchemaElementDecl::Any) {
1157             laxThisOne = true;
1158         }
1159     }
1160 
1161     //  Now, since we might have to update the namespace map for this element,
1162     //  but we don't have the element decl yet, we just tell the element stack
1163     //  to expand up to get ready.
1164     XMLSize_t elemDepth = fElemStack.addLevel();
1165     fElemStack.setValidationFlag(fValidate);
1166     fElemStack.setPrefixColonPos(prefixColonPos);
1167 
1168     //  Check if there is any external schema location specified, and if we are at root,
1169     //  go through them first before scanning those specified in the instance document
1170     if (isRoot
1171         && (fExternalSchemaLocation || fExternalNoNamespaceSchemaLocation)) {
1172 
1173         if (fExternalSchemaLocation)
1174             parseSchemaLocation(fExternalSchemaLocation, true);
1175         if (fExternalNoNamespaceSchemaLocation)
1176             resolveSchemaGrammar(fExternalNoNamespaceSchemaLocation, XMLUni::fgZeroLenString, true);
1177     }
1178 
1179     //  Make an initial pass through the list and find any xmlns attributes or
1180     //  schema attributes.
1181     if (attCount)
1182         scanRawAttrListforNameSpaces(attCount);
1183 
1184     //  Resolve the qualified name to a URI and name so that we can look up
1185     //  the element decl for this element. We have now update the prefix to
1186     //  namespace map so we should get the correct element now.
1187     unsigned int uriId = resolveQNameWithColon
1188     (
1189         qnameRawBuf
1190         , fPrefixBuf
1191         , ElemStack::Mode_Element
1192         , prefixColonPos
1193     );
1194 
1195     //if schema, check if we should lax or skip the validation of this element
1196     bool parentValidation = fValidate;
1197     if (cv) {
1198         QName element(fPrefixBuf.getRawBuffer(), &qnameRawBuf[prefixColonPos + 1], uriId, fMemoryManager);
1199         // elementDepth will be > 0, as cv is only constructed if element is not
1200         // root.
1201         laxThisOne = laxElementValidation(&element, cv, cm, elemDepth - 1);
1202     }
1203 
1204     //  Look up the element now in the grammar. This will get us back a
1205     //  generic element decl object. We tell him to fault one in if he does
1206     //  not find it.
1207     XMLElementDecl* elemDecl = 0;
1208     bool wasAdded = false;
1209     const XMLCh* nameRawBuf = &qnameRawBuf[prefixColonPos + 1];
1210     const XMLCh* original_uriStr = fGrammar->getTargetNamespace();
1211 
1212     if (uriId != fEmptyNamespaceId) {
1213 
1214         // Check in current grammar before switching if necessary
1215         elemDecl = fGrammar->getElemDecl
1216         (
1217           uriId
1218           , nameRawBuf
1219           , qnameRawBuf
1220           , currentScope
1221         );
1222         if(!elemDecl)
1223         {
1224             // look in the list of undeclared elements, as would have been done
1225             // before we made grammars stateless:
1226             elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, currentScope);
1227         }
1228         // this is initialized correctly only if there is
1229         // no element decl.  The other uses in this scope will only
1230         // be encountered if there continues to be no element decl--which
1231         // implies that this will have been initialized correctly.
1232         unsigned int orgGrammarUri = uriId;
1233         if (!elemDecl && ( orgGrammarUri = fURIStringPool->getId(original_uriStr)) != uriId) {
1234             // not found, switch to the specified grammar
1235             const XMLCh* uriStr = getURIText(uriId);
1236             bool errorCondition = !switchGrammar(uriStr) && fValidate;
1237             if (errorCondition && !laxThisOne)
1238             {
1239                 fValidator->emitError
1240                 (
1241                     XMLValid::GrammarNotFound
1242                     ,uriStr
1243                 );
1244             }
1245 
1246             elemDecl = fGrammar->getElemDecl
1247             (
1248               uriId
1249               , nameRawBuf
1250               , qnameRawBuf
1251               , currentScope
1252             );
1253         }
1254 
1255         if (!elemDecl && currentScope != Grammar::TOP_LEVEL_SCOPE) {
1256             // if not found, then it may be a reference, try TOP_LEVEL_SCOPE
1257             elemDecl = fGrammar->getElemDecl
1258                        (
1259                            uriId
1260                            , nameRawBuf
1261                            , qnameRawBuf
1262                            , Grammar::TOP_LEVEL_SCOPE
1263                        );
1264             if(!elemDecl)
1265             {
1266                 // look in the list of undeclared elements, as would have been done
1267                 // before we made grammars stateless:
1268                 elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, (int)Grammar::TOP_LEVEL_SCOPE);
1269             }
1270             if(!elemDecl) {
1271                 // still not found in specified uri
1272                 // try emptyNamespace see if element should be un-qualified.
1273                 // Use a temp variable until we decide this is the case
1274                 XMLElementDecl* tempElemDecl = fGrammar->getElemDecl
1275                            (
1276                                fEmptyNamespaceId
1277                                , nameRawBuf
1278                                , qnameRawBuf
1279                                , currentScope
1280                            );
1281                 if (tempElemDecl && tempElemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) {
1282                     fValidator->emitError
1283                     (
1284                         XMLValid::ElementNotUnQualified
1285                         , qnameRawBuf
1286                     );
1287                     elemDecl = tempElemDecl;
1288                 }
1289             }
1290         }
1291 
1292         if (!elemDecl) {
1293             // still not found, fault this in and issue error later
1294             // switch back to original grammar first (if necessary)
1295             if(orgGrammarUri != uriId)
1296             {
1297                 switchGrammar(original_uriStr);
1298             }
1299             elemDecl = new (fMemoryManager) SchemaElementDecl
1300             (
1301                 fPrefixBuf.getRawBuffer()
1302                 , nameRawBuf
1303                 , uriId
1304                 , SchemaElementDecl::Any
1305                 , Grammar::TOP_LEVEL_SCOPE
1306                 , fMemoryManager
1307             );
1308             elemDecl->setId(fElemNonDeclPool->put((void*)elemDecl->getBaseName(), uriId, currentScope, (SchemaElementDecl*)elemDecl));
1309             wasAdded = true;
1310         }
1311     }
1312     else if (!elemDecl)
1313     {
1314         //the element has no prefix,
1315         //thus it is either a non-qualified element defined in current targetNS
1316         //or an element that is defined in the globalNS
1317 
1318         //try unqualifed first
1319         elemDecl = fGrammar->getElemDecl
1320                    (
1321                       uriId
1322                     , nameRawBuf
1323                     , qnameRawBuf
1324                     , currentScope
1325                     );
1326         if(!elemDecl)
1327         {
1328             // look in the list of undeclared elements, as would have been done
1329             // before we made grammars stateless:
1330             elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, currentScope);
1331         }
1332         // this is initialized correctly only if there is
1333         // no element decl.  The other uses in this scope will only
1334         // be encountered if there continues to be no element decl--which
1335         // implies that this will have been initialized correctly.
1336         unsigned int orgGrammarUri = fEmptyNamespaceId;
1337         if (!elemDecl && (orgGrammarUri = fURIStringPool->getId(original_uriStr)) != fEmptyNamespaceId) {
1338             //not found, switch grammar and try globalNS
1339             bool errorCondition = !switchGrammar(XMLUni::fgZeroLenString) && fValidate;
1340             if (errorCondition && !laxThisOne)
1341             {
1342                 fValidator->emitError
1343                 (
1344                     XMLValid::GrammarNotFound
1345                   , XMLUni::fgZeroLenString
1346                 );
1347             }
1348 
1349             elemDecl = fGrammar->getElemDecl
1350             (
1351               uriId
1352               , nameRawBuf
1353               , qnameRawBuf
1354               , currentScope
1355             );
1356         }
1357 
1358         if (!elemDecl && currentScope != Grammar::TOP_LEVEL_SCOPE) {
1359             // if not found, then it may be a reference, try TOP_LEVEL_SCOPE
1360             elemDecl = fGrammar->getElemDecl
1361                        (
1362                            uriId
1363                            , nameRawBuf
1364                            , qnameRawBuf
1365                            , Grammar::TOP_LEVEL_SCOPE
1366                        );
1367             if(!elemDecl)
1368             {
1369                 // look in the list of undeclared elements, as would have been done
1370                 // before we made grammars stateless:
1371                 elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, (int)Grammar::TOP_LEVEL_SCOPE);
1372             }
1373             if (!elemDecl && orgGrammarUri != fEmptyNamespaceId) {
1374                 // still Not found in specified uri
1375                 // go to original Grammar again to see if element needs to be fully qualified.
1376                 bool errorCondition = !switchGrammar(original_uriStr) && fValidate;
1377                 if (errorCondition && !laxThisOne)
1378                 {
1379                     fValidator->emitError
1380                     (
1381                         XMLValid::GrammarNotFound
1382                         ,original_uriStr
1383                     );
1384                 }
1385 
1386                 // Use a temp variable until we decide this is the case
1387                 XMLElementDecl* tempElemDecl = fGrammar->getElemDecl
1388                            (
1389                                orgGrammarUri
1390                                , nameRawBuf
1391                                , qnameRawBuf
1392                                , currentScope
1393                            );
1394                 if (tempElemDecl && tempElemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) {
1395                     fValidator->emitError
1396                     (
1397                         XMLValid::ElementNotQualified
1398                         , qnameRawBuf
1399                     );
1400                     elemDecl=tempElemDecl;
1401                 }
1402             }
1403         }
1404 
1405         if (!elemDecl) {
1406             // still not found, fault this in and issue error later
1407             // switch back to original grammar first (if necessary)
1408             if(orgGrammarUri != fEmptyNamespaceId)
1409             {
1410                 switchGrammar(original_uriStr);
1411             }
1412             elemDecl = new (fMemoryManager) SchemaElementDecl
1413             (
1414                 fPrefixBuf.getRawBuffer()
1415                 , nameRawBuf
1416                 , uriId
1417                 , SchemaElementDecl::Any
1418                 , Grammar::TOP_LEVEL_SCOPE
1419                 , fMemoryManager
1420             );
1421             elemDecl->setId(fElemNonDeclPool->put((void*)elemDecl->getBaseName(), uriId, currentScope, (SchemaElementDecl*)elemDecl));
1422             wasAdded = true;
1423         }
1424     }
1425 
1426     // this info needed for DOMTypeInfo
1427     fPSVIElemContext.fErrorOccurred = false;
1428 
1429     //  We do something different here according to whether we found the
1430     //  element or not.
1431     bool bXsiTypeSet= (fValidator)?((SchemaValidator*)fValidator)->getIsXsiTypeSet():false;
1432     if (wasAdded)
1433     {
1434         if (laxThisOne && !bXsiTypeSet) {
1435             fValidate = false;
1436             fElemStack.setValidationFlag(fValidate);
1437         }
1438 
1439         // If validating then emit an error
1440         if (fValidate)
1441         {
1442             // This is to tell the reuse Validator that this element was
1443             // faulted-in, was not an element in the grammar pool originally
1444             elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);
1445 
1446             if(!bXsiTypeSet)
1447             {
1448                 fValidator->emitError
1449                 (
1450                     XMLValid::ElementNotDefined
1451                     , elemDecl->getFullName()
1452                 );
1453                 fPSVIElemContext.fErrorOccurred = true;
1454             }
1455         }
1456     }
1457     else
1458     {
1459         // If its not marked declared and validating, then emit an error
1460         if (!elemDecl->isDeclared()) {
1461             if(elemDecl->getCreateReason() == XMLElementDecl::NoReason) {
1462                 if(!bXsiTypeSet)
1463                     fPSVIElemContext.fErrorOccurred = true;
1464             }
1465             if (laxThisOne) {
1466                 fValidate = false;
1467                 fElemStack.setValidationFlag(fValidate);
1468             }
1469 
1470             if (fValidate && !bXsiTypeSet)
1471             {
1472                 fValidator->emitError
1473                 (
1474                     XMLValid::ElementNotDefined
1475                     , elemDecl->getFullName()
1476                 );
1477             }
1478         }
1479     }
1480 
1481 
1482     //  Now we can update the element stack to set the current element
1483     //  decl. We expanded the stack above, but couldn't store the element
1484     //  decl because we didn't know it yet.
1485     fElemStack.setElement(elemDecl, fReaderMgr.getCurrentReaderNum());
1486     fElemStack.setCurrentURI(uriId);
1487 
1488     if (isRoot)
1489     {
1490         fRootGrammar = fGrammar;
1491         fRootElemName = XMLString::replicate(qnameRawBuf, fMemoryManager);
1492     }
1493 
1494     if (fPSVIHandler)
1495     {
1496         fPSVIElemContext.fElemDepth++;
1497 
1498         if (elemDecl->isDeclared())
1499         {
1500             fPSVIElemContext.fNoneValidationDepth = fPSVIElemContext.fElemDepth;
1501         }
1502         else
1503         {
1504             fPSVIElemContext.fFullValidationDepth = fPSVIElemContext.fElemDepth;
1505 
1506             /******
1507              * While we report an error for historical reasons, this should
1508              * actually result in lax assessment - NG.
1509             if (isRoot && fValidate)
1510                 fPSVIElemContext.fErrorOccurred = true;
1511             ******/
1512         }
1513     }
1514 
1515     //  Validate the element
1516     if (fValidate)
1517     {
1518         fValidator->validateElement(elemDecl);
1519         if (((SchemaValidator*) fValidator)->getErrorOccurred())
1520             fPSVIElemContext.fErrorOccurred = true;
1521     }
1522 
1523     // squirrel away the element's QName, so that we can do an efficient
1524     // end-tag match
1525     fElemStack.setCurrentSchemaElemName(fQNameBuf.getRawBuffer());
1526 
1527     ComplexTypeInfo* typeinfo = (fValidate)
1528         ? ((SchemaValidator*)fValidator)->getCurrentTypeInfo()
1529         : ((SchemaElementDecl*) elemDecl)->getComplexTypeInfo();
1530 
1531     if (typeinfo) {
1532         currentScope = typeinfo->getScopeDefined();
1533 
1534         // switch grammar if the typeinfo has a different grammar (happens when there is xsi:type)
1535         XMLCh* typeName = typeinfo->getTypeName();
1536         const int comma = XMLString::indexOf(typeName, chComma);
1537         if (comma > 0) {
1538             XMLBuffer prefixBuf(comma+1, fMemoryManager);
1539             prefixBuf.append(typeName, comma);
1540             const XMLCh* uriStr = prefixBuf.getRawBuffer();
1541 
1542             bool errorCondition = !switchGrammar(uriStr) && fValidate;
1543             if (errorCondition && !laxThisOne)
1544             {
1545                 fValidator->emitError
1546                 (
1547                     XMLValid::GrammarNotFound
1548                     , prefixBuf.getRawBuffer()
1549                 );
1550             }
1551         }
1552         else if (comma == 0) {
1553             bool errorCondition = !switchGrammar(XMLUni::fgZeroLenString) && fValidate;
1554             if (errorCondition && !laxThisOne)
1555             {
1556                 fValidator->emitError
1557                 (
1558                     XMLValid::GrammarNotFound
1559                     , XMLUni::fgZeroLenString
1560                 );
1561             }
1562         }
1563     }
1564     fElemStack.setCurrentScope(currentScope);
1565 
1566     // Set element next state
1567     if (elemDepth >= fElemStateSize) {
1568         resizeElemState();
1569     }
1570 
1571     fElemState[elemDepth] = 0;
1572     fElemLoopState[elemDepth] = 0;
1573     fElemStack.setCurrentGrammar(fGrammar);
1574 
1575     //  If this is the first element and we are validating, check the root
1576     //  element.
1577     if (!isRoot && parentValidation)
1578     {
1579         //  If the element stack is not empty, then add this element as a
1580         //  child of the previous top element. If its empty, this is the root
1581         //  elem and is not the child of anything.
1582         fElemStack.addChild(elemDecl->getElementName(), true);
1583     }
1584 
1585     // PSVI handling:  must reset this, even if no attributes...
1586     if(getPSVIHandler())
1587         fPSVIAttrList->reset();
1588 
1589     //  Now lets get the fAttrList filled in. This involves faulting in any
1590     //  defaulted and fixed attributes and normalizing the values of any that
1591     //  we got explicitly.
1592     //
1593     //  We update the attCount value with the total number of attributes, but
1594     //  it goes in with the number of values we got during the raw scan of
1595     //  explictly provided attrs above.
1596     attCount = buildAttList(*fRawAttrList, attCount, elemDecl, *fAttrList);
1597 
1598     if(attCount)
1599     {
1600         // clean up after ourselves:
1601         // clear the map used to detect duplicate attributes
1602         fUndeclaredAttrRegistry->removeAll();
1603     }
1604 
1605     // activate identity constraints
1606     if (toCheckIdentityConstraint())
1607     {
1608         fICHandler->activateIdentityConstraint
1609                         (
1610                           (SchemaElementDecl*) elemDecl
1611                         , (int) elemDepth
1612                         , uriId
1613                         , fPrefixBuf.getRawBuffer()
1614                         , *fAttrList
1615                         , attCount
1616                         , fValidationContext
1617                         );
1618 
1619     }
1620 
1621     // Since the element may have default values, call start tag now regardless if it is empty or not
1622     // If we have a document handler, then tell it about this start tag
1623     if (fDocHandler)
1624     {
1625         fDocHandler->startElement
1626         (
1627             *elemDecl
1628             , uriId
1629             , fPrefixBuf.getRawBuffer()
1630             , *fAttrList
1631             , attCount
1632             , false
1633             , isRoot
1634         );
1635     } // may be where we output something...
1636 
1637     // if we have a PSVIHandler, now's the time to call
1638     // its handleAttributesPSVI method:
1639     if(fPSVIHandler)
1640     {
1641         QName *eName = elemDecl->getElementName();
1642         fPSVIHandler->handleAttributesPSVI
1643         (
1644             eName->getLocalPart()
1645             , fURIStringPool->getValueForId(eName->getURI())
1646             , fPSVIAttrList
1647         );
1648     }
1649 
1650     //  If empty, validate content right now if we are validating and then
1651     //  pop the element stack top. Else, we have to update the current stack
1652     //  top's namespace mapping elements.
1653     if (isEmpty)
1654     {
1655         // Pop the element stack back off since it'll never be used now
1656         fElemStack.popTop();
1657 
1658         // reset current type info
1659         DatatypeValidator* psviMemberType = 0;
1660         if (fGrammarType == Grammar::SchemaGrammarType)
1661         {
1662             if (fValidate && elemDecl->isDeclared())
1663             {
1664                 fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
1665                 if(!fPSVIElemContext.fCurrentTypeInfo)
1666                     fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
1667                 else
1668                     fPSVIElemContext.fCurrentDV = 0;
1669                 if(fPSVIHandler)
1670                 {
1671                     fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) fValidator)->getNormalizedValue();
1672 
1673                     if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString))
1674                         fPSVIElemContext.fNormalizedValue = 0;
1675                 }
1676             }
1677             else
1678             {
1679                 fPSVIElemContext.fCurrentDV = 0;
1680                 fPSVIElemContext.fCurrentTypeInfo = 0;
1681                 fPSVIElemContext.fNormalizedValue = 0;
1682             }
1683         }
1684 
1685         // If validating, then insure that its legal to have no content
1686         if (fValidate)
1687         {
1688             XMLSize_t failure;
1689             bool res = fValidator->checkContent(elemDecl, 0, 0, &failure);
1690             if (!res)
1691             {
1692                 // REVISIT:  in the case of xsi:type, this may
1693                 // return the wrong string...
1694                 fValidator->emitError
1695                 (
1696                     XMLValid::ElementNotValidForContent
1697                     , elemDecl->getFullName()
1698                     , elemDecl->getFormattedContentModel()
1699                 );
1700             }
1701 
1702             if (((SchemaValidator*) fValidator)->getErrorOccurred())
1703                 fPSVIElemContext.fErrorOccurred = true;
1704             // note that if we're empty, won't be a current DV
1705             else
1706             {
1707                 if (fPSVIHandler)
1708                 {
1709                     fPSVIElemContext.fIsSpecified = ((SchemaValidator*) fValidator)->getIsElemSpecified();
1710                     if(fPSVIElemContext.fIsSpecified)
1711                         fPSVIElemContext.fNormalizedValue = ((SchemaElementDecl *)elemDecl)->getDefaultValue();
1712                 }
1713                 if (fPSVIElemContext.fCurrentDV && fPSVIElemContext.fCurrentDV->getType() == DatatypeValidator::Union)
1714                     psviMemberType = fValidationContext->getValidatingMemberType();
1715             }
1716 
1717             // call matchers and de-activate context
1718             if (toCheckIdentityConstraint())
1719             {
1720                 fICHandler->deactivateContext
1721                        (
1722                         (SchemaElementDecl *) elemDecl
1723                       , fContent.getRawBuffer()
1724                       , fValidationContext
1725                       , fPSVIElemContext.fCurrentDV
1726                        );
1727             }
1728 
1729         }
1730         else if (fGrammarType == Grammar::SchemaGrammarType) {
1731             ((SchemaValidator*)fValidator)->resetNillable();
1732         }
1733 
1734         if (fPSVIHandler)
1735         {
1736             endElementPSVI
1737             (
1738                 (SchemaElementDecl*)elemDecl, psviMemberType
1739             );
1740         }
1741 
1742         // If we have a doc handler, tell it about the end tag
1743         if (fDocHandler)
1744         {
1745             fDocHandler->endElement
1746             (
1747                 *elemDecl
1748                 , uriId
1749                 , isRoot
1750                 , fPrefixBuf.getRawBuffer()
1751             );
1752         }
1753 
1754         // If the elem stack is empty, then it was an empty root
1755         if (isRoot)
1756             gotData = false;
1757         else
1758         {
1759             // Restore the grammar
1760             fGrammar = fElemStack.getCurrentGrammar();
1761             fGrammarType = fGrammar->getGrammarType();
1762             fValidator->setGrammar(fGrammar);
1763 
1764             // Restore the validation flag
1765             fValidate = fElemStack.getValidationFlag();
1766         }
1767     }
1768     else    // not empty
1769     {
1770 
1771         // send a partial element psvi
1772         if (fPSVIHandler)
1773         {
1774 
1775             ComplexTypeInfo*   curTypeInfo = 0;
1776             DatatypeValidator* curDV = 0;
1777             XSTypeDefinition*  typeDef = 0;
1778 
1779             if (fValidate && elemDecl->isDeclared())
1780             {
1781                 curTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
1782 
1783                 if (curTypeInfo)
1784                 {
1785                     typeDef = (XSTypeDefinition*) fModel->getXSObject(curTypeInfo);
1786                 }
1787                 else
1788                 {
1789                     curDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
1790 
1791                     if (curDV)
1792                     {
1793                         typeDef = (XSTypeDefinition*) fModel->getXSObject(curDV);
1794                     }
1795                 }
1796             }
1797 
1798             fPSVIElement->reset
1799                 (
1800                   PSVIElement::VALIDITY_NOTKNOWN
1801                 , PSVIElement::VALIDATION_NONE
1802                 , fRootElemName
1803                 , ((SchemaValidator*) fValidator)->getIsElemSpecified()
1804                 , (elemDecl->isDeclared()) ? (XSElementDeclaration*) fModel->getXSObject(elemDecl) : 0
1805                 , typeDef
1806                 , 0 //memberType
1807                 , fModel
1808                 , ((SchemaElementDecl*)elemDecl)->getDefaultValue()
1809                 , 0
1810                 , 0
1811                 , 0
1812                 );
1813 
1814 
1815             fPSVIHandler->handlePartialElementPSVI
1816                 (
1817                   elemDecl->getBaseName()
1818                 , fURIStringPool->getValueForId(elemDecl->getURI())
1819                 , fPSVIElement
1820                 );
1821 
1822         }
1823 
1824         fErrorStack->push(fPSVIElemContext.fErrorOccurred);
1825     }
1826 
1827     return true;
1828 }
1829 
1830 
1831 // ---------------------------------------------------------------------------
1832 //  SGXMLScanner: Grammar preparsing
1833 // ---------------------------------------------------------------------------
loadGrammar(const InputSource & src,const short grammarType,const bool toCache)1834 Grammar* SGXMLScanner::loadGrammar(const   InputSource& src
1835                                    , const short        grammarType
1836                                    , const bool         toCache)
1837 {
1838     Grammar* loadedGrammar = 0;
1839 
1840     ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
1841 
1842     try
1843     {
1844         fGrammarResolver->cacheGrammarFromParse(false);
1845 		// if the new grammar has to be cached, better use the already cached
1846 		// grammars, or the an exception will be thrown when caching an already
1847 		// cached grammar
1848         fGrammarResolver->useCachedGrammarInParse(toCache);
1849         fRootGrammar = 0;
1850 
1851         if (fValScheme == Val_Auto) {
1852             fValidate = true;
1853         }
1854 
1855         // Reset some status flags
1856         fInException = false;
1857         fStandalone = false;
1858         fErrorCount = 0;
1859         fHasNoDTD = true;
1860         fSeeXsi = false;
1861 
1862         if (grammarType == Grammar::SchemaGrammarType) {
1863             loadedGrammar = loadXMLSchemaGrammar(src, toCache);
1864         }
1865     }
1866     //  NOTE:
1867     //
1868     //  In all of the error processing below, the emitError() call MUST come
1869     //  before the flush of the reader mgr, or it will fail because it tries
1870     //  to find out the position in the XML source of the error.
1871     catch(const XMLErrs::Codes)
1872     {
1873         // This is a 'first failure' exception, so fall through
1874     }
1875     catch(const XMLValid::Codes)
1876     {
1877         // This is a 'first fatal error' type exit, so fall through
1878     }
1879     catch(const XMLException& excToCatch)
1880     {
1881         //  Emit the error and catch any user exception thrown from here. Make
1882         //  sure in all cases we flush the reader manager.
1883         fInException = true;
1884         try
1885         {
1886             if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
1887                 emitError
1888                 (
1889                     XMLErrs::XMLException_Warning
1890                     , excToCatch.getCode()
1891                     , excToCatch.getMessage()
1892                 );
1893             else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
1894                 emitError
1895                 (
1896                     XMLErrs::XMLException_Fatal
1897                     , excToCatch.getCode()
1898                     , excToCatch.getMessage()
1899                 );
1900             else
1901                 emitError
1902                 (
1903                     XMLErrs::XMLException_Error
1904                     , excToCatch.getCode()
1905                     , excToCatch.getMessage()
1906                 );
1907         }
1908         catch(const OutOfMemoryException&)
1909         {
1910             // This is a special case for out-of-memory
1911             // conditions, because resetting the ReaderMgr
1912             // can be problematic.
1913             resetReaderMgr.release();
1914 
1915             throw;
1916         }
1917     }
1918     catch(const OutOfMemoryException&)
1919     {
1920         // This is a special case for out-of-memory
1921         // conditions, because resetting the ReaderMgr
1922         // can be problematic.
1923         resetReaderMgr.release();
1924 
1925         throw;
1926     }
1927 
1928     return loadedGrammar;
1929 }
1930 
//  Empties the cached schema info list by calling removeAll() on
//  fCachedSchemaInfoList. The list object itself is kept.
void SGXMLScanner::resetCachedGrammar ()
{
  fCachedSchemaInfoList->removeAll ();
}
1935 
1936 // ---------------------------------------------------------------------------
1937 //  SGXMLScanner: Private helper methods
1938 // ---------------------------------------------------------------------------
1939 //  This method handles the common initialization, to avoid having to do
1940 //  it redundantly in multiple constructors.
commonInit()1941 void SGXMLScanner::commonInit()
1942 {
1943     //  Create the element state array
1944     fElemState = (unsigned int*) fMemoryManager->allocate
1945     (
1946         fElemStateSize * sizeof(unsigned int)
1947     ); //new unsigned int[fElemStateSize];
1948     fElemLoopState = (unsigned int*) fMemoryManager->allocate
1949     (
1950         fElemStateSize * sizeof(unsigned int)
1951     ); //new unsigned int[fElemStateSize];
1952 
1953     //  And we need one for the raw attribute scan. This just stores key/
1954     //  value string pairs (prior to any processing.)
1955     fRawAttrList = new (fMemoryManager) RefVectorOf<KVStringPair>(32, true, fMemoryManager);
1956     fRawAttrColonList = (int*) fMemoryManager->allocate
1957     (
1958         fRawAttrColonListSize * sizeof(int)
1959     );
1960 
1961     //  Create the Validator and init them
1962     fSchemaValidator = new (fMemoryManager) SchemaValidator(0, fMemoryManager);
1963     initValidator(fSchemaValidator);
1964 
1965     // Create IdentityConstraint info
1966     fICHandler = new (fMemoryManager) IdentityConstraintHandler(this, fMemoryManager);
1967 
1968     //  Add the default entity entries for the character refs that must always
1969     //  be present.
1970     fEntityTable = new (fMemoryManager) ValueHashTableOf<XMLCh>(11, fMemoryManager);
1971     fEntityTable->put((void*) XMLUni::fgAmp, chAmpersand);
1972     fEntityTable->put((void*) XMLUni::fgLT, chOpenAngle);
1973     fEntityTable->put((void*) XMLUni::fgGT, chCloseAngle);
1974     fEntityTable->put((void*) XMLUni::fgQuot, chDoubleQuote);
1975     fEntityTable->put((void*) XMLUni::fgApos, chSingleQuote);
1976     fElemNonDeclPool = new (fMemoryManager) RefHash3KeysIdPool<SchemaElementDecl>(29, true, 128, fMemoryManager);
1977     fAttDefRegistry = new (fMemoryManager) RefHashTableOf<unsigned int, PtrHasher>
1978     (
1979         131, false, fMemoryManager
1980     );
1981     fUndeclaredAttrRegistry = new (fMemoryManager) Hash2KeysSetOf<StringHasher>(7, fMemoryManager);
1982     fPSVIAttrList = new (fMemoryManager) PSVIAttributeList(fMemoryManager);
1983 
1984     fSchemaInfoList = new (fMemoryManager) RefHash2KeysTableOf<SchemaInfo>(29, fMemoryManager);
1985     fCachedSchemaInfoList = new (fMemoryManager) RefHash2KeysTableOf<SchemaInfo>(29, fMemoryManager);
1986 
1987     if (fValidator)
1988     {
1989         if (!fValidator->handlesSchema())
1990             ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
1991     }
1992     else
1993     {
1994         fValidator = fSchemaValidator;
1995     }
1996 }
1997 
cleanUp()1998 void SGXMLScanner::cleanUp()
1999 {
2000     fMemoryManager->deallocate(fElemState); //delete [] fElemState;
2001     fMemoryManager->deallocate(fElemLoopState); //delete [] fElemLoopState;
2002     delete fSchemaGrammar;
2003     delete fEntityTable;
2004     delete fRawAttrList;
2005     fMemoryManager->deallocate(fRawAttrColonList);
2006     delete fSchemaValidator;
2007     delete fICHandler;
2008     delete fElemNonDeclPool;
2009     delete fAttDefRegistry;
2010     delete fUndeclaredAttrRegistry;
2011     delete fPSVIAttrList;
2012     if (fPSVIElement)
2013         delete fPSVIElement;
2014 
2015     if (fErrorStack)
2016         delete fErrorStack;
2017 
2018     delete fSchemaInfoList;
2019     delete fCachedSchemaInfoList;
2020 }
2021 
resizeElemState()2022 void SGXMLScanner::resizeElemState() {
2023 
2024     unsigned int newSize = fElemStateSize * 2;
2025     unsigned int* newElemState = (unsigned int*) fMemoryManager->allocate
2026     (
2027         newSize * sizeof(unsigned int)
2028     ); //new unsigned int[newSize];
2029     unsigned int* newElemLoopState = (unsigned int*) fMemoryManager->allocate
2030     (
2031         newSize * sizeof(unsigned int)
2032     ); //new unsigned int[newSize];
2033 
2034     // Copy the existing values
2035     unsigned int index = 0;
2036     for (; index < fElemStateSize; index++)
2037     {
2038         newElemState[index] = fElemState[index];
2039         newElemLoopState[index] = fElemLoopState[index];
2040     }
2041 
2042     for (; index < newSize; index++)
2043         newElemLoopState[index] = newElemState[index] = 0;
2044 
2045     // Delete the old array and udpate our members
2046     fMemoryManager->deallocate(fElemState); //delete [] fElemState;
2047     fMemoryManager->deallocate(fElemLoopState); //delete [] fElemLoopState;
2048     fElemState = newElemState;
2049     fElemLoopState = newElemLoopState;
2050     fElemStateSize = newSize;
2051 }
2052 
resizeRawAttrColonList()2053 void SGXMLScanner::resizeRawAttrColonList() {
2054 
2055     unsigned int newSize = fRawAttrColonListSize * 2;
2056     int* newRawAttrColonList = (int*) fMemoryManager->allocate
2057     (
2058         newSize * sizeof(int)
2059     ); //new int[newSize];
2060 
2061     // Copy the existing values
2062     unsigned int index = 0;
2063     for (; index < fRawAttrColonListSize; index++)
2064         newRawAttrColonList[index] = fRawAttrColonList[index];
2065 
2066     // Delete the old array and udpate our members
2067     fMemoryManager->deallocate(fRawAttrColonList); //delete [] fRawAttrColonList;
2068     fRawAttrColonList = newRawAttrColonList;
2069     fRawAttrColonListSize = newSize;
2070 }
2071 
2072 //  This method is called from scanStartTag() to build up the list of
2073 //  XMLAttr objects that will be passed out in the start tag callout. We
2074 //  get the key/value pairs from the raw scan of explicitly provided attrs,
2075 //  which have not been normalized. And we get the element declaration from
2076 //  which we will get any defaulted or fixed attribute defs and add those
2077 //  in as well.
2078 XMLSize_t
buildAttList(const RefVectorOf<KVStringPair> & providedAttrs,const XMLSize_t attCount,XMLElementDecl * elemDecl,RefVectorOf<XMLAttr> & toFill)2079 SGXMLScanner::buildAttList(const  RefVectorOf<KVStringPair>&  providedAttrs
2080                           , const XMLSize_t                   attCount
2081                           ,       XMLElementDecl*             elemDecl
2082                           ,       RefVectorOf<XMLAttr>&       toFill)
2083 {
2084     //  Ask the element to clear the 'provided' flag on all of the att defs
2085     //  that it owns, and to return us a boolean indicating whether it has
2086     //  any defs.
2087     DatatypeValidator *currDV = 0;
2088     ComplexTypeInfo *currType = 0;
2089 
2090     if (fValidate)
2091     {
2092         currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
2093         if (!currType) {
2094             currDV = ((SchemaValidator*)fValidator)->getCurrentDatatypeValidator();
2095         }
2096     }
2097 
2098     const bool hasDefs = (currType && fValidate)
2099             ? currType->hasAttDefs()
2100             : elemDecl->hasAttDefs();
2101 
2102     fElemCount++;
2103 
2104     //  If there are no expliclitily provided attributes and there are no
2105     //  defined attributes for the element, the we don't have anything to do.
2106     //  So just return zero in this case.
2107     if (!hasDefs && !attCount)
2108         return 0;
2109 
2110     // Keep up with how many attrs we end up with total
2111     XMLSize_t retCount = 0;
2112 
2113     //  And get the current size of the output vector. This lets us use
2114     //  existing elements until we fill it, then start adding new ones.
2115     const XMLSize_t curAttListSize = toFill.size();
2116 
2117     //  We need a buffer into which raw scanned attribute values will be
2118     //  normalized.
2119     XMLBufBid bbNormal(&fBufMgr);
2120     XMLBuffer& normBuf = bbNormal.getBuffer();
2121 
2122     XMLBufBid bbPrefix(&fBufMgr);
2123     XMLBuffer& prefixBuf = bbPrefix.getBuffer();
2124 
2125     //  Loop through our explicitly provided attributes, which are in the raw
2126     //  scanned form, and build up XMLAttr objects.
2127     XMLSize_t index;
2128     const XMLCh* prefPtr, *suffPtr;
2129     for (index = 0; index < attCount; index++)
2130     {
2131         PSVIItem::VALIDITY_STATE attrValid = PSVIItem::VALIDITY_VALID;
2132         PSVIItem::ASSESSMENT_TYPE attrAssessed = PSVIItem::VALIDATION_FULL;
2133         const KVStringPair* curPair = providedAttrs.elementAt(index);
2134 
2135         //  We have to split the name into its prefix and name parts. Then
2136         //  we map the prefix to its URI.
2137         const XMLCh* const namePtr = curPair->getKey();
2138 
2139         const int colonInd = fRawAttrColonList[index];
2140         unsigned int uriId;
2141         if (colonInd != -1)
2142         {
2143             prefixBuf.set(namePtr, colonInd);
2144             prefPtr = prefixBuf.getRawBuffer();
2145             suffPtr = namePtr + colonInd + 1;
2146             //  Map the prefix to a URI id
2147             uriId = resolvePrefix(prefPtr, ElemStack::Mode_Attribute);
2148         }
2149         else
2150         {
2151             // No colon, so we just have a name with no prefix
2152             prefPtr = XMLUni::fgZeroLenString;
2153             suffPtr = namePtr;
2154             // an empty prefix is always the empty namespace, when dealing with attributes
2155             uriId = fEmptyNamespaceId;
2156         }
2157 
2158         //  If the uri comes back as the xmlns or xml URI or its just a name
2159         //  and that name is 'xmlns', then we handle it specially. So set a
2160         //  boolean flag that lets us quickly below know which we are dealing
2161         //  with.
2162         const bool isNSAttr = (uriId == fEmptyNamespaceId)?
2163                                 XMLString::equals(suffPtr, XMLUni::fgXMLNSString) :
2164                                 (uriId == fXMLNSNamespaceId || XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI));
2165 
2166         //  If its not a special case namespace attr of some sort, then we
2167         //  do normal checking and processing.
2168         XMLAttDef::AttTypes attType = XMLAttDef::CData;
2169         DatatypeValidator *attrValidator = 0;
2170         PSVIAttribute *psviAttr = 0;
2171         bool otherXSI = false;
2172 
2173         if (isNSAttr)
2174         {
2175             if(!fUndeclaredAttrRegistry->putIfNotPresent(suffPtr, uriId))
2176             {
2177                 emitError
2178                 (
2179                     XMLErrs::AttrAlreadyUsedInSTag
2180                     , namePtr
2181                     , elemDecl->getFullName()
2182                 );
2183                 fPSVIElemContext.fErrorOccurred = true;
2184             }
2185             else
2186             {
2187                 bool ValueValidate = false;
2188                 bool tokenizeBuffer = false;
2189 
2190                 if (uriId == fXMLNSNamespaceId)
2191                 {
2192                     attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI);
2193                 }
2194                 else if (XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI))
2195                 {
2196                     if (XMLString::equals(suffPtr, SchemaSymbols::fgATT_NILL))
2197                     {
2198                         attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_BOOLEAN);
2199 
2200                         ValueValidate = true;
2201                     }
2202                     else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_SCHEMALOCATION))
2203                     {
2204                         // use anyURI as the validator
2205                         // tokenize the data and use the anyURI data for each piece
2206                         attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI);
2207                         //We should validate each value in the schema location however
2208                         //this lead to a performance degradation of around 4%.  Since
2209                         //the first value of each pair needs to match what is in the
2210                         //schema document and the second value needs to be valid in
2211                         //order to open the document we won't validate it.  Need to
2212                         //do performance analysis of the anyuri datatype.
2213                         //ValueValidate = true;
2214                         ValueValidate = false;
2215                         tokenizeBuffer = true;
2216                     }
2217                     else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_NONAMESPACESCHEMALOCATION))
2218                     {
2219                         attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI);
2220                         //We should validate this value however
2221                         //this lead to a performance degradation of around 4%.  Since
2222                         //the value needs to be valid in
2223                         //order to open the document we won't validate it.  Need to
2224                         //do performance analysis of the anyuri datatype.
2225                         //ValueValidate = true;
2226                         ValueValidate = false;
2227                     }
2228                     else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_TYPE))
2229                     {
2230                         attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_QNAME);
2231 
2232                         ValueValidate = true;
2233                     }
2234                     else {
2235                         otherXSI = true;
2236                     }
2237                 }
2238 
2239                 if (!otherXSI) {
2240                     normalizeAttRawValue
2241                     (
2242                         namePtr
2243                         , curPair->getValue()
2244                         , normBuf
2245                     );
2246 
2247                     if (fValidate && attrValidator && ValueValidate)
2248                     {
2249                         ((SchemaValidator*) fValidator)->normalizeWhiteSpace(attrValidator, normBuf.getRawBuffer(), normBuf, true);
2250 
2251                         ValidationContext* const    theContext =
2252                             getValidationContext();
2253 
2254                         if (theContext)
2255                         {
2256                             try
2257                             {
2258                                 if (tokenizeBuffer) {
2259                                     XMLStringTokenizer tokenizer(normBuf.getRawBuffer(), fMemoryManager);
2260                                     while (tokenizer.hasMoreTokens()) {
2261                                         attrValidator->validate(
2262                                             tokenizer.nextToken(),
2263                                             theContext,
2264                                             fMemoryManager);
2265                                     }
2266                                 }
2267                                 else {
2268                                     attrValidator->validate(
2269                                         normBuf.getRawBuffer(),
2270                                         theContext,
2271                                         fMemoryManager);
2272                                 }
2273                             }
2274                             catch (const XMLException& idve)
2275                             {
2276                                 fValidator->emitError (XMLValid::DatatypeError, idve.getCode(), idve.getMessage());
2277                             }
2278                         }
2279                     }
2280 
2281                     if(getPSVIHandler() && fGrammarType == Grammar::SchemaGrammarType)
2282                     {
2283 	                    psviAttr = fPSVIAttrList->getPSVIAttributeToFill(suffPtr, fURIStringPool->getValueForId(uriId));
2284 	                    XSSimpleTypeDefinition *validatingType = (attrValidator)
2285                             ? (XSSimpleTypeDefinition *)fModel->getXSObject(attrValidator)
2286                             : 0;
2287                         // no attribute declarations for these...
2288 	                    psviAttr->reset(
2289 	                        fRootElemName
2290 	                        , PSVIItem::VALIDITY_NOTKNOWN
2291 	                        , PSVIItem::VALIDATION_NONE
2292 	                        , validatingType
2293 	                        , 0
2294 	                        , 0
2295                             , false
2296 	                        , 0
2297                             , attrValidator
2298                         );
2299                     }
2300                 }
2301             }
2302         }
2303 
2304         if (!isNSAttr || otherXSI)
2305         {
2306             // Some checking for attribute wild card first (for schema)
2307             bool laxThisOne = false;
2308             bool skipThisOne = false;
2309 
2310             XMLAttDef* attDefForWildCard = 0;
2311             XMLAttDef*  attDef = 0;
2312 
2313             if (fGrammarType == Grammar::SchemaGrammarType) {
2314 
2315                 //retrieve the att def
2316                 SchemaAttDef* attWildCard = 0;
2317                 if (currType) {
2318                     attDef = currType->getAttDef(suffPtr, uriId);
2319                     attWildCard = currType->getAttWildCard();
2320                 }
2321                 else if (!currDV) { // check explicitly-set wildcard
2322                     attWildCard = ((SchemaElementDecl*)elemDecl)->getAttWildCard();
2323                 }
2324 
2325                 // if not found or faulted in - check for a matching wildcard attribute
2326                 // if no matching wildcard attribute, check (un)qualifed cases and flag
2327                 // appropriate errors
2328                 if (!attDef || (attDef->getCreateReason() == XMLAttDef::JustFaultIn)) {
2329 
2330                     if (attWildCard) {
2331                         //if schema, see if we should lax or skip the validation of this attribute
2332                         if (anyAttributeValidation(attWildCard, uriId, skipThisOne, laxThisOne)) {
2333 
2334                             if(!skipThisOne)
2335                             {
2336                                 SchemaGrammar* sGrammar = (SchemaGrammar*) fGrammarResolver->getGrammar(getURIText(uriId));
2337                                 if (sGrammar && sGrammar->getGrammarType() == Grammar::SchemaGrammarType) {
2338                                     RefHashTableOf<XMLAttDef>* attRegistry = sGrammar->getAttributeDeclRegistry();
2339                                     if (attRegistry) {
2340                                         attDefForWildCard = attRegistry->get(suffPtr);
2341                                     }
2342                                 }
2343                             }
2344                         }
2345                     }
2346                     else if (currType) {
2347                         // not found, see if the attDef should be qualified or not
2348                         if (uriId == fEmptyNamespaceId) {
2349                             attDef = currType->getAttDef(suffPtr, fURIStringPool->getId(fGrammar->getTargetNamespace()));
2350                             if (fValidate
2351                                 && attDef
2352                                 && attDef->getCreateReason() != XMLAttDef::JustFaultIn) {
2353                                 // the attribute should be qualified
2354                                 fValidator->emitError
2355                                 (
2356                                     XMLValid::AttributeNotQualified
2357                                     , attDef->getFullName()
2358                                 );
2359                                 fPSVIElemContext.fErrorOccurred = true;
2360                                 if (getPSVIHandler())
2361                                 {
2362                                     attrValid = PSVIItem::VALIDITY_INVALID;
2363                                 }
2364                             }
2365                         }
2366                         else {
2367                             attDef = currType->getAttDef(suffPtr, fEmptyNamespaceId);
2368                             if (fValidate
2369                                 && attDef
2370                                 && attDef->getCreateReason() != XMLAttDef::JustFaultIn) {
2371                                 // the attribute should be qualified
2372                                 fValidator->emitError
2373                                 (
2374                                     XMLValid::AttributeNotUnQualified
2375                                     , attDef->getFullName()
2376                                 );
2377                                 fPSVIElemContext.fErrorOccurred = true;
2378                                 if (getPSVIHandler())
2379                                 {
2380                                     attrValid = PSVIItem::VALIDITY_INVALID;
2381                                 }
2382                             }
2383                         }
2384                     }
2385                 }
2386             }
2387 
2388             // now need to prepare for duplicate detection
2389             if(attDef)
2390             {
2391                 unsigned int *curCountPtr = fAttDefRegistry->get(attDef);
2392                 if(!curCountPtr)
2393                 {
2394                     curCountPtr = getNewUIntPtr();
2395                     *curCountPtr = fElemCount;
2396                     fAttDefRegistry->put(attDef, curCountPtr);
2397                 }
2398                 else if(*curCountPtr < fElemCount)
2399                     *curCountPtr = fElemCount;
2400                 else
2401                 {
2402                     emitError
2403                     (
2404                         XMLErrs::AttrAlreadyUsedInSTag
2405                         , attDef->getFullName()
2406                         , elemDecl->getFullName()
2407                     );
2408                     fPSVIElemContext.fErrorOccurred = true;
2409                 }
2410             }
2411             else
2412             {
2413                 if(!fUndeclaredAttrRegistry->putIfNotPresent(suffPtr, uriId))
2414                 {
2415                     emitError
2416                     (
2417                         XMLErrs::AttrAlreadyUsedInSTag
2418                         , namePtr
2419                         , elemDecl->getFullName()
2420                     );
2421                     fPSVIElemContext.fErrorOccurred = true;
2422                 }
2423             }
2424 
2425             // if we've found either an attDef or an attDefForWildCard,
2426             // then we're doing full validation and it may still be valid.
2427             if(!attDef && !attDefForWildCard)
2428             {
2429                 if(!laxThisOne && !skipThisOne)
2430                 {
2431                     fPSVIElemContext.fErrorOccurred = true;
2432                 }
2433                 if(getPSVIHandler())
2434                 {
2435                     if(!laxThisOne && !skipThisOne)
2436                     {
2437                         attrValid = PSVIItem::VALIDITY_INVALID;
2438                     }
2439                     else if(laxThisOne)
2440                     {
2441                         attrValid = PSVIItem::VALIDITY_NOTKNOWN;
2442                         attrAssessed = PSVIItem::VALIDATION_PARTIAL;
2443                     }
2444                     else
2445                     {
2446                         attrValid = PSVIItem::VALIDITY_NOTKNOWN;
2447                         attrAssessed = PSVIItem::VALIDATION_NONE;
2448                     }
2449                 }
2450             }
2451 
2452             bool errorCondition = fValidate && !attDefForWildCard && !attDef;
2453             if (errorCondition && !skipThisOne && !laxThisOne)
2454             {
2455                 //
2456                 //  Its not valid for this element, so issue an error if we are
2457                 //  validating.
2458                 //
2459                 XMLBufBid bbMsg(&fBufMgr);
2460                 XMLBuffer& bufMsg = bbMsg.getBuffer();
2461                 if (uriId != fEmptyNamespaceId) {
2462                     XMLBufBid bbURI(&fBufMgr);
2463                     XMLBuffer& bufURI = bbURI.getBuffer();
2464 
2465                     getURIText(uriId, bufURI);
2466 
2467                     bufMsg.append(chOpenCurly);
2468                     bufMsg.append(bufURI.getRawBuffer());
2469                     bufMsg.append(chCloseCurly);
2470                 }
2471                 bufMsg.append(suffPtr);
2472                 fValidator->emitError
2473                 (
2474                     XMLValid::AttNotDefinedForElement
2475                     , bufMsg.getRawBuffer()
2476                     , elemDecl->getFullName()
2477                 );
2478             }
2479 
2480             //  Now normalize the raw value since we have the attribute type. We
2481             //  don't care about the return status here. If it failed, an error
2482             //  was issued, which is all we care about.
2483             if (attDefForWildCard) {
2484                 normalizeAttValue(
2485                     attDefForWildCard, namePtr, curPair->getValue(), normBuf
2486                 );
2487 
2488                 //  If we found an attdef for this one, then lets validate it.
2489                 const XMLCh* xsNormalized = normBuf.getRawBuffer();
2490                 DatatypeValidator* tempDV = ((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator();
2491                 if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
2492                 {
2493                     // normalize the attribute according to schema whitespace facet
2494                     ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf, true);
2495                     xsNormalized = fWSNormalizeBuf.getRawBuffer();
2496                     if (fNormalizeData && fValidate) {
2497                         normBuf.set(xsNormalized);
2498                     }
2499                 }
2500 
2501                 if (fValidate ) {
2502                     fValidator->validateAttrValue(
2503                         attDefForWildCard, xsNormalized, false, elemDecl
2504                     );
2505                     attrValidator = ((SchemaValidator *)fValidator)->getMostRecentAttrValidator();
2506                     if(((SchemaValidator *)fValidator)->getErrorOccurred())
2507                     {
2508                         fPSVIElemContext.fErrorOccurred = true;
2509                         if(getPSVIHandler())
2510                             attrValid = PSVIItem::VALIDITY_INVALID;
2511                     }
2512                 }
2513                 else { // no decl; default DOMTypeInfo to anySimpleType
2514                     attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
2515                 }
2516 
2517                 // Save the type for later use
2518                 attType = attDefForWildCard->getType();
2519             }
2520             else {
2521                 normalizeAttValue(
2522                     attDef, namePtr, curPair->getValue(), normBuf
2523                 );
2524 
2525                 //  If we found an attdef for this one, then lets validate it.
2526                 if (attDef)
2527                 {
2528                     const XMLCh* xsNormalized = normBuf.getRawBuffer();
2529                     if (fGrammarType == Grammar::SchemaGrammarType)
2530                     {
2531                         DatatypeValidator* tempDV = ((SchemaAttDef*) attDef)->getDatatypeValidator();
2532                         if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
2533                         {
2534                             // normalize the attribute according to schema whitespace facet
2535                             ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf, true);
2536                             xsNormalized = fWSNormalizeBuf.getRawBuffer();
2537                             if (fNormalizeData && fValidate && !skipThisOne) {
2538                                 normBuf.set(xsNormalized);
2539                             }
2540                         }
2541                     }
2542 
2543                     if (fValidate && !skipThisOne)
2544                     {
2545                         fValidator->validateAttrValue(
2546                             attDef, xsNormalized, false, elemDecl
2547                         );
2548                         attrValidator = ((SchemaValidator *)fValidator)->getMostRecentAttrValidator();
2549                         if(((SchemaValidator *)fValidator)->getErrorOccurred())
2550                         {
2551                             fPSVIElemContext.fErrorOccurred = true;
2552                             if(getPSVIHandler())
2553                                 attrValid = PSVIItem::VALIDITY_INVALID;
2554                         }
2555                     }
2556                     else {
2557                         attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
2558                     }
2559                 }
2560                 else {
2561                     attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
2562                 }
2563 
2564                 // Save the type for later use
2565                 if (attDef)
2566                 {
2567                     attType = attDef->getType();
2568                 }
2569             }
2570 
2571             // now fill in the PSVIAttributes entry for this attribute:
2572 	        if(getPSVIHandler())
2573 	        {
2574 	            psviAttr = fPSVIAttrList->getPSVIAttributeToFill(suffPtr, fURIStringPool->getValueForId(uriId));
2575 	            SchemaAttDef *actualAttDef = 0;
2576 	            if(attDef)
2577 	                actualAttDef = (SchemaAttDef *)attDef;
2578 	            else if (attDefForWildCard)
2579 	                actualAttDef = (SchemaAttDef *)attDefForWildCard;
2580                 if(actualAttDef)
2581                 {
2582 	                XSAttributeDeclaration *attrDecl = (XSAttributeDeclaration *)fModel->getXSObject(actualAttDef);
2583                     DatatypeValidator * attrDataType = actualAttDef->getDatatypeValidator();
2584 	                XSSimpleTypeDefinition *validatingType = (XSSimpleTypeDefinition *)fModel->getXSObject(attrDataType);
2585 	                if(attrValid != PSVIItem::VALIDITY_VALID)
2586 	                {
2587 	                    psviAttr->reset
2588                         (
2589 	                        fRootElemName
2590 	                        , attrValid
2591 	                        , attrAssessed
2592 	                        , validatingType
2593 	                        , 0
2594 	                        , actualAttDef->getValue()
2595 	                        , false
2596 	                        , attrDecl
2597                             , 0
2598 	                    );
2599 	                }
2600 	                else
2601 	                {
2602 	                    XSSimpleTypeDefinition *memberType = 0;
2603 	                    if(validatingType->getVariety() == XSSimpleTypeDefinition::VARIETY_UNION)
2604 	                        memberType = (XSSimpleTypeDefinition *)fModel->getXSObject(attrValidator);
2605 	                    psviAttr->reset
2606                         (
2607 	                        fRootElemName
2608 	                        , attrValid
2609 	                        , attrAssessed
2610 	                        , validatingType
2611 	                        , memberType
2612 	                        , actualAttDef->getValue()
2613 	                        , false
2614 	                        , attrDecl
2615                             , (memberType)?attrValidator:attrDataType
2616 	                    );
2617 	                }
2618                 }
2619                 else
2620                 {
2621 	                psviAttr->reset
2622                     (
2623 	                    fRootElemName
2624 	                    , attrValid
2625 	                    , attrAssessed
2626 	                    , 0
2627 	                    , 0
2628                         , 0
2629 	                    , false
2630 	                    , 0
2631                         , 0
2632 	                );
2633                 }
2634 	        }
2635         }
2636 
2637         //  Add this attribute to the attribute list that we use to pass them
2638         //  to the handler. We reuse its existing elements but expand it as
2639         //  required.
2640         XMLAttr* curAttr;
2641         if (retCount >= curAttListSize)
2642         {
2643             curAttr = new (fMemoryManager) XMLAttr
2644             (
2645                 uriId
2646                 , suffPtr
2647                 , prefPtr
2648                 , normBuf.getRawBuffer()
2649                 , attType
2650                 , true
2651                 , fMemoryManager
2652             );
2653             toFill.addElement(curAttr);
2654         }
2655         else
2656         {
2657             curAttr = toFill.elementAt(retCount);
2658             curAttr->set
2659             (
2660                 uriId
2661                 , suffPtr
2662                 , prefPtr
2663                 , normBuf.getRawBuffer()
2664                 , attType
2665             );
2666             curAttr->setSpecified(true);
2667         }
2668         if(psviAttr)
2669             psviAttr->setValue(curAttr->getValue());
2670 
2671         // Bump the count of attrs in the list
2672         retCount++;
2673     }
2674 
2675     //  Now, if there are any attributes declared by this element, let's
2676     //  go through them and make sure that any required ones are provided,
2677     //  and fault in any fixed ones and defaulted ones that are not provided
2678     //  literally.
2679     if (hasDefs)
2680     {
2681         // Check after all specified attrs are scanned
2682         // (1) report error for REQUIRED attrs that are missing (V_TAGc)
2683         // (2) add default attrs if missing (FIXED and NOT_FIXED)
2684 
2685         XMLAttDefList& attDefList = getAttDefList(currType, elemDecl);
2686 
2687         for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++)
2688         {
2689             // Get the current att def, for convenience and its def type
2690             XMLAttDef *curDef = &attDefList.getAttDef(i);
2691             const XMLAttDef::DefAttTypes defType = curDef->getDefaultType();
2692 
2693             unsigned int *attCountPtr = fAttDefRegistry->get(curDef);
2694             if (!attCountPtr || *attCountPtr < fElemCount)
2695             { // did not occur
2696                 // note that since there is no attribute information
2697                 // item present, there is no PSVI infoset to augment here *except*
2698                 // that the element is invalid
2699 
2700                 //the attribute is not provided
2701                 if (fValidate)
2702                 {
2703                     // If we are validating and its required, then an error
2704                     if ((defType == XMLAttDef::Required) ||
2705                         (defType == XMLAttDef::Required_And_Fixed)  )
2706 
2707                     {
2708                         fValidator->emitError
2709                         (
2710                             XMLValid::RequiredAttrNotProvided
2711                             , curDef->getFullName()
2712                         );
2713                         fPSVIElemContext.fErrorOccurred = true;
2714                     }
2715                     else if ((defType == XMLAttDef::Default) ||
2716                              (defType == XMLAttDef::Fixed)  )
2717                     {
2718                         if (fStandalone && curDef->isExternal())
2719                         {
2720                             // XML 1.0 Section 2.9
2721                             // Document is standalone, so attributes must not be defaulted.
2722                             fValidator->emitError(XMLValid::NoDefAttForStandalone, curDef->getFullName(), elemDecl->getFullName());
2723                         }
2724                     }
2725                 }
2726 
2727                 //  Fault in the value if needed, and bump the att count.
2728                 if ((defType == XMLAttDef::Default)
2729                     ||  (defType == XMLAttDef::Fixed))
2730                 {
2731                     // Let the validator pass judgement on the attribute value
2732                     if (fValidate)
2733                     {
2734                         fValidator->validateAttrValue
2735                         (
2736                             curDef
2737                             , curDef->getValue()
2738                             , false
2739                             , elemDecl
2740                         );
2741                     }
2742 
2743                     XMLAttr* curAtt;
2744                     if (retCount >= curAttListSize)
2745                     {
2746                         curAtt = new (fMemoryManager) XMLAttr(fMemoryManager);
2747                         fValidator->faultInAttr(*curAtt, *curDef);
2748                         fAttrList->addElement(curAtt);
2749                     }
2750                     else
2751                     {
2752                         curAtt = fAttrList->elementAt(retCount);
2753                         fValidator->faultInAttr(*curAtt, *curDef);
2754                     }
2755 
2756                     // Indicate it was not explicitly specified and bump count
2757                     curAtt->setSpecified(false);
2758                     retCount++;
2759                     if(getPSVIHandler())
2760                     {
2761                         QName *attName = ((SchemaAttDef *)curDef)->getAttName();
2762                         PSVIAttribute *defAttrToFill = fPSVIAttrList->getPSVIAttributeToFill
2763                         (
2764                             attName->getLocalPart(), fURIStringPool->getValueForId( attName->getURI())
2765                         );
2766                         XSAttributeDeclaration *defAttrDecl = (XSAttributeDeclaration *)fModel->getXSObject((void *)curDef);
2767                         DatatypeValidator * attrDataType = ((SchemaAttDef *)curDef)->getDatatypeValidator();
2768                         XSSimpleTypeDefinition *defAttrType =
2769                             (XSSimpleTypeDefinition*)fModel->getXSObject(attrDataType);
2770                         // would have occurred during validation of default value
2771                         if(((SchemaValidator *)fValidator)->getErrorOccurred())
2772                         {
2773                             defAttrToFill->reset(
2774                                 fRootElemName
2775                                 , PSVIItem::VALIDITY_INVALID
2776                                 , PSVIItem::VALIDATION_FULL
2777                                 , defAttrType
2778                                 , 0
2779                                 , curDef->getValue()
2780                                 , true
2781                                 , defAttrDecl
2782                                 , 0
2783                             );
2784                         }
2785                         else
2786                         {
2787                             XSSimpleTypeDefinition *defAttrMemberType = 0;
2788                             if(defAttrType->getVariety() == XSSimpleTypeDefinition::VARIETY_UNION)
2789                             {
2790                                 defAttrMemberType = (XSSimpleTypeDefinition *)fModel->getXSObject
2791                                 (
2792                                     ((SchemaValidator*)fValidator)->getMostRecentAttrValidator()
2793                                 );
2794                             }
2795                             defAttrToFill->reset
2796                             (
2797                                 fRootElemName
2798                                 , PSVIItem::VALIDITY_VALID
2799                                 , PSVIItem::VALIDATION_FULL
2800                                 , defAttrType
2801                                 , defAttrMemberType
2802                                 , curDef->getValue()
2803                                 , true
2804                                 , defAttrDecl
2805                                 , (defAttrMemberType)?((SchemaValidator *)fValidator)->getMostRecentAttrValidator():attrDataType
2806                             );
2807                         }
2808                         defAttrToFill->setValue(curDef->getValue());
2809                     }
2810                 }
2811             }
2812             else if (attCountPtr)
2813             {
2814                 //attribute is provided
2815                 // (schema) report error for PROHIBITED attrs that are present (V_TAGc)
2816                 if (defType == XMLAttDef::Prohibited && fValidate)
2817                 {
2818                     fValidator->emitError
2819                     (
2820                         XMLValid::ProhibitedAttributePresent
2821                         , curDef->getFullName()
2822                     );
2823                     fPSVIElemContext.fErrorOccurred = true;
2824                     if (getPSVIHandler())
2825                     {
2826                         QName *attQName = ((SchemaAttDef *)curDef)->getAttName();
2827                         // bad luck...
2828                         PSVIAttribute *prohibitedAttr = fPSVIAttrList->getAttributePSVIByName
2829                         (
2830                             attQName->getLocalPart(),
2831                             fURIStringPool->getValueForId(attQName->getURI())
2832                         );
2833                         prohibitedAttr->updateValidity(PSVIItem::VALIDITY_INVALID);
2834                     }
2835                 }
2836             }
2837         }
2838     }
2839 
2840     return retCount;
2841 }
2842 
2843 
2844 //  This method will take a raw attribute value and normalize it according to
2845 //  the rules of the attribute type. It will put the resulting value into the
2846 //  passed buffer.
2847 //
2848 //  This code assumes that escaped characters in the original value (via char
2849 //  refs) are prefixed by a 0xFFFF character. This is because some characters
2850 //  are legal if escaped only. And some escape chars are not subject to
2851 //  normalization rules.
normalizeAttValue(const XMLAttDef * const attDef,const XMLCh * const attName,const XMLCh * const value,XMLBuffer & toFill)2852 bool SGXMLScanner::normalizeAttValue( const   XMLAttDef* const    attDef
2853                                       , const XMLCh* const        attName
2854                                       , const XMLCh* const        value
2855                                       ,       XMLBuffer&          toFill)
2856 {
2857     // A simple state value for a whitespace processing state machine
2858     enum States
2859     {
2860         InWhitespace
2861         , InContent
2862     };
2863 
2864     // Get the type and name
2865     const XMLAttDef::AttTypes type = (attDef)
2866                             ?attDef->getType()
2867                             :XMLAttDef::CData;
2868 
2869     // Assume its going to go fine, and empty the target buffer in preperation
2870     bool retVal = true;
2871     toFill.reset();
2872 
2873     // Get attribute def - to check to see if it's declared externally or not
2874     bool  isAttExternal = (attDef)
2875                         ?attDef->isExternal()
2876                         :false;
2877 
2878     //  Loop through the chars of the source value and normalize it according
2879     //  to the type.
2880     States curState = InContent;
2881     bool firstNonWS = false;
2882     XMLCh nextCh;
2883     const XMLCh* srcPtr = value;
2884 
2885     if (type == XMLAttDef::CData || type > XMLAttDef::Notation) {
2886         while (*srcPtr) {
2887             //  Get the next character from the source. We have to watch for
2888             //  escaped characters (which are indicated by a 0xFFFF value followed
2889             //  by the char that was escaped.)
2890             nextCh = *srcPtr;
2891 
2892             // Do we have an escaped character ?
2893             if (nextCh == 0xFFFF)
2894             {
2895                 nextCh = *++srcPtr;
2896             }
2897             else if ( (nextCh <= 0x0D) && (nextCh == 0x09 || nextCh == 0x0A || nextCh == 0x0D) ) {
2898                 // Check Validity Constraint for Standalone document declaration
2899                 // XML 1.0, Section 2.9
2900                 if (fStandalone && fValidate && isAttExternal)
2901                 {
2902                      // Can't have a standalone document declaration of "yes" if  attribute
2903                      // values are subject to normalisation
2904                      fValidator->emitError(XMLValid::NoAttNormForStandalone, attName);
2905                 }
2906                 nextCh = chSpace;
2907             }
2908             else if (nextCh == chOpenAngle) {
2909                 //  If its not escaped, then make sure its not a < character, which is
2910                 //  not allowed in attribute values.
2911                 emitError(XMLErrs::BracketInAttrValue, attName);
2912                 retVal = false;
2913             }
2914 
2915             // Add this char to the target buffer
2916             toFill.append(nextCh);
2917 
2918             // And move up to the next character in the source
2919             srcPtr++;
2920         }
2921     }
2922     else {
2923         while (*srcPtr)
2924         {
2925             //  Get the next character from the source. We have to watch for
2926             //  escaped characters (which are indicated by a 0xFFFF value followed
2927             //  by the char that was escaped.)
2928             nextCh = *srcPtr;
2929 
2930             // Do we have an escaped character ?
2931             if (nextCh == 0xFFFF)
2932             {
2933                 nextCh = *++srcPtr;
2934             }
2935             else if (nextCh == chOpenAngle) {
2936                 //  If its not escaped, then make sure its not a < character, which is
2937                 //  not allowed in attribute values.
2938                 emitError(XMLErrs::BracketInAttrValue, attName);
2939                 retVal = false;
2940             }
2941 
2942             if (curState == InWhitespace)
2943             {
2944                 if (!fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
2945                 {
2946                     if (firstNonWS)
2947                         toFill.append(chSpace);
2948                     curState = InContent;
2949                     firstNonWS = true;
2950                 }
2951                 else
2952                 {
2953                     srcPtr++;
2954                     continue;
2955                 }
2956             }
2957             else if (curState == InContent)
2958             {
2959                 if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
2960                 {
2961                     curState = InWhitespace;
2962                     srcPtr++;
2963 
2964                     // Check Validity Constraint for Standalone document declaration
2965                     // XML 1.0, Section 2.9
2966                     if (fStandalone && fValidate && isAttExternal)
2967                     {
2968                         if (!firstNonWS || (nextCh != chSpace) || (!*srcPtr) || fReaderMgr.getCurrentReader()->isWhitespace(*srcPtr))
2969                         {
2970                             // Can't have a standalone document declaration of "yes" if  attribute
2971                             // values are subject to normalisation
2972                             fValidator->emitError(XMLValid::NoAttNormForStandalone, attName);
2973                         }
2974                     }
2975                     continue;
2976                 }
2977                 firstNonWS = true;
2978             }
2979 
2980             // Add this char to the target buffer
2981             toFill.append(nextCh);
2982 
2983             // And move up to the next character in the source
2984             srcPtr++;
2985         }
2986     }
2987 
2988     return retVal;
2989 }
2990 
2991 //  This method will just normalize the input value as CDATA without
2992 //  any standalone checking.
normalizeAttRawValue(const XMLCh * const attrName,const XMLCh * const value,XMLBuffer & toFill)2993 bool SGXMLScanner::normalizeAttRawValue( const   XMLCh* const        attrName
2994                                       , const XMLCh* const        value
2995                                       ,       XMLBuffer&          toFill)
2996 {
2997     // Assume its going to go fine, and empty the target buffer in preperation
2998     bool retVal = true;
2999     toFill.reset();
3000 
3001     //  Loop through the chars of the source value and normalize it according
3002     //  to the type.
3003     bool escaped;
3004     XMLCh nextCh;
3005     const XMLCh* srcPtr = value;
3006     while (*srcPtr)
3007     {
3008         //  Get the next character from the source. We have to watch for
3009         //  escaped characters (which are indicated by a 0xFFFF value followed
3010         //  by the char that was escaped.)
3011         nextCh = *srcPtr;
3012         escaped = (nextCh == 0xFFFF);
3013         if (escaped)
3014             nextCh = *++srcPtr;
3015 
3016         //  If its not escaped, then make sure its not a < character, which is
3017         //  not allowed in attribute values.
3018         if (!escaped && (*srcPtr == chOpenAngle))
3019         {
3020             emitError(XMLErrs::BracketInAttrValue, attrName);
3021             retVal = false;
3022         }
3023 
3024         if (!escaped)
3025         {
3026             //  NOTE: Yes this is a little redundant in that a 0x20 is
3027             //  replaced with an 0x20. But its faster to do this (I think)
3028             //  than checking for 9, A, and D separately.
3029             if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
3030                 nextCh = chSpace;
3031         }
3032 
3033         // Add this char to the target buffer
3034         toFill.append(nextCh);
3035 
3036         // And move up to the next character in the source
3037         srcPtr++;
3038     }
3039     return retVal;
3040 }
3041 
3042 //  This method will reset the scanner data structures, and related plugged
3043 //  in stuff, for a new scan session. We get the input source for the primary
3044 //  XML entity, create the reader for it, and push it on the stack so that
3045 //  upon successful return from here we are ready to go.
scanReset(const InputSource & src)3046 void SGXMLScanner::scanReset(const InputSource& src)
3047 {
3048 
3049     //  This call implicitly tells us that we are going to reuse the scanner
3050     //  if it was previously used. So tell the validator to reset itself.
3051     //
3052     //  But, if the fUseCacheGrammar flag is set, then don't reset it.
3053     //
3054     //  NOTE:   The ReaderMgr is flushed on the way out, because that is
3055     //          required to insure that files are closed.
3056     fGrammarResolver->cacheGrammarFromParse(fToCacheGrammar);
3057     fGrammarResolver->useCachedGrammarInParse(fUseCachedGrammar);
3058 
3059     // Clear transient schema info list.
3060     //
3061     fSchemaInfoList->removeAll ();
3062 
3063     // fModel may need updating, as fGrammarResolver could have cleaned it
3064     if(fModel && getPSVIHandler())
3065         fModel = fGrammarResolver->getXSModel();
3066 
3067     // Create dummy schema grammar
3068     if (!fSchemaGrammar) {
3069         fSchemaGrammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager);
3070     }
3071 
3072     fGrammar = fSchemaGrammar;
3073     fGrammarType = Grammar::DTDGrammarType;
3074     fRootGrammar = 0;
3075 
3076     fValidator->setGrammar(fGrammar);
3077     if (fValidatorFromUser) {
3078 
3079         ((SchemaValidator*) fValidator)->setErrorReporter(fErrorReporter);
3080         ((SchemaValidator*) fValidator)->setGrammarResolver(fGrammarResolver);
3081         ((SchemaValidator*) fValidator)->setExitOnFirstFatal(fExitOnFirstFatal);
3082     }
3083 
3084     // Reset validation
3085     fValidate = (fValScheme == Val_Always) ? true : false;
3086 
3087     //  And for all installed handlers, send reset events. This gives them
3088     //  a chance to flush any cached data.
3089     if (fDocHandler)
3090         fDocHandler->resetDocument();
3091     if (fEntityHandler)
3092         fEntityHandler->resetEntities();
3093     if (fErrorReporter)
3094         fErrorReporter->resetErrors();
3095 
3096     // Clear out the id reference list
3097     resetValidationContext();
3098 
3099     // Reset the Root Element Name
3100     fMemoryManager->deallocate(fRootElemName);//delete [] fRootElemName;
3101     fRootElemName = 0;
3102 
3103     // Reset IdentityConstraints
3104     if (fICHandler)
3105         fICHandler->reset();
3106 
3107     //  Reset the element stack, and give it the latest ids for the special
3108     //  URIs it has to know about.
3109     fElemStack.reset
3110     (
3111         fEmptyNamespaceId
3112         , fUnknownNamespaceId
3113         , fXMLNamespaceId
3114         , fXMLNSNamespaceId
3115     );
3116 
3117     if (!fSchemaNamespaceId)
3118         fSchemaNamespaceId  = fURIStringPool->addOrFind(SchemaSymbols::fgURI_XSI);
3119 
3120     // Reset some status flags
3121     fInException = false;
3122     fStandalone = false;
3123     fErrorCount = 0;
3124     fHasNoDTD = true;
3125     fSeeXsi = false;
3126     fDoNamespaces = true;
3127     fDoSchema = true;
3128 
3129     // Reset PSVI context
3130     // Note that we always need this around for DOMTypeInfo
3131     if (!fPSVIElement)
3132         fPSVIElement = new (fMemoryManager) PSVIElement(fMemoryManager);
3133 
3134     if (!fErrorStack)
3135     {
3136         fErrorStack = new (fMemoryManager) ValueStackOf<bool>(8, fMemoryManager);
3137     }
3138     else
3139     {
3140         fErrorStack->removeAllElements();
3141     }
3142 
3143     resetPSVIElemContext();
3144 
3145     // Reset the validators
3146     fSchemaValidator->reset();
3147     fSchemaValidator->setErrorReporter(fErrorReporter);
3148     fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal);
3149     fSchemaValidator->setGrammarResolver(fGrammarResolver);
3150     if (fValidatorFromUser)
3151         fValidator->reset();
3152 
3153     //  Handle the creation of the XML reader object for this input source.
3154     //  This will provide us with transcoding and basic lexing services.
3155     XMLReader* newReader = fReaderMgr.createReader
3156     (
3157         src
3158         , true
3159         , XMLReader::RefFrom_NonLiteral
3160         , XMLReader::Type_General
3161         , XMLReader::Source_External
3162         , fCalculateSrcOfs
3163         , fLowWaterMark
3164     );
3165 
3166     if (!newReader) {
3167         if (src.getIssueFatalErrorIfNotFound())
3168             ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId(), fMemoryManager);
3169         else
3170             ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId(), fMemoryManager);
3171     }
3172 
3173     // Push this read onto the reader manager
3174     fReaderMgr.pushReader(newReader, 0);
3175 
3176     // and reset security-related things if necessary:
3177     if(fSecurityManager != 0)
3178     {
3179         fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit();
3180         fEntityExpansionCount = 0;
3181     }
3182     fElemCount = 0;
3183     if(fUIntPoolRowTotal >= 32)
3184     { // 8 KB tied up with validating attributes...
3185         fAttDefRegistry->removeAll();
3186         recreateUIntPool();
3187     }
3188     else
3189     {
3190         // note that this will implicitly reset the values of the hashtables,
3191         // though their buckets will still be tied up
3192         resetUIntPool();
3193     }
3194     fUndeclaredAttrRegistry->removeAll();
3195 }
3196 
3197 
3198 //  This method is called between markup in content. It scans for character
3199 //  data that is sent to the document handler. It watches for any markup
3200 //  characters that would indicate that the character data has ended. It also
3201 //  handles expansion of general and character entities.
3202 //
3203 //  sendData() is a local static helper for this method which handles some
3204 //  code that must be done in three different places here.
sendCharData(XMLBuffer & toSend)3205 void SGXMLScanner::sendCharData(XMLBuffer& toSend)
3206 {
3207     // If no data in the buffer, then nothing to do
3208     if (toSend.isEmpty())
3209         return;
3210 
3211     //  We do different things according to whether we are validating or
3212     //  not. If not, its always just characters; else, it depends on the
3213     //  current element's content model.
3214     if (fValidate)
3215     {
3216         // Get the raw data we need for the callback
3217         const XMLCh* rawBuf = toSend.getRawBuffer();
3218         const XMLSize_t len = toSend.getLen();
3219 
3220         // Get the character data opts for the current element
3221         XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData;
3222         // And see if the current element is a 'Children' style content model
3223         ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
3224         if(currType)
3225         {
3226             SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType();
3227             if(modelType == SchemaElementDecl::Children ||
3228                modelType == SchemaElementDecl::ElementOnlyEmpty)
3229                 charOpts = XMLElementDecl::SpacesOk;
3230             else if(modelType == SchemaElementDecl::Empty)
3231                 charOpts = XMLElementDecl::NoCharData;
3232         }
3233 
3234         // should not be necessary once PSVI method on element decls
3235         // are removed
3236         if (charOpts == XMLElementDecl::NoCharData)
3237         {
3238             // They definitely cannot handle any type of char data
3239             fValidator->emitError(XMLValid::NoCharDataInCM);
3240             if (getPSVIHandler())
3241             {
3242                 // REVISIT:
3243                 // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
3244             }
3245         }
3246         else if (fReaderMgr.getCurrentReader()->isAllSpaces(rawBuf, len))
3247         {
3248             //  Its all spaces. So, if they can take spaces, then send it
3249             //  as ignorable whitespace. If they can handle any char data
3250             //  send it as characters.
3251             if (charOpts == XMLElementDecl::SpacesOk) {
3252                 if (fDocHandler)
3253                     fDocHandler->ignorableWhitespace(rawBuf, len, false);
3254             }
3255             else if (charOpts == XMLElementDecl::AllCharData)
3256             {
3257                 XMLSize_t xsLen;
3258                 const XMLCh* xsNormalized;
3259                 DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
3260                 if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
3261                 {
3262                     // normalize the character according to schema whitespace facet
3263                     ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf, fWSNormalizeBuf);
3264                     xsNormalized = fWSNormalizeBuf.getRawBuffer();
3265                     xsLen = fWSNormalizeBuf.getLen();
3266                 }
3267                 else {
3268                     xsNormalized = rawBuf;
3269                     xsLen = len;
3270                 }
3271 
3272                 // tell the schema validation about the character data for checkContent later
3273                 ((SchemaValidator*)fValidator)->setDatatypeBuffer(xsNormalized);
3274 
3275                 // call all active identity constraints
3276                 if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) {
3277                     fContent.append(xsNormalized, xsLen);
3278                 }
3279 
3280                 if (fDocHandler) {
3281                     if (fNormalizeData) {
3282                         fDocHandler->docCharacters(xsNormalized, xsLen, false);
3283                     }
3284                     else {
3285                         fDocHandler->docCharacters(rawBuf, len, false);
3286                     }
3287                 }
3288             }
3289         }
3290         else
3291         {
3292             //  If they can take any char data, then send it. Otherwise, they
3293             //  can only handle whitespace and can't handle this stuff so
3294             //  issue an error.
3295             if (charOpts == XMLElementDecl::AllCharData)
3296             {
3297                 XMLSize_t xsLen;
3298                 const XMLCh *xsNormalized;
3299                 DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
3300                 if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
3301                 {
3302                     ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf, fWSNormalizeBuf);
3303                     xsNormalized = fWSNormalizeBuf.getRawBuffer();
3304                     xsLen = fWSNormalizeBuf.getLen();
3305                 }
3306                 else {
3307                     xsNormalized = rawBuf;
3308                     xsLen = len;
3309                 }
3310 
3311                 // tell the schema validation about the character data for checkContent later
3312                 ((SchemaValidator*)fValidator)->setDatatypeBuffer(xsNormalized);
3313 
3314                 // call all active identity constraints
3315                 if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) {
3316                     fContent.append(xsNormalized, xsLen);
3317                 }
3318 
3319                 if (fDocHandler) {
3320                     if (fNormalizeData) {
3321                         fDocHandler->docCharacters(xsNormalized, xsLen, false);
3322                     }
3323                     else {
3324                         fDocHandler->docCharacters(rawBuf, len, false);
3325                     }
3326                 }
3327             }
3328             else
3329             {
3330                 fValidator->emitError(XMLValid::NoCharDataInCM);
3331                 if (getPSVIHandler())
3332                 {
3333                     // REVISIT:
3334                     // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
3335                 }
3336             }
3337         }
3338     }
3339     else
3340     {
3341         // call all active identity constraints
3342         if (toCheckIdentityConstraint() && fICHandler->getMatcherCount())
3343             fContent.append(toSend.getRawBuffer(), toSend.getLen());
3344 
3345         // Always assume its just char data if not validating
3346         if (fDocHandler)
3347             fDocHandler->docCharacters(toSend.getRawBuffer(), toSend.getLen(), false);
3348     }
3349 
3350     // Reset buffer
3351     toSend.reset();
3352 }
3353 
3354 
3355 
3356 //  This method is called with a key/value string pair that represents an
3357 //  xmlns="yyy" or xmlns:xxx="yyy" attribute. This method will update the
3358 //  current top of the element stack based on this data. We know that when
3359 //  we get here, that it is one of these forms, so we don't bother confirming
3360 //  it.
3361 //
3362 //  But we have to ensure
3363 //      1. xxx is not xmlns
3364 //      2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa
3365 //      3. yyy is not XMLUni::fgXMLNSURIName
3366 //      4. if xxx is not null, then yyy cannot be an empty string.
updateNSMap(const XMLCh * const attrName,const XMLCh * const attrValue)3367 void SGXMLScanner::updateNSMap(const  XMLCh* const    attrName
3368                               , const XMLCh* const    attrValue)
3369 {
3370     updateNSMap(attrName, attrValue, XMLString::indexOf(attrName, chColon));
3371 }
3372 
updateNSMap(const XMLCh * const attrName,const XMLCh * const attrValue,const int colonOfs)3373 void SGXMLScanner::updateNSMap(const  XMLCh* const    attrName
3374                               , const XMLCh* const    attrValue
3375                               , const int colonOfs)
3376 {
3377     // We need a buffer to normalize the attribute value into
3378     XMLBufBid bbNormal(&fBufMgr);
3379     XMLBuffer& normalBuf = bbNormal.getBuffer();
3380 
3381     //  Normalize the value into the passed buffer. In this case, we don't
3382     //  care about the return value. An error was issued for the error, which
3383     //  is all we care about here.
3384     normalizeAttRawValue(attrName, attrValue, normalBuf);
3385     XMLCh* namespaceURI = normalBuf.getRawBuffer();
3386 
3387     //  We either have the default prefix (""), or we point it into the attr
3388     //  name parameter. Note that the xmlns is not the prefix we care about
3389     //  here. To us, the 'prefix' is really the local part of the attrName
3390     //  parameter.
3391     //
3392     //  Check 1. xxx is not xmlns
3393     //        2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa
3394     //        3. yyy is not XMLUni::fgXMLNSURIName
3395     //        4. if xxx is not null, then yyy cannot be an empty string.
3396     const XMLCh* prefPtr = XMLUni::fgZeroLenString;
3397     if (colonOfs != -1) {
3398         prefPtr = &attrName[colonOfs + 1];
3399 
3400         if (XMLString::equals(prefPtr, XMLUni::fgXMLNSString))
3401             emitError(XMLErrs::NoUseOfxmlnsAsPrefix);
3402         else if (XMLString::equals(prefPtr, XMLUni::fgXMLString)) {
3403             if (!XMLString::equals(namespaceURI, XMLUni::fgXMLURIName))
3404                 emitError(XMLErrs::PrefixXMLNotMatchXMLURI);
3405         }
3406 
3407         if (!namespaceURI)
3408             emitError(XMLErrs::NoEmptyStrNamespace, attrName);
3409         else if(!*namespaceURI && fXMLVersion == XMLReader::XMLV1_0)
3410             emitError(XMLErrs::NoEmptyStrNamespace, attrName);
3411     }
3412 
3413     if (XMLString::equals(namespaceURI, XMLUni::fgXMLNSURIName))
3414         emitError(XMLErrs::NoUseOfxmlnsURI);
3415     else if (XMLString::equals(namespaceURI, XMLUni::fgXMLURIName)) {
3416         if (!XMLString::equals(prefPtr, XMLUni::fgXMLString))
3417             emitError(XMLErrs::XMLURINotMatchXMLPrefix);
3418     }
3419 
3420     //  Ok, we have to get the unique id for the attribute value, which is the
3421     //  URI that this value should be mapped to. The validator has the
3422     //  namespace string pool, so we ask him to find or add this new one. Then
3423     //  we ask the element stack to add this prefix to URI Id mapping.
3424     fElemStack.addPrefix
3425     (
3426         prefPtr
3427         , fURIStringPool->addOrFind(namespaceURI)
3428     );
3429 }
3430 
scanRawAttrListforNameSpaces(XMLSize_t attCount)3431 void SGXMLScanner::scanRawAttrListforNameSpaces(XMLSize_t attCount)
3432 {
3433     //  Make an initial pass through the list and find any xmlns attributes or
3434     //  schema attributes.
3435     //  When we find one, send it off to be used to update the element stack's
3436     //  namespace mappings.
3437     for (XMLSize_t index = 0; index < attCount; index++)
3438     {
3439         // each attribute has the prefix:suffix="value"
3440         const KVStringPair* curPair = fRawAttrList->elementAt(index);
3441         const XMLCh* rawPtr = curPair->getKey();
3442 
3443         //  If either the key begins with "xmlns:" or its just plain
3444         //  "xmlns", then use it to update the map.
3445         if (!XMLString::compareNString(rawPtr, XMLUni::fgXMLNSColonString, 6)
3446         ||  XMLString::equals(rawPtr, XMLUni::fgXMLNSString))
3447         {
3448             const XMLCh* valuePtr = curPair->getValue();
3449 
3450             updateNSMap(rawPtr, valuePtr, fRawAttrColonList[index]);
3451 
3452             // if the schema URI is seen in the the valuePtr, set the boolean seeXsi
3453             if (XMLString::equals(valuePtr, SchemaSymbols::fgURI_XSI)) {
3454                 fSeeXsi = true;
3455             }
3456         }
3457     }
3458 
3459     // walk through the list again to deal with "xsi:...."
3460     if (fSeeXsi)
3461     {
3462         //  Schema Xsi Type yyyy (e.g. xsi:type="yyyyy")
3463         XMLBufBid bbXsi(&fBufMgr);
3464         XMLBuffer& fXsiType = bbXsi.getBuffer();
3465 
3466         for (XMLSize_t index = 0; index < attCount; index++)
3467         {
3468             // each attribute has the prefix:suffix="value"
3469             const KVStringPair* curPair = fRawAttrList->elementAt(index);
3470             const XMLCh* rawPtr = curPair->getKey();
3471             const XMLCh* prefPtr;
3472 
3473             int   colonInd = fRawAttrColonList[index];
3474 
3475             if (colonInd != -1) {
3476                 fURIBuf.set(rawPtr, colonInd);
3477                 prefPtr = fURIBuf.getRawBuffer();
3478             }
3479             else {
3480                 prefPtr = XMLUni::fgZeroLenString;
3481             }
3482 
3483             // if schema URI has been seen, scan for the schema location and uri
3484             // and resolve the schema grammar; or scan for schema type
3485             if (resolvePrefix(prefPtr, ElemStack::Mode_Attribute) == fSchemaNamespaceId) {
3486 
3487                 const XMLCh* valuePtr = curPair->getValue();
3488                 const XMLCh*  suffPtr = &rawPtr[colonInd + 1];
3489 
3490                 if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_SCHEMALOCATION))
3491                     parseSchemaLocation(valuePtr);
3492                 else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_NONAMESPACESCHEMALOCATION))
3493                     resolveSchemaGrammar(valuePtr, XMLUni::fgZeroLenString);
3494 
3495                 if( fValidator && fValidator->handlesSchema() )
3496                 {
3497                     if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_TYPE))
3498                     {
3499                         // normalize the attribute according to schema whitespace facet
3500                         DatatypeValidator* tempDV = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_QNAME);
3501                         ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, valuePtr, fXsiType, true);
3502                     }
3503                     else if (XMLString::equals(suffPtr, SchemaSymbols::fgATT_NILL))
3504                     {
3505                         // normalize the attribute according to schema whitespace facet
3506                         XMLBuffer& fXsiNil = fBufMgr.bidOnBuffer();
3507                         DatatypeValidator* tempDV = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_BOOLEAN);
3508                         ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, valuePtr, fXsiNil, true);
3509                         if(XMLString::equals(fXsiNil.getRawBuffer(), SchemaSymbols::fgATTVAL_TRUE))
3510                             ((SchemaValidator*)fValidator)->setNillable(true);
3511                         else if(XMLString::equals(fXsiNil.getRawBuffer(), SchemaSymbols::fgATTVAL_FALSE))
3512                             ((SchemaValidator*)fValidator)->setNillable(false);
3513                         else
3514                             emitError(XMLErrs::InvalidAttValue, fXsiNil.getRawBuffer(), valuePtr);
3515                         fBufMgr.releaseBuffer(fXsiNil);
3516                     }
3517                 }
3518             }
3519         }
3520 
3521         if (fValidator && fValidator->handlesSchema()) {
3522             if (!fXsiType.isEmpty()) {
3523                 int colonPos = -1;
3524                 unsigned int uriId = resolveQName (
3525                       fXsiType.getRawBuffer()
3526                     , fPrefixBuf
3527                     , ElemStack::Mode_Element
3528                     , colonPos
3529                 );
3530                 ((SchemaValidator*)fValidator)->setXsiType(fPrefixBuf.getRawBuffer(), fXsiType.getRawBuffer() + colonPos + 1, uriId);
3531             }
3532         }
3533     }
3534 }
3535 
parseSchemaLocation(const XMLCh * const schemaLocationStr,bool ignoreLoadSchema)3536 void SGXMLScanner::parseSchemaLocation(const XMLCh* const schemaLocationStr, bool ignoreLoadSchema)
3537 {
3538     BaseRefVectorOf<XMLCh>* schemaLocation = XMLString::tokenizeString(schemaLocationStr, fMemoryManager);
3539     Janitor<BaseRefVectorOf<XMLCh> > janLoc(schemaLocation);
3540 
3541     XMLSize_t size = schemaLocation->size();
3542     if (size % 2 != 0 ) {
3543         emitError(XMLErrs::BadSchemaLocation);
3544     } else {
3545         // We need a buffer to normalize the attribute value into
3546         XMLBuffer normalBuf(1023, fMemoryManager);
3547         for(XMLSize_t i=0; i<size; i=i+2) {
3548             normalizeAttRawValue(SchemaSymbols::fgXSI_SCHEMALOCATION, schemaLocation->elementAt(i), normalBuf);
3549             resolveSchemaGrammar(schemaLocation->elementAt(i+1), normalBuf.getRawBuffer(), ignoreLoadSchema);
3550         }
3551     }
3552 }
3553 
resolveSchemaGrammar(const XMLCh * const loc,const XMLCh * const uri,bool ignoreLoadSchema)3554 void SGXMLScanner::resolveSchemaGrammar(const XMLCh* const loc, const XMLCh* const uri, bool ignoreLoadSchema) {
3555 
3556     Grammar* grammar = 0;
3557 
3558     {
3559         XMLSchemaDescriptionImpl    theSchemaDescription(uri, fMemoryManager);
3560         theSchemaDescription.setLocationHints(loc);
3561         grammar = fGrammarResolver->getGrammar(&theSchemaDescription);
3562     }
3563 
3564     // If multi-import is enabled, make sure the existing grammar came
3565     // from the import directive. Otherwise we may end up reloading
3566     // the same schema that came from the external grammar pool. Ideally,
3567     // we would move fSchemaInfoList to XMLGrammarPool so that it survives
3568     // the destruction of the scanner in which case we could rely on the
3569     // same logic we use to weed out duplicate schemas below.
3570     //
3571     if (!grammar || grammar->getGrammarType() == Grammar::DTDGrammarType ||
3572         (getHandleMultipleImports() &&
3573          ((XMLSchemaDescription*)grammar->getGrammarDescription())->
3574          getContextType () == XMLSchemaDescription::CONTEXT_IMPORT))
3575     {
3576       if (fLoadSchema || ignoreLoadSchema)
3577       {
3578         XSDDOMParser parser(0, fMemoryManager, 0);
3579 
3580         parser.setValidationScheme(XercesDOMParser::Val_Never);
3581         parser.setDoNamespaces(true);
3582         parser.setUserEntityHandler(fEntityHandler);
3583         parser.setUserErrorReporter(fErrorReporter);
3584 
3585         //Normalize sysId
3586         XMLBufBid nnSys(&fBufMgr);
3587         XMLBuffer& normalizedSysId = nnSys.getBuffer();
3588         XMLString::removeChar(loc, 0xFFFF, normalizedSysId);
3589         const XMLCh* normalizedURI = normalizedSysId.getRawBuffer();
3590 
3591         // Create a buffer for expanding the system id
3592         XMLBufBid bbSys(&fBufMgr);
3593         XMLBuffer& expSysId = bbSys.getBuffer();
3594 
3595         //  Allow the entity handler to expand the system id if they choose
3596         //  to do so.
3597         InputSource* srcToFill = 0;
3598         if (fEntityHandler)
3599         {
3600             if (!fEntityHandler->expandSystemId(normalizedURI, expSysId))
3601                 expSysId.set(normalizedURI);
3602 
3603             ReaderMgr::LastExtEntityInfo lastInfo;
3604             fReaderMgr.getLastExtEntityInfo(lastInfo);
3605             XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::SchemaGrammar,
3606                             expSysId.getRawBuffer(), uri, XMLUni::fgZeroLenString, lastInfo.systemId,
3607                             &fReaderMgr);
3608             srcToFill = fEntityHandler->resolveEntity(&resourceIdentifier);
3609         }
3610         else
3611         {
3612             expSysId.set(normalizedURI);
3613         }
3614 
3615         //  If they didn't create a source via the entity handler, then we
3616         //  have to create one on our own.
3617         if (!srcToFill)
3618         {
3619             if (fDisableDefaultEntityResolution)
3620                 return;
3621 
3622             ReaderMgr::LastExtEntityInfo lastInfo;
3623             fReaderMgr.getLastExtEntityInfo(lastInfo);
3624 
3625             XMLURL urlTmp(fMemoryManager);
3626             if ((!urlTmp.setURL(lastInfo.systemId, expSysId.getRawBuffer(), urlTmp)) ||
3627                 (urlTmp.isRelative()))
3628             {
3629                 if (!fStandardUriConformant)
3630                 {
3631                     XMLBufBid  ddSys(&fBufMgr);
3632                     XMLBuffer& resolvedSysId = ddSys.getBuffer();
3633                     XMLUri::normalizeURI(expSysId.getRawBuffer(), resolvedSysId);
3634 
3635                     srcToFill = new (fMemoryManager) LocalFileInputSource
3636                     (
3637                         lastInfo.systemId
3638                         , resolvedSysId.getRawBuffer()
3639                         , fMemoryManager
3640                     );
3641                 }
3642                 else
3643                     ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
3644             }
3645             else
3646             {
3647                 if (fStandardUriConformant && urlTmp.hasInvalidChar())
3648                     ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
3649 
3650                 srcToFill = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager);
3651             }
3652         }
3653 
3654         // Put a janitor on the input source
3655         Janitor<InputSource> janSrc(srcToFill);
3656 
3657         // Check if this exact schema has already been seen.
3658         //
3659         const XMLCh* sysId = srcToFill->getSystemId();
3660         unsigned int uriId = (uri && *uri) ? fURIStringPool->addOrFind(uri) : fEmptyNamespaceId;
3661         SchemaInfo* importSchemaInfo = 0;
3662 
3663         if (fUseCachedGrammar)
3664           importSchemaInfo = fCachedSchemaInfoList->get(sysId, uriId);
3665 
3666         if (!importSchemaInfo && !fToCacheGrammar)
3667           importSchemaInfo = fSchemaInfoList->get(sysId, uriId);
3668 
3669         if (importSchemaInfo)
3670         {
3671           // We haven't added any new grammars so it is safe to just
3672           // return.
3673           //
3674           return;
3675         }
3676 
3677         // Should just issue warning if the schema is not found
3678         bool flag = srcToFill->getIssueFatalErrorIfNotFound();
3679         srcToFill->setIssueFatalErrorIfNotFound(false);
3680 
3681         parser.parse(*srcToFill);
3682 
3683         // Reset the InputSource
3684         srcToFill->setIssueFatalErrorIfNotFound(flag);
3685 
3686         if (parser.getSawFatal() && fExitOnFirstFatal)
3687             emitError(XMLErrs::SchemaScanFatalError);
3688 
3689         DOMDocument* document = parser.getDocument(); //Our Grammar
3690 
3691         if (document != 0) {
3692 
3693             DOMElement* root = document->getDocumentElement();// This is what we pass to TraverserSchema
3694             if (root != 0)
3695             {
3696                 const XMLCh* newUri = root->getAttribute(SchemaSymbols::fgATT_TARGETNAMESPACE);
3697                 bool newGrammar = false;
3698                 if (!XMLString::equals(newUri, uri)) {
3699                     if (fValidate || fValScheme == Val_Auto) {
3700                         fValidator->emitError(XMLValid::WrongTargetNamespace, loc, uri);
3701                     }
3702 
3703                     grammar = fGrammarResolver->getGrammar(newUri);
3704                     newGrammar = true;
3705                 }
3706 
3707                 if (!grammar ||
3708                     grammar->getGrammarType() == Grammar::DTDGrammarType ||
3709                     (getHandleMultipleImports() &&
3710                      ((XMLSchemaDescription*) grammar->getGrammarDescription())->
3711                      getContextType () == XMLSchemaDescription::CONTEXT_IMPORT))
3712                 {
3713                     // If we switched namespace URI, recheck the schema info.
3714                     //
3715                     if (newGrammar)
3716                     {
3717                       unsigned int newUriId = (newUri && *newUri) ? fURIStringPool->addOrFind(newUri) : fEmptyNamespaceId;
3718 
3719                       if (fUseCachedGrammar)
3720                         importSchemaInfo = fCachedSchemaInfoList->get(sysId, newUriId);
3721 
3722                       if (!importSchemaInfo && !fToCacheGrammar)
3723                         importSchemaInfo = fSchemaInfoList->get(sysId, newUriId);
3724 
3725                       if (importSchemaInfo)
3726                         return;
3727                     }
3728 
3729                     //  Since we have seen a grammar, set our validation flag
3730                     //  at this point if the validation scheme is auto
3731                     if (fValScheme == Val_Auto && !fValidate) {
3732                         fValidate = true;
3733                         fElemStack.setValidationFlag(fValidate);
3734                     }
3735 
3736                     bool grammarFound = grammar &&
3737                       grammar->getGrammarType() == Grammar::SchemaGrammarType;
3738 
3739                     SchemaGrammar* schemaGrammar;
3740 
3741                     if (grammarFound) {
3742                       schemaGrammar = (SchemaGrammar*) grammar;
3743                     }
3744                     else {
3745                       schemaGrammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager);
3746                     }
3747 
3748                     XMLSchemaDescription* gramDesc = (XMLSchemaDescription*) schemaGrammar->getGrammarDescription();
3749 
3750                     gramDesc->setContextType(XMLSchemaDescription::CONTEXT_PREPARSE);
3751                     gramDesc->setLocationHints(sysId);
3752 
3753                     TraverseSchema traverseSchema
3754                     (
3755                         root
3756                         , fURIStringPool
3757                         , schemaGrammar
3758                         , fGrammarResolver
3759                         , fUseCachedGrammar ? fCachedSchemaInfoList : fSchemaInfoList
3760                         , fToCacheGrammar ? fCachedSchemaInfoList : fSchemaInfoList
3761                         , this
3762                         , sysId
3763                         , fEntityHandler
3764                         , fErrorReporter
3765                         , fMemoryManager
3766                         , grammarFound
3767                     );
3768 
3769                     // Reset the now invalid schema roots in the collected
3770                     // schema info entries.
3771                     //
3772                     {
3773                       RefHash2KeysTableOfEnumerator<SchemaInfo> i (
3774                         fToCacheGrammar ? fCachedSchemaInfoList : fSchemaInfoList);
3775 
3776                       while (i.hasMoreElements ())
3777                         i.nextElement().resetRoot ();
3778                     }
3779 
3780                     if (fGrammarType == Grammar::DTDGrammarType) {
3781                         fGrammar = schemaGrammar;
3782                         fGrammarType = Grammar::SchemaGrammarType;
3783                         fValidator->setGrammar(fGrammar);
3784                     }
3785 
3786                     if (fValidate) {
3787                         //  validate the Schema scan so far
3788                         fValidator->preContentValidation(false);
3789                     }
3790                 }
3791             }
3792         }
3793       }
3794     }
3795     else
3796     {
3797         //  Since we have seen a grammar, set our validation flag
3798         //  at this point if the validation scheme is auto
3799         if (fValScheme == Val_Auto && !fValidate) {
3800             fValidate = true;
3801             fElemStack.setValidationFlag(fValidate);
3802         }
3803 
3804         // we have seen a schema, so set up the fValidator as fSchemaValidator
3805         if (fGrammarType == Grammar::DTDGrammarType) {
3806             fGrammar = grammar;
3807             fGrammarType = Grammar::SchemaGrammarType;
3808             fValidator->setGrammar(fGrammar);
3809         }
3810     }
3811     // update fModel; rely on the grammar resolver to do this
3812     // efficiently
3813     if(getPSVIHandler())
3814         fModel = fGrammarResolver->getXSModel();
3815 }
3816 
resolveSystemId(const XMLCh * const sysId,const XMLCh * const pubId)3817 InputSource* SGXMLScanner::resolveSystemId(const XMLCh* const sysId
3818                                           ,const XMLCh* const pubId)
3819 {
3820     //Normalize sysId
3821     XMLBufBid nnSys(&fBufMgr);
3822     XMLBuffer& normalizedSysId = nnSys.getBuffer();
3823     XMLString::removeChar(sysId, 0xFFFF, normalizedSysId);
3824     const XMLCh* normalizedURI = normalizedSysId.getRawBuffer();
3825 
3826     // Create a buffer for expanding the system id
3827     XMLBufBid bbSys(&fBufMgr);
3828     XMLBuffer& expSysId = bbSys.getBuffer();
3829 
3830     //  Allow the entity handler to expand the system id if they choose
3831     //  to do so.
3832     InputSource* srcToFill = 0;
3833     if (fEntityHandler)
3834     {
3835         if (!fEntityHandler->expandSystemId(normalizedURI, expSysId))
3836             expSysId.set(normalizedURI);
3837 
3838         ReaderMgr::LastExtEntityInfo lastInfo;
3839         fReaderMgr.getLastExtEntityInfo(lastInfo);
3840         XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::ExternalEntity,
3841                             expSysId.getRawBuffer(), 0, pubId, lastInfo.systemId,
3842                             &fReaderMgr);
3843         srcToFill = fEntityHandler->resolveEntity(&resourceIdentifier);
3844     }
3845     else
3846     {
3847         expSysId.set(normalizedURI);
3848     }
3849 
3850     //  If they didn't create a source via the entity handler, then we
3851     //  have to create one on our own.
3852     if (!srcToFill)
3853     {
3854         if (fDisableDefaultEntityResolution)
3855             return 0;
3856 
3857         ReaderMgr::LastExtEntityInfo lastInfo;
3858         fReaderMgr.getLastExtEntityInfo(lastInfo);
3859 
3860         XMLURL urlTmp(fMemoryManager);
3861         if ((!urlTmp.setURL(lastInfo.systemId, expSysId.getRawBuffer(), urlTmp)) ||
3862             (urlTmp.isRelative()))
3863         {
3864             if (!fStandardUriConformant)
3865             {
3866                 XMLBufBid  ddSys(&fBufMgr);
3867                 XMLBuffer& resolvedSysId = ddSys.getBuffer();
3868                 XMLUri::normalizeURI(expSysId.getRawBuffer(), resolvedSysId);
3869 
3870                 srcToFill = new (fMemoryManager) LocalFileInputSource
3871                 (
3872                     lastInfo.systemId
3873                     , resolvedSysId.getRawBuffer()
3874                     , fMemoryManager
3875                 );
3876             }
3877             else
3878                 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
3879         }
3880         else
3881         {
3882             if (fStandardUriConformant && urlTmp.hasInvalidChar())
3883                 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
3884             srcToFill = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager);
3885         }
3886     }
3887 
3888     return srcToFill;
3889 }
3890 
3891 
3892 // ---------------------------------------------------------------------------
3893 //  SGXMLScanner: Private grammar preparsing methods
3894 // ---------------------------------------------------------------------------
loadXMLSchemaGrammar(const InputSource & src,const bool toCache)3895 Grammar* SGXMLScanner::loadXMLSchemaGrammar(const InputSource& src,
3896                                           const bool toCache)
3897 {
3898    // Reset the validators
3899     fSchemaValidator->reset();
3900     fSchemaValidator->setErrorReporter(fErrorReporter);
3901     fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal);
3902     fSchemaValidator->setGrammarResolver(fGrammarResolver);
3903 
3904     if (fValidatorFromUser)
3905         fValidator->reset();
3906 
3907     XSDDOMParser parser(0, fMemoryManager, 0);
3908 
3909     parser.setValidationScheme(XercesDOMParser::Val_Never);
3910     parser.setDoNamespaces(true);
3911     parser.setUserEntityHandler(fEntityHandler);
3912     parser.setUserErrorReporter(fErrorReporter);
3913 
3914     // Should just issue warning if the schema is not found
3915     bool flag = src.getIssueFatalErrorIfNotFound();
3916     ((InputSource&) src).setIssueFatalErrorIfNotFound(false);
3917 
3918     parser.parse(src);
3919 
3920     // Reset the InputSource
3921     ((InputSource&) src).setIssueFatalErrorIfNotFound(flag);
3922 
3923     if (parser.getSawFatal() && fExitOnFirstFatal)
3924         emitError(XMLErrs::SchemaScanFatalError);
3925 
3926     DOMDocument* document = parser.getDocument(); //Our Grammar
3927 
3928     if (document != 0) {
3929 
3930         DOMElement* root = document->getDocumentElement();// This is what we pass to TraverserSchema
3931         if (root != 0)
3932         {
3933             const XMLCh* nsUri = root->getAttribute(SchemaSymbols::fgATT_TARGETNAMESPACE);
3934             Grammar* grammar = fGrammarResolver->getGrammar(nsUri);
3935 
3936             // Check if this exact schema has already been seen.
3937             //
3938             const XMLCh* sysId = src.getSystemId();
3939             SchemaInfo* importSchemaInfo = 0;
3940 
3941             if (grammar)
3942             {
3943               if (nsUri && *nsUri)
3944                 importSchemaInfo = fCachedSchemaInfoList->get(sysId, fURIStringPool->addOrFind(nsUri));
3945               else
3946                 importSchemaInfo = fCachedSchemaInfoList->get(sysId, fEmptyNamespaceId);
3947             }
3948 
3949             if (!importSchemaInfo)
3950             {
3951               bool grammarFound = grammar &&
3952                 grammar->getGrammarType() == Grammar::SchemaGrammarType &&
3953                 getHandleMultipleImports();
3954 
3955               SchemaGrammar* schemaGrammar;
3956 
3957               if (grammarFound)
3958                 schemaGrammar = (SchemaGrammar*) grammar;
3959               else
3960                 schemaGrammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager);
3961 
3962               XMLSchemaDescription* gramDesc = (XMLSchemaDescription*) schemaGrammar->getGrammarDescription();
3963               gramDesc->setContextType(XMLSchemaDescription::CONTEXT_PREPARSE);
3964               gramDesc->setLocationHints(sysId);
3965 
3966               TraverseSchema traverseSchema
3967                 (
3968                   root
3969                   , fURIStringPool
3970                   , schemaGrammar
3971                   , fGrammarResolver
3972                   , fCachedSchemaInfoList
3973                   , toCache ? fCachedSchemaInfoList : fSchemaInfoList
3974                   , this
3975                   , sysId
3976                   , fEntityHandler
3977                   , fErrorReporter
3978                   , fMemoryManager
3979                   , grammarFound
3980                 );
3981 
3982               grammar = schemaGrammar;
3983 
3984               // Reset the now invalid schema roots in the collected
3985               // schema info entries.
3986               //
3987               {
3988                 RefHash2KeysTableOfEnumerator<SchemaInfo> i (
3989                   toCache ? fCachedSchemaInfoList : fSchemaInfoList);
3990 
3991                 while (i.hasMoreElements ())
3992                   i.nextElement().resetRoot ();
3993               }
3994             }
3995 
3996             if (fValidate) {
3997               //  validate the Schema scan so far
3998               fValidator->setGrammar(grammar);
3999               fValidator->preContentValidation(false);
4000             }
4001 
4002             if (toCache) {
4003               fGrammarResolver->cacheGrammars();
4004             }
4005 
4006             if(getPSVIHandler())
4007               fModel = fGrammarResolver->getXSModel();
4008 
4009             return grammar;
4010         }
4011     }
4012 
4013     return 0;
4014 }
4015 
4016 
4017 
4018 // ---------------------------------------------------------------------------
4019 //  SGXMLScanner: Private parsing methods
4020 // ---------------------------------------------------------------------------
4021 
4022 //  This method is called to do a raw scan of an attribute value. It does not
4023 //  do normalization (since we don't know their types yet.) It just scans the
4024 //  value and does entity expansion.
4025 //
4026 //  End of entity's must be dealt with here. During DTD scan, they can come
4027 //  from external entities. During content, they can come from any entity.
4028 //  We just eat the end of entity and continue with our scan until we come
4029 //  to the closing quote. If an unterminated value causes us to go through
4030 //  subsequent entities, that will cause errors back in the calling code,
4031 //  but there's little we can do about it here.
basicAttrValueScan(const XMLCh * const attrName,XMLBuffer & toFill)4032 bool SGXMLScanner::basicAttrValueScan(const XMLCh* const attrName, XMLBuffer& toFill)
4033 {
4034     // Reset the target buffer
4035     toFill.reset();
4036 
4037     // Get the next char which must be a single or double quote
4038     XMLCh quoteCh;
4039     if (!fReaderMgr.skipIfQuote(quoteCh))
4040         return false;
4041 
4042     //  We have to get the current reader because we have to ignore closing
4043     //  quotes until we hit the same reader again.
4044     const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum();
4045 
4046     //  Loop until we get the attribute value. Note that we use a double
4047     //  loop here to avoid the setup/teardown overhead of the exception
4048     //  handler on every round.
4049     while (true)
4050     {
4051         try
4052         {
4053             while(true)
4054             {
4055                 XMLCh nextCh = fReaderMgr.getNextChar();
4056 
4057                 if (nextCh != quoteCh)
4058                 {
4059                     if (nextCh != chAmpersand)
4060                     {
4061                         if ((nextCh < 0xD800) || (nextCh > 0xDFFF))
4062                         {
4063                             // Its got to at least be a valid XML character
4064                             if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
4065                             {
4066                                 if (nextCh == 0)
4067                                     ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
4068 
4069                                 XMLCh tmpBuf[9];
4070                                 XMLString::binToText
4071                                 (
4072                                     nextCh
4073                                     , tmpBuf
4074                                     , 8
4075                                     , 16
4076                                     , fMemoryManager
4077                                 );
4078                                 emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
4079                             }
4080                         } else // its a surrogate
4081                         {
4082                             // Deal with surrogate pairs
4083 
4084                             //  we expect a a leading surrogate.
4085                             if (nextCh <= 0xDBFF)
4086                             {
4087                                 toFill.append(nextCh);
4088 
4089                                 //  process the trailing surrogate
4090                                 nextCh = fReaderMgr.getNextChar();
4091 
4092                                 //  it should be a trailing surrogate.
4093                                 if ((nextCh < 0xDC00) || (nextCh > 0xDFFF))
4094                                 {
4095                                     emitError(XMLErrs::Expected2ndSurrogateChar);
4096                                 }
4097                             } else
4098                             {
4099                                 //  Its a trailing surrogate, but we are not expecting it
4100                                 emitError(XMLErrs::Unexpected2ndSurrogateChar);
4101                             }
4102                         }
4103                     } else // its a chAmpersand
4104                     {
4105                         //  Check for an entity ref . We ignore the empty flag in
4106                         //  this one.
4107 
4108                         bool    escaped;
4109                         XMLCh   firstCh;
4110                         XMLCh   secondCh
4111                             ;
4112                         // If it was not returned directly, then jump back up
4113                         if (scanEntityRef(true, firstCh, secondCh, escaped) == EntityExp_Returned)
4114                         {
4115                             //  If it was escaped, then put in a 0xFFFF value. This will
4116                             //  be used later during validation and normalization of the
4117                             //  value to know that the following character was via an
4118                             //  escape char.
4119                             if (escaped)
4120                                 toFill.append(0xFFFF);
4121 
4122                             toFill.append(firstCh);
4123                             if (secondCh)
4124                                 toFill.append(secondCh);
4125                         }
4126                         continue;
4127                     }
4128                 } else // its a quoteCh
4129                 {
4130                     //  Check for our ending quote. It has to be in the same entity
4131                     //  as where we started. Quotes in nested entities are ignored.
4132 
4133                     if (curReader == fReaderMgr.getCurrentReaderNum())
4134                     {
4135                         return true;
4136                     }
4137 
4138                     // Watch for spillover into a previous entity
4139                     if (curReader > fReaderMgr.getCurrentReaderNum())
4140                     {
4141                         emitError(XMLErrs::PartialMarkupInEntity);
4142                         return false;
4143                     }
4144                 }
4145 
4146                 // add it to the buffer
4147                 toFill.append(nextCh);
4148 
4149             }
4150         }
4151         catch(const EndOfEntityException&)
4152         {
4153             // Just eat it and continue.
4154         }
4155     }
4156     return true;
4157 }
4158 
4159 
4160 //  This method scans a CDATA section. It collects the character into one
4161 //  of the temp buffers and calls the document handler, if any, with the
4162 //  characters. It assumes that the <![CDATA string has been scanned before
4163 //  this call.
scanCDSection()4164 void SGXMLScanner::scanCDSection()
4165 {
4166     static const XMLCh CDataClose[] =
4167     {
4168             chCloseSquare, chCloseAngle, chNull
4169     };
4170 
4171     //  The next character should be the opening square bracket. If not
4172     //  issue an error, but then try to recover by skipping any whitespace
4173     //  and checking again.
4174     if (!fReaderMgr.skippedChar(chOpenSquare))
4175     {
4176         emitError(XMLErrs::ExpectedOpenSquareBracket);
4177         fReaderMgr.skipPastSpaces();
4178 
4179         // If we still don't find it, then give up, else keep going
4180         if (!fReaderMgr.skippedChar(chOpenSquare))
4181             return;
4182     }
4183 
4184     // Get a buffer for this
4185     XMLBufBid bbCData(&fBufMgr);
4186 
4187     //  We just scan forward until we hit the end of CDATA section sequence.
4188     //  CDATA is effectively a big escape mechanism so we don't treat markup
4189     //  characters specially here.
4190     bool            emittedError = false;
4191     bool    gotLeadingSurrogate = false;
4192 
4193     // Get the character data opts for the current element
4194     XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData;
4195     // And see if the current element is a 'Children' style content model
4196     ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
4197     if(currType)
4198     {
4199         SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType();
4200         if(modelType == SchemaElementDecl::Children ||
4201            modelType == SchemaElementDecl::ElementOnlyEmpty)
4202             charOpts = XMLElementDecl::SpacesOk;
4203         else if(modelType == SchemaElementDecl::Empty)
4204             charOpts = XMLElementDecl::NoCharData;
4205     }
4206 
4207     // should not be necessary when PSVI on element decl removed
4208     const ElemStack::StackElem* topElem = fElemStack.topElement();
4209 
4210     while (true)
4211     {
4212         const XMLCh nextCh = fReaderMgr.getNextChar();
4213 
4214         // Watch for unexpected end of file
4215         if (!nextCh)
4216         {
4217             emitError(XMLErrs::UnterminatedCDATASection);
4218             ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
4219         }
4220 
4221         if (fValidate && fStandalone && (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)))
4222         {
4223             // This document is standalone; this ignorable CDATA whitespace is forbidden.
4224             // XML 1.0, Section 2.9
4225             // And see if the current element is a 'Children' style content model
4226             if (topElem->fThisElement->isExternal()) {
4227 
4228                 if (charOpts == XMLElementDecl::SpacesOk) // Element Content
4229                 {
4230                     // Error - standalone should have a value of "no" as whitespace detected in an
4231                     // element type with element content whose element declaration was external
4232                     fValidator->emitError(XMLValid::NoWSForStandalone);
4233                     if (getPSVIHandler())
4234                     {
4235                         // REVISIT:
4236                         // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
4237                     }
4238                 }
4239             }
4240         }
4241 
4242         //  If this is a close square bracket it could be our closing
4243         //  sequence.
4244         if (nextCh == chCloseSquare && fReaderMgr.skippedString(CDataClose))
4245         {
4246             //  make sure we were not expecting a trailing surrogate.
4247             if (gotLeadingSurrogate) {
4248                 emitError(XMLErrs::Expected2ndSurrogateChar);
4249             }
4250 
4251             XMLSize_t xsLen = bbCData.getLen();
4252             const XMLCh* xsNormalized = bbCData.getRawBuffer();
4253             if (fValidate) {
4254 
4255                 DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
4256                 if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
4257                 {
4258                     // normalize the character according to schema whitespace facet
4259                     ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf);
4260                     xsNormalized = fWSNormalizeBuf.getRawBuffer();
4261                     xsLen = fWSNormalizeBuf.getLen();
4262                 }
4263 
4264                 // tell the schema validation about the character data for checkContent later
4265                 ((SchemaValidator*)fValidator)->setDatatypeBuffer(xsNormalized);
4266 
4267                 if (charOpts != XMLElementDecl::AllCharData)
4268                 {
4269                     // They definitely cannot handle any type of char data
4270                     fValidator->emitError(XMLValid::NoCharDataInCM);
4271                     if (getPSVIHandler())
4272                     {
4273                         // REVISIT:
4274                         // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
4275                     }
4276                 }
4277             }
4278 
4279             // call all active identity constraints
4280             if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) {
4281                 fContent.append(xsNormalized, xsLen);
4282             }
4283 
4284             // If we have a doc handler, call it
4285             if (fDocHandler)
4286             {
4287                 if (fNormalizeData) {
4288                     fDocHandler->docCharacters(xsNormalized, xsLen, true);
4289                 }
4290                 else {
4291                     fDocHandler->docCharacters(
4292                         bbCData.getRawBuffer(), bbCData.getLen(), true
4293                     );
4294                 }
4295             }
4296 
4297             // And we are done
4298             break;
4299         }
4300 
4301         //  Make sure its a valid character. But if we've emitted an error
4302         //  already, don't bother with the overhead since we've already told
4303         //  them about it.
4304         if (!emittedError)
4305         {
4306             // Deal with surrogate pairs
4307             if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
4308             {
4309                 //  Its a leading surrogate. If we already got one, then
4310                 //  issue an error, else set leading flag to make sure that
4311                 //  we look for a trailing next time.
4312                 if (gotLeadingSurrogate)
4313                     emitError(XMLErrs::Expected2ndSurrogateChar);
4314                 else
4315                     gotLeadingSurrogate = true;
4316             }
4317             else
4318             {
4319                 //  If its a trailing surrogate, make sure that we are
4320                 //  prepared for that. Else, its just a regular char so make
4321                 //  sure that we were not expected a trailing surrogate.
4322                 if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
4323                 {
4324                     // Its trailing, so make sure we were expecting it
4325                     if (!gotLeadingSurrogate)
4326                         emitError(XMLErrs::Unexpected2ndSurrogateChar);
4327                 }
4328                 else
4329                 {
4330                     //  Its just a char, so make sure we were not expecting a
4331                     //  trailing surrogate.
4332                     if (gotLeadingSurrogate)
4333                         emitError(XMLErrs::Expected2ndSurrogateChar);
4334 
4335                     // Its got to at least be a valid XML character
4336                     else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
4337                     {
4338                         XMLCh tmpBuf[9];
4339                         XMLString::binToText
4340                         (
4341                             nextCh
4342                             , tmpBuf
4343                             , 8
4344                             , 16
4345                             , fMemoryManager
4346                         );
4347                         emitError(XMLErrs::InvalidCharacter, tmpBuf);
4348                         emittedError = true;
4349                     }
4350                 }
4351                 gotLeadingSurrogate = false;
4352             }
4353         }
4354 
4355         // Add it to the buffer
4356         bbCData.append(nextCh);
4357     }
4358 }
4359 
4360 
scanCharData(XMLBuffer & toUse)4361 void SGXMLScanner::scanCharData(XMLBuffer& toUse)
4362 {
4363     //  We have to watch for the stupid ]]> sequence, which is illegal in
4364     //  character data. So this is a little state machine that handles that.
4365     enum States
4366     {
4367         State_Waiting
4368         , State_GotOne
4369         , State_GotTwo
4370     };
4371 
4372     // Reset the buffer before we start
4373     toUse.reset();
4374 
4375     // Turn on the 'throw at end' flag of the reader manager
4376     ThrowEOEJanitor jan(&fReaderMgr, true);
4377 
4378     //  In order to be more efficient we have to use kind of a deeply nested
4379     //  set of blocks here. The outer block puts on a try and catches end of
4380     //  entity exceptions. The inner loop is the per-character loop. If we
4381     //  put the try inside the inner loop, it would work but would require
4382     //  the exception handling code setup/teardown code to be invoked for
4383     //  each character.
4384     XMLCh   nextCh;
4385     XMLCh   secondCh = 0;
4386     States  curState = State_Waiting;
4387     bool    escaped = false;
4388     bool    gotLeadingSurrogate = false;
4389     bool    notDone = true;
4390     while (notDone)
4391     {
4392         try
4393         {
4394             while (true)
4395             {
4396                 //  Eat through as many plain content characters as possible without
4397                 //  needing special handling.  Moving most content characters here,
4398                 //  in this one call, rather than running the overall loop once
4399                 //  per content character, is a speed optimization.
4400                 if (curState == State_Waiting  &&  !gotLeadingSurrogate)
4401                 {
4402                      fReaderMgr.movePlainContentChars(toUse);
4403                 }
4404 
4405                 // Try to get another char from the source
4406                 //   The code from here on down covers all contengencies,
4407                 if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
4408                 {
4409                     // If we were waiting for a trailing surrogate, its an error
4410                     if (gotLeadingSurrogate)
4411                         emitError(XMLErrs::Expected2ndSurrogateChar);
4412 
4413                     notDone = false;
4414                     break;
4415                 }
4416 
4417                 //  Watch for a reference. Note that the escapement mechanism
4418                 //  is ignored in this content.
4419                 escaped = false;
4420                 if (nextCh == chAmpersand)
4421                 {
4422                     sendCharData(toUse);
4423 
4424                     // Turn off the throwing at the end of entity during this
4425                     ThrowEOEJanitor jan(&fReaderMgr, false);
4426 
4427                     if (scanEntityRef(false, nextCh, secondCh, escaped) != EntityExp_Returned)
4428                     {
4429                         gotLeadingSurrogate = false;
4430                         continue;
4431                     }
4432                 }
4433                 else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
4434                 {
4435                     // Deal with surrogate pairs
4436                     //  Its a leading surrogate. If we already got one, then
4437                     //  issue an error, else set leading flag to make sure that
4438                     //  we look for a trailing next time.
4439                     if (gotLeadingSurrogate)
4440                         emitError(XMLErrs::Expected2ndSurrogateChar);
4441                     else
4442                         gotLeadingSurrogate = true;
4443                 }
4444                 else
4445                 {
4446                     //  If its a trailing surrogate, make sure that we are
4447                     //  prepared for that. Else, its just a regular char so make
4448                     //  sure that we were not expected a trailing surrogate.
4449                     if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
4450                     {
4451                         // Its trailing, so make sure we were expecting it
4452                         if (!gotLeadingSurrogate)
4453                             emitError(XMLErrs::Unexpected2ndSurrogateChar);
4454                     }
4455                     else
4456                     {
4457                         //  Its just a char, so make sure we were not expecting a
4458                         //  trailing surrogate.
4459                         if (gotLeadingSurrogate)
4460                             emitError(XMLErrs::Expected2ndSurrogateChar);
4461 
4462                         // Make sure the returned char is a valid XML char
4463                         if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
4464                         {
4465                             XMLCh tmpBuf[9];
4466                             XMLString::binToText
4467                             (
4468                                 nextCh
4469                                 , tmpBuf
4470                                 , 8
4471                                 , 16
4472                                 , fMemoryManager
4473                             );
4474                             emitError(XMLErrs::InvalidCharacter, tmpBuf);
4475                         }
4476                     }
4477                     gotLeadingSurrogate = false;
4478                 }
4479 
4480                 // Keep the state machine up to date
4481                 if (!escaped)
4482                 {
4483                     if (nextCh == chCloseSquare)
4484                     {
4485                         if (curState == State_Waiting)
4486                             curState = State_GotOne;
4487                         else if (curState == State_GotOne)
4488                             curState = State_GotTwo;
4489                     }
4490                     else if (nextCh == chCloseAngle)
4491                     {
4492                         if (curState == State_GotTwo)
4493                             emitError(XMLErrs::BadSequenceInCharData);
4494                         curState = State_Waiting;
4495                     }
4496                     else
4497                     {
4498                         curState = State_Waiting;
4499                     }
4500                 }
4501                 else
4502                 {
4503                     curState = State_Waiting;
4504                 }
4505 
4506                 // Add this char to the buffer
4507                 toUse.append(nextCh);
4508 
4509                 if (secondCh)
4510                 {
4511                     toUse.append(secondCh);
4512                     secondCh=0;
4513                 }
4514             }
4515         }
4516         catch(const EndOfEntityException& toCatch)
4517         {
4518             //  Some entity ended, so we have to send any accumulated
4519             //  chars and send an end of entity event.
4520             sendCharData(toUse);
4521             gotLeadingSurrogate = false;
4522 
4523             if (fDocHandler)
4524                 fDocHandler->endEntityReference(toCatch.getEntity());
4525         }
4526     }
4527 
4528     // Check the validity constraints as per XML 1.0 Section 2.9
4529     if (fValidate && fStandalone)
4530     {
4531         // See if the text contains whitespace
4532         // Get the raw data we need for the callback
4533         const XMLCh* rawBuf = toUse.getRawBuffer();
4534         const XMLSize_t len = toUse.getLen();
4535         const bool isSpaces = fReaderMgr.getCurrentReader()->containsWhiteSpace(rawBuf, len);
4536 
4537         if (isSpaces)
4538         {
4539             // And see if the current element is a 'Children' style content model
4540             const ElemStack::StackElem* topElem = fElemStack.topElement();
4541 
4542             if (topElem->fThisElement->isExternal()) {
4543 
4544                 // Get the character data opts for the current element
4545                 XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData;
4546                 // And see if the current element is a 'Children' style content model
4547                 ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
4548                 if(currType)
4549                 {
4550                     SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType();
4551                     if(modelType == SchemaElementDecl::Children ||
4552                        modelType == SchemaElementDecl::ElementOnlyEmpty)
4553                         charOpts = XMLElementDecl::SpacesOk;
4554                 }
4555 
4556                 if (charOpts == XMLElementDecl::SpacesOk)  // => Element Content
4557                 {
4558                     // Error - standalone should have a value of "no" as whitespace detected in an
4559                     // element type with element content whose element declaration was external
4560                     //
4561                     fValidator->emitError(XMLValid::NoWSForStandalone);
4562                     if (getPSVIHandler())
4563                     {
4564                         // REVISIT:
4565                         // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
4566                     }
4567                 }
4568             }
4569         }
4570     }
4571     // Send any char data that we accumulated into the buffer
4572     sendCharData(toUse);
4573 }
4574 
4575 
4576 //  This method will scan a general/character entity ref. It will either
4577 //  expand a char ref and return it directly, or push a reader for a general
4578 //  entity.
4579 //
4580 //  The return value indicates whether the char parameters hold the value
4581 //  or whether the value was pushed as a reader, or that it failed.
4582 //
4583 //  The escaped flag tells the caller whether the returned parameter resulted
4584 //  from a character reference, which escapes the character in some cases. It
4585 //  only makes any difference if the return value indicates the value was
4586 //  returned directly.
4587 SGXMLScanner::EntityExpRes
scanEntityRef(const bool,XMLCh & firstCh,XMLCh & secondCh,bool & escaped)4588 SGXMLScanner::scanEntityRef(  const   bool
4589                             ,       XMLCh&  firstCh
4590                             ,       XMLCh&  secondCh
4591                             ,       bool&   escaped)
4592 {
4593     // Assume no escape
4594     secondCh = 0;
4595     escaped = false;
4596 
4597     // We have to insure that its all in one entity
4598     const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum();
4599 
4600     //  If the next char is a pound, then its a character reference and we
4601     //  need to expand it always.
4602     if (fReaderMgr.skippedChar(chPound))
4603     {
4604         //  Its a character reference, so scan it and get back the numeric
4605         //  value it represents.
4606         if (!scanCharRef(firstCh, secondCh))
4607             return EntityExp_Failed;
4608 
4609         escaped = true;
4610 
4611         if (curReader != fReaderMgr.getCurrentReaderNum())
4612             emitError(XMLErrs::PartialMarkupInEntity);
4613 
4614         return EntityExp_Returned;
4615     }
4616 
4617     // Expand it since its a normal entity ref
4618     XMLBufBid bbName(&fBufMgr);
4619     int colonPosition;
4620     if (!fReaderMgr.getQName(bbName.getBuffer(), &colonPosition))
4621     {
4622         if (bbName.isEmpty())
4623             emitError(XMLErrs::ExpectedEntityRefName);
4624         else
4625             emitError(XMLErrs::InvalidEntityRefName, bbName.getRawBuffer());
4626         return EntityExp_Failed;
4627     }
4628 
4629     //  Next char must be a semi-colon. But if its not, just emit
4630     //  an error and try to continue.
4631     if (!fReaderMgr.skippedChar(chSemiColon))
4632         emitError(XMLErrs::UnterminatedEntityRef, bbName.getRawBuffer());
4633 
4634     // Make sure we ended up on the same entity reader as the & char
4635     if (curReader != fReaderMgr.getCurrentReaderNum())
4636         emitError(XMLErrs::PartialMarkupInEntity);
4637 
4638     // Look up the name in the general entity pool
4639     // If it does not exist, then obviously an error
4640     if (!fEntityTable->containsKey(bbName.getRawBuffer()))
4641     {
4642         // XML 1.0 Section 4.1
4643         // Well-formedness Constraint for entity not found:
4644         //   In a document without any DTD, a document with only an internal DTD subset which contains no parameter entity references,
4645         //      or a document with "standalone='yes'", for an entity reference that does not occur within the external subset
4646         //      or a parameter entity
4647         if (fStandalone || fHasNoDTD)
4648             emitError(XMLErrs::EntityNotFound, bbName.getRawBuffer());
4649 
4650         return EntityExp_Failed;
4651     }
4652 
4653     // here's where we need to check if there's a SecurityManager,
4654     // how many entity references we've had
4655     if(fSecurityManager != 0 && ++fEntityExpansionCount > fEntityExpansionLimit) {
4656         XMLCh expLimStr[32];
4657         XMLString::sizeToText(fEntityExpansionLimit, expLimStr, 31, 10, fMemoryManager);
4658         emitError
4659         (
4660             XMLErrs::EntityExpansionLimitExceeded
4661             , expLimStr
4662         );
4663         // there seems nothing better to be done than to reset the entity expansion limit
4664         fEntityExpansionCount = 0;
4665     }
4666 
4667     firstCh = fEntityTable->get(bbName.getRawBuffer());
4668     escaped = true;
4669     return EntityExp_Returned;
4670 }
4671 
4672 
switchGrammar(const XMLCh * const newGrammarNameSpace)4673 bool SGXMLScanner::switchGrammar(const XMLCh* const newGrammarNameSpace)
4674 {
4675     Grammar* tempGrammar = fGrammarResolver->getGrammar(newGrammarNameSpace);
4676 
4677     if (!tempGrammar) {
4678         tempGrammar = fSchemaGrammar;
4679     }
4680 
4681     if (!tempGrammar)
4682         return false;
4683     else {
4684         fGrammar = tempGrammar;
4685         fGrammarType = fGrammar->getGrammarType();
4686         if (fGrammarType == Grammar::DTDGrammarType) {
4687             ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager);
4688         }
4689 
4690         fValidator->setGrammar(fGrammar);
4691         return true;
4692     }
4693 }
4694 
4695 // check if we should skip or lax the validation of the element
4696 // if skip - no validation
4697 // if lax - validate only if the element if found
laxElementValidation(QName * element,ContentLeafNameTypeVector * cv,const XMLContentModel * const cm,const XMLSize_t parentElemDepth)4698 bool SGXMLScanner::laxElementValidation(QName* element, ContentLeafNameTypeVector* cv,
4699                                         const XMLContentModel* const cm,
4700                                         const XMLSize_t parentElemDepth)
4701 {
4702     bool skipThisOne = false;
4703     bool laxThisOne = false;
4704     unsigned int elementURI = element->getURI();
4705     unsigned int currState = fElemState[parentElemDepth];
4706     unsigned int currLoop = fElemLoopState[parentElemDepth];
4707 
4708     if (currState == XMLContentModel::gInvalidTrans) {
4709         return laxThisOne;
4710     }
4711 
4712     SubstitutionGroupComparator comparator(fGrammarResolver, fURIStringPool);
4713 
4714     if (cv) {
4715         XMLSize_t i = 0;
4716         XMLSize_t leafCount = cv->getLeafCount();
4717         unsigned int nextState = 0;
4718 
4719         for (; i < leafCount; i++) {
4720 
4721             QName* fElemMap = cv->getLeafNameAt(i);
4722             unsigned int uri = fElemMap->getURI();
4723             ContentSpecNode::NodeTypes type = cv->getLeafTypeAt(i);
4724 
4725             if (type == ContentSpecNode::Leaf) {
4726                 if (((uri == elementURI)
4727                       && XMLString::equals(fElemMap->getLocalPart(), element->getLocalPart()))
4728                     || comparator.isEquivalentTo(element, fElemMap)) {
4729 
4730                     nextState = cm->getNextState(currState, i);
4731 
4732                     if (nextState != XMLContentModel::gInvalidTrans)
4733                         break;
4734                 }
4735             } else if ((type & 0x0f) == ContentSpecNode::Any) {
4736                 nextState = cm->getNextState(currState, i);
4737                 if (nextState != XMLContentModel::gInvalidTrans)
4738                     break;
4739             }
4740             else if ((type & 0x0f) == ContentSpecNode::Any_Other) {
4741                 if (uri != elementURI && elementURI != fEmptyNamespaceId) {
4742                     nextState = cm->getNextState(currState, i);
4743                     if (nextState != XMLContentModel::gInvalidTrans)
4744                         break;
4745                 }
4746             }
4747             else if ((type & 0x0f) == ContentSpecNode::Any_NS) {
4748                 if (uri == elementURI) {
4749                     nextState = cm->getNextState(currState, i);
4750                     if (nextState != XMLContentModel::gInvalidTrans)
4751                         break;
4752                 }
4753             }
4754 
4755         } // for
4756 
4757         if (i == leafCount) { // no match
4758             fElemState[parentElemDepth] = XMLContentModel::gInvalidTrans;
4759             fElemLoopState[parentElemDepth] = 0;
4760             return laxThisOne;
4761         }
4762 
4763         ContentSpecNode::NodeTypes type = cv->getLeafTypeAt(i);
4764         if ((type & 0x0f) == ContentSpecNode::Any ||
4765             (type & 0x0f) == ContentSpecNode::Any_Other ||
4766             (type & 0x0f) == ContentSpecNode::Any_NS)
4767         {
4768             if (type == ContentSpecNode::Any_Skip ||
4769                 type == ContentSpecNode::Any_NS_Skip ||
4770                 type == ContentSpecNode::Any_Other_Skip) {
4771                 skipThisOne = true;
4772             }
4773             else if (type == ContentSpecNode::Any_Lax ||
4774                      type == ContentSpecNode::Any_NS_Lax ||
4775                      type == ContentSpecNode::Any_Other_Lax) {
4776                 laxThisOne = true;
4777             }
4778         }
4779         fElemState[parentElemDepth] = nextState;
4780         fElemLoopState[parentElemDepth] = currLoop;
4781     } // if
4782 
4783     if (skipThisOne) {
4784         fValidate = false;
4785         fElemStack.setValidationFlag(fValidate);
4786     }
4787 
4788     return laxThisOne;
4789 }
4790 
4791 
4792 // check if there is an AnyAttribute, and if so, see if we should lax or skip
4793 // if skip - no validation
4794 // if lax - validate only if the attribute if found
anyAttributeValidation(SchemaAttDef * attWildCard,unsigned int uriId,bool & skipThisOne,bool & laxThisOne)4795 bool SGXMLScanner::anyAttributeValidation(SchemaAttDef* attWildCard, unsigned int uriId, bool& skipThisOne, bool& laxThisOne)
4796 {
4797     XMLAttDef::AttTypes wildCardType = attWildCard->getType();
4798     bool anyEncountered = false;
4799     skipThisOne = false;
4800     laxThisOne = false;
4801     if (wildCardType == XMLAttDef::Any_Any)
4802         anyEncountered = true;
4803     else if (wildCardType == XMLAttDef::Any_Other) {
4804         if (attWildCard->getAttName()->getURI() != uriId
4805             && uriId != fEmptyNamespaceId)
4806             anyEncountered = true;
4807     }
4808     else if (wildCardType == XMLAttDef::Any_List) {
4809         ValueVectorOf<unsigned int>* nameURIList = attWildCard->getNamespaceList();
4810         XMLSize_t listSize = (nameURIList) ? nameURIList->size() : 0;
4811 
4812         if (listSize) {
4813             for (XMLSize_t i=0; i < listSize; i++) {
4814                 if (nameURIList->elementAt(i) == uriId)
4815                     anyEncountered = true;
4816             }
4817         }
4818     }
4819 
4820     if (anyEncountered) {
4821         XMLAttDef::DefAttTypes   defType   = attWildCard->getDefaultType();
4822         if (defType == XMLAttDef::ProcessContents_Skip) {
4823             // attribute should just be bypassed,
4824             skipThisOne = true;
4825             if (getPSVIHandler())
4826             {
4827                 // REVISIT:
4828                 // PSVIAttribute->setValidationAttempted(PSVIItem::VALIDATION_NONE);
4829             }
4830         }
4831         else if (defType == XMLAttDef::ProcessContents_Lax) {
4832             laxThisOne = true;
4833         }
4834     }
4835 
4836     return anyEncountered;
4837 }
4838 
getAttDefList(ComplexTypeInfo * currType,XMLElementDecl * elemDecl)4839 inline XMLAttDefList& getAttDefList(ComplexTypeInfo* currType, XMLElementDecl* elemDecl)
4840 {
4841     if (currType)
4842         return currType->getAttDefList();
4843     else
4844         return elemDecl->getAttDefList();
4845 }
4846 
endElementPSVI(SchemaElementDecl * const elemDecl,DatatypeValidator * const memberDV)4847 void SGXMLScanner::endElementPSVI(SchemaElementDecl* const elemDecl,
4848                                   DatatypeValidator* const memberDV)
4849 {
4850     PSVIElement::ASSESSMENT_TYPE validationAttempted;
4851     PSVIElement::VALIDITY_STATE validity = PSVIElement::VALIDITY_NOTKNOWN;
4852 
4853     if (fPSVIElemContext.fElemDepth > fPSVIElemContext.fFullValidationDepth)
4854         validationAttempted = PSVIElement::VALIDATION_FULL;
4855     else if (fPSVIElemContext.fElemDepth > fPSVIElemContext.fNoneValidationDepth)
4856         validationAttempted = PSVIElement::VALIDATION_NONE;
4857     else
4858     {
4859         validationAttempted  = PSVIElement::VALIDATION_PARTIAL;
4860 		fPSVIElemContext.fFullValidationDepth =
4861             fPSVIElemContext.fNoneValidationDepth = fPSVIElemContext.fElemDepth - 1;
4862     }
4863 
4864     if (fValidate && elemDecl->isDeclared())
4865     {
4866         validity = (fPSVIElemContext.fErrorOccurred)
4867             ? PSVIElement::VALIDITY_INVALID : PSVIElement::VALIDITY_VALID;
4868     }
4869 
4870     XSTypeDefinition* typeDef = 0;
4871     bool isMixed = false;
4872     if (fPSVIElemContext.fCurrentTypeInfo)
4873     {
4874         typeDef = (XSTypeDefinition*) fModel->getXSObject(fPSVIElemContext.fCurrentTypeInfo);
4875         SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes)fPSVIElemContext.fCurrentTypeInfo->getContentType();
4876         isMixed = (modelType == SchemaElementDecl::Mixed_Simple
4877                 || modelType == SchemaElementDecl::Mixed_Complex);
4878     }
4879     else if (fPSVIElemContext.fCurrentDV)
4880         typeDef = (XSTypeDefinition*) fModel->getXSObject(fPSVIElemContext.fCurrentDV);
4881 
4882     XMLCh* canonicalValue = 0;
4883     if (fPSVIElemContext.fNormalizedValue && !isMixed &&
4884             validity == PSVIElement::VALIDITY_VALID)
4885     {
4886         if (memberDV)
4887             canonicalValue = (XMLCh*) memberDV->getCanonicalRepresentation(fPSVIElemContext.fNormalizedValue, fMemoryManager);
4888         else if (fPSVIElemContext.fCurrentDV)
4889             canonicalValue = (XMLCh*) fPSVIElemContext.fCurrentDV->getCanonicalRepresentation(fPSVIElemContext.fNormalizedValue, fMemoryManager);
4890     }
4891 
4892     fPSVIElement->reset
4893     (
4894         validity
4895         , validationAttempted
4896         , fRootElemName
4897         , fPSVIElemContext.fIsSpecified
4898         , (elemDecl->isDeclared())
4899             ? (XSElementDeclaration*) fModel->getXSObject(elemDecl) : 0
4900         , typeDef
4901         , (memberDV) ? (XSSimpleTypeDefinition*) fModel->getXSObject(memberDV) : 0
4902         , fModel
4903         , elemDecl->getDefaultValue()
4904         , fPSVIElemContext.fNormalizedValue
4905         , canonicalValue
4906     );
4907 
4908     fPSVIHandler->handleElementPSVI
4909     (
4910         elemDecl->getBaseName()
4911         , fURIStringPool->getValueForId(elemDecl->getURI())
4912         , fPSVIElement
4913     );
4914 
4915     // decrease element depth
4916     fPSVIElemContext.fElemDepth--;
4917 
4918 }
4919 
resetPSVIElemContext()4920 void SGXMLScanner::resetPSVIElemContext()
4921 {
4922     fPSVIElemContext.fIsSpecified = false;
4923     fPSVIElemContext.fErrorOccurred = false;
4924     fPSVIElemContext.fElemDepth = -1;
4925     fPSVIElemContext.fFullValidationDepth = -1;
4926     fPSVIElemContext.fNoneValidationDepth = -1;
4927     fPSVIElemContext.fCurrentDV = 0;
4928     fPSVIElemContext.fCurrentTypeInfo = 0;
4929     fPSVIElemContext.fNormalizedValue = 0;
4930 }
4931 
4932 XERCES_CPP_NAMESPACE_END
4933