1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /*
19 * $Id: SGXMLScanner.cpp 925236 2010-03-19 14:29:47Z borisk $
20 */
21
22
23 // ---------------------------------------------------------------------------
24 // Includes
25 // ---------------------------------------------------------------------------
26 #include <xercesc/internal/SGXMLScanner.hpp>
27 #include <xercesc/util/RuntimeException.hpp>
28 #include <xercesc/util/UnexpectedEOFException.hpp>
29 #include <xercesc/util/XMLUri.hpp>
30 #include <xercesc/framework/LocalFileInputSource.hpp>
31 #include <xercesc/framework/URLInputSource.hpp>
32 #include <xercesc/framework/XMLDocumentHandler.hpp>
33 #include <xercesc/framework/XMLEntityHandler.hpp>
34 #include <xercesc/framework/XMLPScanToken.hpp>
35 #include <xercesc/framework/MemoryManager.hpp>
36 #include <xercesc/framework/XMLGrammarPool.hpp>
37 #include <xercesc/framework/psvi/PSVIElement.hpp>
38 #include <xercesc/framework/psvi/PSVIHandler.hpp>
39 #include <xercesc/framework/psvi/PSVIAttributeList.hpp>
40 #include <xercesc/framework/psvi/XSAnnotation.hpp>
41 #include <xercesc/internal/EndOfEntityException.hpp>
42 #include <xercesc/validators/common/ContentLeafNameTypeVector.hpp>
43 #include <xercesc/validators/schema/SchemaValidator.hpp>
44 #include <xercesc/validators/schema/TraverseSchema.hpp>
45 #include <xercesc/validators/schema/XSDDOMParser.hpp>
46 #include <xercesc/validators/schema/SubstitutionGroupComparator.hpp>
47 #include <xercesc/validators/schema/XMLSchemaDescriptionImpl.hpp>
48 #include <xercesc/validators/schema/identity/IdentityConstraintHandler.hpp>
49 #include <xercesc/validators/schema/identity/IC_Selector.hpp>
50 #include <xercesc/validators/schema/identity/ValueStore.hpp>
51 #include <xercesc/util/OutOfMemoryException.hpp>
52 #include <xercesc/util/XMLStringTokenizer.hpp>
53
54 XERCES_CPP_NAMESPACE_BEGIN
55
56 inline XMLAttDefList& getAttDefList(ComplexTypeInfo* currType, XMLElementDecl* elemDecl);
57
58
59 typedef JanitorMemFunCall<SGXMLScanner> CleanupType;
60 typedef JanitorMemFunCall<ReaderMgr> ReaderMgrResetType;
61
62
63 // ---------------------------------------------------------------------------
64 // SGXMLScanner: Constructors and Destructor
65 // ---------------------------------------------------------------------------
SGXMLScanner(XMLValidator * const valToAdopt,GrammarResolver * const grammarResolver,MemoryManager * const manager)66 SGXMLScanner::SGXMLScanner( XMLValidator* const valToAdopt
67 , GrammarResolver* const grammarResolver
68 , MemoryManager* const manager) :
69
70 XMLScanner(valToAdopt, grammarResolver, manager)
71 , fSeeXsi(false)
72 , fGrammarType(Grammar::UnKnown)
73 , fElemStateSize(16)
74 , fElemState(0)
75 , fElemLoopState(0)
76 , fContent(1023, manager)
77 , fEntityTable(0)
78 , fRawAttrList(0)
79 , fRawAttrColonListSize(32)
80 , fRawAttrColonList(0)
81 , fSchemaGrammar(0)
82 , fSchemaValidator(0)
83 , fICHandler(0)
84 , fElemNonDeclPool(0)
85 , fElemCount(0)
86 , fAttDefRegistry(0)
87 , fUndeclaredAttrRegistry(0)
88 , fPSVIAttrList(0)
89 , fModel(0)
90 , fPSVIElement(0)
91 , fErrorStack(0)
92 , fSchemaInfoList(0)
93 , fCachedSchemaInfoList(0)
94 {
95 CleanupType cleanup(this, &SGXMLScanner::cleanUp);
96
97 try
98 {
99 commonInit();
100 }
101 catch(const OutOfMemoryException&)
102 {
103 // Don't cleanup when out of memory, since executing the
104 // code can cause problems.
105 cleanup.release();
106
107 throw;
108 }
109
110 cleanup.release();
111 }
112
SGXMLScanner(XMLDocumentHandler * const docHandler,DocTypeHandler * const docTypeHandler,XMLEntityHandler * const entityHandler,XMLErrorReporter * const errHandler,XMLValidator * const valToAdopt,GrammarResolver * const grammarResolver,MemoryManager * const manager)113 SGXMLScanner::SGXMLScanner( XMLDocumentHandler* const docHandler
114 , DocTypeHandler* const docTypeHandler
115 , XMLEntityHandler* const entityHandler
116 , XMLErrorReporter* const errHandler
117 , XMLValidator* const valToAdopt
118 , GrammarResolver* const grammarResolver
119 , MemoryManager* const manager) :
120
121 XMLScanner(docHandler, docTypeHandler, entityHandler, errHandler, valToAdopt, grammarResolver, manager)
122 , fSeeXsi(false)
123 , fGrammarType(Grammar::UnKnown)
124 , fElemStateSize(16)
125 , fElemState(0)
126 , fElemLoopState(0)
127 , fContent(1023, manager)
128 , fEntityTable(0)
129 , fRawAttrList(0)
130 , fRawAttrColonListSize(32)
131 , fRawAttrColonList(0)
132 , fSchemaGrammar(0)
133 , fSchemaValidator(0)
134 , fICHandler(0)
135 , fElemNonDeclPool(0)
136 , fElemCount(0)
137 , fAttDefRegistry(0)
138 , fUndeclaredAttrRegistry(0)
139 , fPSVIAttrList(0)
140 , fModel(0)
141 , fPSVIElement(0)
142 , fErrorStack(0)
143 , fSchemaInfoList(0)
144 , fCachedSchemaInfoList(0)
145 {
146 CleanupType cleanup(this, &SGXMLScanner::cleanUp);
147
148 try
149 {
150 commonInit();
151 }
152 catch(const OutOfMemoryException&)
153 {
154 // Don't cleanup when out of memory, since executing the
155 // code can cause problems.
156 cleanup.release();
157
158 throw;
159 }
160
161 cleanup.release();
162 }
163
~SGXMLScanner()164 SGXMLScanner::~SGXMLScanner()
165 {
166 cleanUp();
167 }
168
169 // ---------------------------------------------------------------------------
170 // XMLScanner: Getter methods
171 // ---------------------------------------------------------------------------
getEntityDeclPool()172 NameIdPool<DTDEntityDecl>* SGXMLScanner::getEntityDeclPool()
173 {
174 return 0;
175 }
176
getEntityDeclPool() const177 const NameIdPool<DTDEntityDecl>* SGXMLScanner::getEntityDeclPool() const
178 {
179 return 0;
180 }
181
182 // ---------------------------------------------------------------------------
183 // SGXMLScanner: Main entry point to scan a document
184 // ---------------------------------------------------------------------------
scanDocument(const InputSource & src)185 void SGXMLScanner::scanDocument(const InputSource& src)
186 {
187 // Bump up the sequence id for this parser instance. This will invalidate
188 // any previous progressive scan tokens.
189 fSequenceId++;
190
191 ReaderMgrResetType resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
192
193 try
194 {
195 // Reset the scanner and its plugged in stuff for a new run. This
196 // resets all the data structures, creates the initial reader and
197 // pushes it on the stack, and sets up the base document path.
198 scanReset(src);
199
200 // If we have a document handler, then call the start document
201 if (fDocHandler)
202 fDocHandler->startDocument();
203
204 // Scan the prolog part, which is everything before the root element
205 // including the DTD subsets.
206 scanProlog();
207
208 // If we got to the end of input, then its not a valid XML file.
209 // Else, go on to scan the content.
210 if (fReaderMgr.atEOF())
211 {
212 emitError(XMLErrs::EmptyMainEntity);
213 }
214 else
215 {
216 // Scan content, and tell it its not an external entity
217 if (scanContent())
218 {
219 // Do post-parse validation if required
220 if (fValidate)
221 {
222 // We handle ID reference semantics at this level since
223 // its required by XML 1.0.
224 checkIDRefs();
225
226 // Then allow the validator to do any extra stuff it wants
227 // fValidator->postParseValidation();
228 }
229
230 // That went ok, so scan for any miscellaneous stuff
231 if (!fReaderMgr.atEOF())
232 scanMiscellaneous();
233 }
234 }
235
236 // If we have a document handler, then call the end document
237 if (fDocHandler)
238 fDocHandler->endDocument();
239 }
240 // NOTE:
241 //
242 // In all of the error processing below, the emitError() call MUST come
243 // before the flush of the reader mgr, or it will fail because it tries
244 // to find out the position in the XML source of the error.
245 catch(const XMLErrs::Codes)
246 {
247 // This is a 'first failure' exception, so fall through
248 }
249 catch(const XMLValid::Codes)
250 {
251 // This is a 'first fatal error' type exit, so fall through
252 }
253 catch(const XMLException& excToCatch)
254 {
255 // Emit the error and catch any user exception thrown from here. Make
256 // sure in all cases we flush the reader manager.
257 fInException = true;
258 try
259 {
260 if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
261 emitError
262 (
263 XMLErrs::XMLException_Warning
264 , excToCatch.getCode()
265 , excToCatch.getMessage()
266 );
267 else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
268 emitError
269 (
270 XMLErrs::XMLException_Fatal
271 , excToCatch.getCode()
272 , excToCatch.getMessage()
273 );
274 else
275 emitError
276 (
277 XMLErrs::XMLException_Error
278 , excToCatch.getCode()
279 , excToCatch.getMessage()
280 );
281 }
282 catch(const OutOfMemoryException&)
283 {
284 // This is a special case for out-of-memory
285 // conditions, because resetting the ReaderMgr
286 // can be problematic.
287 resetReaderMgr.release();
288
289 throw;
290 }
291 }
292 catch(const OutOfMemoryException&)
293 {
294 // This is a special case for out-of-memory
295 // conditions, because resetting the ReaderMgr
296 // can be problematic.
297 resetReaderMgr.release();
298
299 throw;
300 }
301 }
302
303
scanNext(XMLPScanToken & token)304 bool SGXMLScanner::scanNext(XMLPScanToken& token)
305 {
306 // Make sure this token is still legal
307 if (!isLegalToken(token))
308 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_BadPScanToken, fMemoryManager);
309
310 // Find the next token and remember the reader id
311 XMLSize_t orgReader;
312 XMLTokens curToken;
313
314 ReaderMgrResetType resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
315
316 bool retVal = true;
317
318 try
319 {
320 while (true)
321 {
322 // We have to handle any end of entity exceptions that happen here.
323 // We could be at the end of X nested entities, each of which will
324 // generate an end of entity exception as we try to move forward.
325 try
326 {
327 curToken = senseNextToken(orgReader);
328 break;
329 }
330 catch(const EndOfEntityException& toCatch)
331 {
332 // Send an end of entity reference event
333 if (fDocHandler)
334 fDocHandler->endEntityReference(toCatch.getEntity());
335 }
336 }
337
338 if (curToken == Token_CharData)
339 {
340 scanCharData(fCDataBuf);
341 }
342 else if (curToken == Token_EOF)
343 {
344 if (!fElemStack.isEmpty())
345 {
346 const ElemStack::StackElem* topElem = fElemStack.popTop();
347 emitError
348 (
349 XMLErrs::EndedWithTagsOnStack
350 , topElem->fThisElement->getFullName()
351 );
352 }
353
354 retVal = false;
355 }
356 else
357 {
358 // Its some sort of markup
359 bool gotData = true;
360 switch(curToken)
361 {
362 case Token_CData :
363 // Make sure we are within content
364 if (fElemStack.isEmpty())
365 emitError(XMLErrs::CDATAOutsideOfContent);
366 scanCDSection();
367 break;
368
369 case Token_Comment :
370 scanComment();
371 break;
372
373 case Token_EndTag :
374 scanEndTag(gotData);
375 break;
376
377 case Token_PI :
378 scanPI();
379 break;
380
381 case Token_StartTag :
382 scanStartTag(gotData);
383 break;
384
385 default :
386 fReaderMgr.skipToChar(chOpenAngle);
387 break;
388 }
389
390 if (orgReader != fReaderMgr.getCurrentReaderNum())
391 emitError(XMLErrs::PartialMarkupInEntity);
392
393 // If we hit the end, then do the miscellaneous part
394 if (!gotData)
395 {
396 // Do post-parse validation if required
397 if (fValidate)
398 {
399 // We handle ID reference semantics at this level since
400 // its required by XML 1.0.
401 checkIDRefs();
402
403 // Then allow the validator to do any extra stuff it wants
404 // fValidator->postParseValidation();
405 }
406
407 // That went ok, so scan for any miscellaneous stuff
408 scanMiscellaneous();
409
410 if (toCheckIdentityConstraint())
411 fICHandler->endDocument();
412
413 if (fDocHandler)
414 fDocHandler->endDocument();
415 }
416 }
417 }
418 // NOTE:
419 //
420 // In all of the error processing below, the emitError() call MUST come
421 // before the flush of the reader mgr, or it will fail because it tries
422 // to find out the position in the XML source of the error.
423 catch(const XMLErrs::Codes)
424 {
425 // This is a 'first failure' exception, so return failure
426 retVal = false;
427 }
428 catch(const XMLValid::Codes)
429 {
430 // This is a 'first fatal error' type exit, so return failure
431 retVal = false;
432 }
433 catch(const XMLException& excToCatch)
434 {
435 // Emit the error and catch any user exception thrown from here. Make
436 // sure in all cases we flush the reader manager.
437 fInException = true;
438 try
439 {
440 if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
441 emitError
442 (
443 XMLErrs::XMLException_Warning
444 , excToCatch.getCode()
445 , excToCatch.getMessage()
446 );
447 else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
448 emitError
449 (
450 XMLErrs::XMLException_Fatal
451 , excToCatch.getCode()
452 , excToCatch.getMessage()
453 );
454 else
455 emitError
456 (
457 XMLErrs::XMLException_Error
458 , excToCatch.getCode()
459 , excToCatch.getMessage()
460 );
461 }
462 catch(const OutOfMemoryException&)
463 {
464 // This is a special case for out-of-memory
465 // conditions, because resetting the ReaderMgr
466 // can be problematic.
467 resetReaderMgr.release();
468
469 throw;
470 }
471
472 retVal = false;
473 }
474 catch(const OutOfMemoryException&)
475 {
476 // This is a special case for out-of-memory
477 // conditions, because resetting the ReaderMgr
478 // can be problematic.
479 resetReaderMgr.release();
480
481 throw;
482 }
483
484 // If we are not at the end, release the object that will
485 // reset the ReaderMgr.
486 if (retVal)
487 resetReaderMgr.release();
488
489 return retVal;
490 }
491
492 // ---------------------------------------------------------------------------
493 // SGXMLScanner: Private scanning methods
494 // ---------------------------------------------------------------------------
495
496 // This method is called from scanStartTag() to handle the very raw initial
497 // scan of the attributes. It just fills in the passed collection with
498 // key/value pairs for each attribute. No processing is done on them at all.
499 XMLSize_t
rawAttrScan(const XMLCh * const elemName,RefVectorOf<KVStringPair> & toFill,bool & isEmpty)500 SGXMLScanner::rawAttrScan(const XMLCh* const elemName
501 , RefVectorOf<KVStringPair>& toFill
502 , bool& isEmpty)
503 {
504 // Keep up with how many attributes we've seen so far, and how many
505 // elements are available in the vector. This way we can reuse old
506 // elements until we run out and then expand it.
507 XMLSize_t attCount = 0;
508 XMLSize_t curVecSize = toFill.size();
509
510 // Assume it is not empty
511 isEmpty = false;
512
513 // We loop until we either see a /> or >, handling key/value pairs util
514 // we get there. We place them in the passed vector, which we will expand
515 // as required to hold them.
516 while (true)
517 {
518 // Get the next character, which should be non-space
519 XMLCh nextCh = fReaderMgr.peekNextChar();
520
521 // If the next character is not a slash or closed angle bracket,
522 // then it must be whitespace, since whitespace is required
523 // between the end of the last attribute and the name of the next
524 // one.
525 //
526 if (attCount)
527 {
528 if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
529 {
530 if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
531 {
532 // Ok, skip by them and get another char
533 fReaderMgr.getNextChar();
534 fReaderMgr.skipPastSpaces();
535 nextCh = fReaderMgr.peekNextChar();
536 }
537 else
538 {
539 // Emit the error but keep on going
540 emitError(XMLErrs::ExpectedWhitespace);
541 }
542 }
543 }
544
545 // Ok, here we first check for any of the special case characters.
546 // If its not one, then we do the normal case processing, which
547 // assumes that we've hit an attribute value, Otherwise, we do all
548 // the special case checks.
549 if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
550 {
551 // Assume its going to be an attribute, so get a name from
552 // the input.
553 int colonPosition;
554 if (!fReaderMgr.getQName(fAttNameBuf, &colonPosition))
555 {
556 if (fAttNameBuf.isEmpty())
557 emitError(XMLErrs::ExpectedAttrName);
558 else
559 emitError(XMLErrs::InvalidAttrName, fAttNameBuf.getRawBuffer());
560 fReaderMgr.skipPastChar(chCloseAngle);
561 return attCount;
562 }
563
564 const XMLCh* curAttNameBuf = fAttNameBuf.getRawBuffer();
565
566 // And next must be an equal sign
567 if (!scanEq())
568 {
569 static const XMLCh tmpList[] =
570 {
571 chSingleQuote, chDoubleQuote, chCloseAngle
572 , chOpenAngle, chForwardSlash, chNull
573 };
574
575 emitError(XMLErrs::ExpectedEqSign);
576
577 // Try to sync back up by skipping forward until we either
578 // hit something meaningful.
579 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
580
581 if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
582 {
583 // Jump back to top for normal processing of these
584 continue;
585 }
586 else if ((chFound == chSingleQuote)
587 || (chFound == chDoubleQuote)
588 || fReaderMgr.getCurrentReader()->isWhitespace(chFound))
589 {
590 // Just fall through assuming that the value is to follow
591 }
592 else if (chFound == chOpenAngle)
593 {
594 // Assume a malformed tag and that new one is starting
595 emitError(XMLErrs::UnterminatedStartTag, elemName);
596 return attCount;
597 }
598 else
599 {
600 // Something went really wrong
601 return attCount;
602 }
603 }
604
605 // Next should be the quoted attribute value. We just do a simple
606 // and stupid scan of this value. The only thing we do here
607 // is to expand entity references.
608 if (!basicAttrValueScan(curAttNameBuf, fAttValueBuf))
609 {
610 static const XMLCh tmpList[] =
611 {
612 chCloseAngle, chOpenAngle, chForwardSlash, chNull
613 };
614
615 emitError(XMLErrs::ExpectedAttrValue);
616
617 // It failed, so lets try to get synced back up. We skip
618 // forward until we find some whitespace or one of the
619 // chars in our list.
620 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
621
622 if ((chFound == chCloseAngle)
623 || (chFound == chForwardSlash)
624 || fReaderMgr.getCurrentReader()->isWhitespace(chFound))
625 {
626 // Just fall through and process this attribute, though
627 // the value will be "".
628 }
629 else if (chFound == chOpenAngle)
630 {
631 // Assume a malformed tag and that new one is starting
632 emitError(XMLErrs::UnterminatedStartTag, elemName);
633 return attCount;
634 }
635 else
636 {
637 // Something went really wrong
638 return attCount;
639 }
640 }
641
642 // And now lets add it to the passed collection. If we have not
643 // filled it up yet, then we use the next element. Else we add
644 // a new one.
645 KVStringPair* curPair = 0;
646 if (attCount >= curVecSize)
647 {
648 curPair = new (fMemoryManager) KVStringPair
649 (
650 curAttNameBuf
651 , fAttNameBuf.getLen()
652 , fAttValueBuf.getRawBuffer()
653 , fAttValueBuf.getLen()
654 , fMemoryManager
655 );
656 toFill.addElement(curPair);
657 }
658 else
659 {
660 curPair = toFill.elementAt(attCount);
661 curPair->set
662 (
663 curAttNameBuf
664 , fAttNameBuf.getLen()
665 , fAttValueBuf.getRawBuffer()
666 , fAttValueBuf.getLen()
667 );
668 }
669 if (attCount >= fRawAttrColonListSize) {
670 resizeRawAttrColonList();
671 }
672 fRawAttrColonList[attCount] = colonPosition;
673
674 // And bump the count of attributes we've gotten
675 attCount++;
676
677 // And go to the top again for another attribute
678 continue;
679 }
680
681 // It was some special case character so do all of the checks and
682 // deal with it.
683 if (!nextCh)
684 ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
685
686 if (nextCh == chForwardSlash)
687 {
688 fReaderMgr.getNextChar();
689 isEmpty = true;
690 if (!fReaderMgr.skippedChar(chCloseAngle))
691 emitError(XMLErrs::UnterminatedStartTag, elemName);
692 break;
693 }
694 else if (nextCh == chCloseAngle)
695 {
696 fReaderMgr.getNextChar();
697 break;
698 }
699 else if (nextCh == chOpenAngle)
700 {
701 // Check for this one specially, since its going to be common
702 // and it is kind of auto-recovering since we've already hit the
703 // next open bracket, which is what we would have seeked to (and
704 // skipped this whole tag.)
705 emitError(XMLErrs::UnterminatedStartTag, elemName);
706 break;
707 }
708 else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
709 {
710 // Check for this one specially, which is probably a missing
711 // attribute name, e.g. ="value". Just issue expected name
712 // error and eat the quoted string, then jump back to the
713 // top again.
714 emitError(XMLErrs::ExpectedAttrName);
715 fReaderMgr.getNextChar();
716 fReaderMgr.skipQuotedString(nextCh);
717 fReaderMgr.skipPastSpaces();
718 continue;
719 }
720 }
721
722 return attCount;
723 }
724
725
726 // This method will kick off the scanning of the primary content of the
727 // document, i.e. the elements.
scanContent()728 bool SGXMLScanner::scanContent()
729 {
730 // Go into a loop until we hit the end of the root element, or we fall
731 // out because there is no root element.
732 //
733 // We have to do kind of a deeply nested double loop here in order to
734 // avoid doing the setup/teardown of the exception handler on each
735 // round. Doing it this way we only do it when an exception actually
736 // occurs.
737 bool gotData = true;
738 bool inMarkup = false;
739 while (gotData)
740 {
741 try
742 {
743 while (gotData)
744 {
745 // Sense what the next top level token is. According to what
746 // this tells us, we will call something to handle that kind
747 // of thing.
748 XMLSize_t orgReader;
749 const XMLTokens curToken = senseNextToken(orgReader);
750
751 // Handle character data and end of file specially. Char data
752 // is not markup so we don't want to handle it in the loop
753 // below.
754 if (curToken == Token_CharData)
755 {
756 // Scan the character data and call appropriate events. Let
757 // him use our local character data buffer for efficiency.
758 scanCharData(fCDataBuf);
759 continue;
760 }
761 else if (curToken == Token_EOF)
762 {
763 // The element stack better be empty at this point or we
764 // ended prematurely before all elements were closed.
765 if (!fElemStack.isEmpty())
766 {
767 const ElemStack::StackElem* topElem = fElemStack.popTop();
768 emitError
769 (
770 XMLErrs::EndedWithTagsOnStack
771 , topElem->fThisElement->getFullName()
772 );
773 }
774
775 // Its the end of file, so clear the got data flag
776 gotData = false;
777 continue;
778 }
779
780 // We are in some sort of markup now
781 inMarkup = true;
782
783 // According to the token we got, call the appropriate
784 // scanning method.
785 switch(curToken)
786 {
787 case Token_CData :
788 // Make sure we are within content
789 if (fElemStack.isEmpty())
790 emitError(XMLErrs::CDATAOutsideOfContent);
791 scanCDSection();
792 break;
793
794 case Token_Comment :
795 scanComment();
796 break;
797
798 case Token_EndTag :
799 scanEndTag(gotData);
800 break;
801
802 case Token_PI :
803 scanPI();
804 break;
805
806 case Token_StartTag :
807 scanStartTag(gotData);
808 break;
809
810 default :
811 fReaderMgr.skipToChar(chOpenAngle);
812 break;
813 }
814
815 if (orgReader != fReaderMgr.getCurrentReaderNum())
816 emitError(XMLErrs::PartialMarkupInEntity);
817
818 // And we are back out of markup again
819 inMarkup = false;
820 }
821 }
822 catch(const EndOfEntityException& toCatch)
823 {
824 // If we were in some markup when this happened, then its a
825 // partial markup error.
826 if (inMarkup)
827 emitError(XMLErrs::PartialMarkupInEntity);
828
829 // Send an end of entity reference event
830 if (fDocHandler)
831 fDocHandler->endEntityReference(toCatch.getEntity());
832
833 inMarkup = false;
834 }
835 }
836
837 // It went ok, so return success
838 return true;
839 }
840
841
scanEndTag(bool & gotData)842 void SGXMLScanner::scanEndTag(bool& gotData)
843 {
844 // Assume we will still have data until proven otherwise. It will only
845 // ever be false if this is the end of the root element.
846 gotData = true;
847
848 // Check if the element stack is empty. If so, then this is an unbalanced
849 // element (i.e. more ends than starts, perhaps because of bad text
850 // causing one to be skipped.)
851 if (fElemStack.isEmpty())
852 {
853 emitError(XMLErrs::MoreEndThanStartTags);
854 fReaderMgr.skipPastChar(chCloseAngle);
855 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);
856 }
857
858 // Pop the stack of the element we are supposed to be ending. Remember
859 // that we don't own this. The stack just keeps them and reuses them.
860 unsigned int uriId = (fDoNamespaces)
861 ? fElemStack.getCurrentURI() : fEmptyNamespaceId;
862
863 // Make sure that its the end of the element that we expect
864 const XMLCh *elemName = fElemStack.getCurrentSchemaElemName();
865 const ElemStack::StackElem* topElem = fElemStack.topElement();
866 if (!fReaderMgr.skippedStringLong(elemName))
867 {
868 emitError
869 (
870 XMLErrs::ExpectedEndOfTagX
871 , elemName
872 );
873 fReaderMgr.skipPastChar(chCloseAngle);
874 fElemStack.popTop();
875 return;
876 }
877
878 fPSVIElemContext.fErrorOccurred = fErrorStack->pop();
879
880 // Make sure we are back on the same reader as where we started
881 if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
882 emitError(XMLErrs::PartialTagMarkupError);
883
884 // Skip optional whitespace
885 fReaderMgr.skipPastSpaces();
886
887 // Make sure we find the closing bracket
888 if (!fReaderMgr.skippedChar(chCloseAngle))
889 {
890 emitError
891 (
892 XMLErrs::UnterminatedEndTag
893 , topElem->fThisElement->getFullName()
894 );
895 }
896
897 if (fValidate && topElem->fThisElement->isDeclared())
898 {
899 fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
900 if(!fPSVIElemContext.fCurrentTypeInfo)
901 fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
902 else
903 fPSVIElemContext.fCurrentDV = 0;
904 if (fPSVIHandler)
905 {
906 fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) fValidator)->getNormalizedValue();
907
908 if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString))
909 fPSVIElemContext.fNormalizedValue = 0;
910
911 }
912 }
913 else
914 {
915 fPSVIElemContext.fCurrentDV = 0;
916 fPSVIElemContext.fCurrentTypeInfo = 0;
917 fPSVIElemContext.fNormalizedValue = 0;
918 }
919
920 // If validation is enabled, then lets pass him the list of children and
921 // this element and let him validate it.
922 DatatypeValidator* psviMemberType = 0;
923 if (fValidate)
924 {
925 XMLSize_t failure;
926 bool res = fValidator->checkContent
927 (
928 topElem->fThisElement
929 , topElem->fChildren
930 , topElem->fChildCount
931 , &failure
932 );
933
934 if (!res)
935 {
936 // One of the elements is not valid for the content. NOTE that
937 // if no children were provided but the content model requires
938 // them, it comes back with a zero value. But we cannot use that
939 // to index the child array in this case, and have to put out a
940 // special message.
941 if (!topElem->fChildCount)
942 {
943 fValidator->emitError
944 (
945 XMLValid::EmptyNotValidForContent
946 , topElem->fThisElement->getFormattedContentModel()
947 );
948 }
949 else if (failure >= topElem->fChildCount)
950 {
951 fValidator->emitError
952 (
953 XMLValid::NotEnoughElemsForCM
954 , topElem->fThisElement->getFormattedContentModel()
955 );
956 }
957 else
958 {
959 fValidator->emitError
960 (
961 XMLValid::ElementNotValidForContent
962 , topElem->fChildren[failure]->getRawName()
963 , topElem->fThisElement->getFormattedContentModel()
964 );
965 }
966
967 }
968
969 // update PSVI info
970 if (((SchemaValidator*) fValidator)->getErrorOccurred())
971 fPSVIElemContext.fErrorOccurred = true;
972 else if (fPSVIElemContext.fCurrentDV && fPSVIElemContext.fCurrentDV->getType() == DatatypeValidator::Union)
973 psviMemberType = fValidationContext->getValidatingMemberType();
974 if (fPSVIHandler)
975 {
976 fPSVIElemContext.fIsSpecified = ((SchemaValidator*) fValidator)->getIsElemSpecified();
977 if(fPSVIElemContext.fIsSpecified)
978 fPSVIElemContext.fNormalizedValue = ((SchemaElementDecl *)topElem->fThisElement)->getDefaultValue();
979 }
980
981 // call matchers and de-activate context
982 if (toCheckIdentityConstraint())
983 {
984 fICHandler->deactivateContext
985 (
986 (SchemaElementDecl *) topElem->fThisElement
987 , fContent.getRawBuffer()
988 , fValidationContext
989 , fPSVIElemContext.fCurrentDV
990 );
991 }
992
993 }
994
995 // QName dv needed topElem to resolve URIs on the checkContent
996 fElemStack.popTop();
997
998 // See if it was the root element, to avoid multiple calls below
999 const bool isRoot = fElemStack.isEmpty();
1000
1001 if (fPSVIHandler)
1002 {
1003 endElementPSVI
1004 (
1005 (SchemaElementDecl*)topElem->fThisElement, psviMemberType
1006 );
1007 }
1008 // now we can reset the datatype buffer, since the
1009 // application has had a chance to copy the characters somewhere else
1010 ((SchemaValidator *)fValidator)->clearDatatypeBuffer();
1011
1012 // If we have a doc handler, tell it about the end tag
1013 if (fDocHandler)
1014 {
1015 if (fGrammarType == Grammar::SchemaGrammarType) {
1016 if (topElem->fPrefixColonPos != -1)
1017 fPrefixBuf.set(elemName, topElem->fPrefixColonPos);
1018 else
1019 fPrefixBuf.reset();
1020 }
1021 else {
1022 fPrefixBuf.set(topElem->fThisElement->getElementName()->getPrefix());
1023 }
1024 fDocHandler->endElement
1025 (
1026 *topElem->fThisElement
1027 , uriId
1028 , isRoot
1029 , fPrefixBuf.getRawBuffer()
1030 );
1031 }
1032
1033 if (!isRoot)
1034 {
1035 // update error information
1036 fErrorStack->push((fErrorStack->size() && fErrorStack->pop()) || fPSVIElemContext.fErrorOccurred);
1037 }
1038
1039 // If this was the root, then done with content
1040 gotData = !isRoot;
1041
1042 if (gotData) {
1043
1044 // Restore the grammar
1045 fGrammar = fElemStack.getCurrentGrammar();
1046 fGrammarType = fGrammar->getGrammarType();
1047 fValidator->setGrammar(fGrammar);
1048
1049 // Restore the validation flag
1050 fValidate = fElemStack.getValidationFlag();
1051 }
1052 }
1053
1054
1055 // This method handles the high level logic of scanning the DOCType
1056 // declaration. This calls the DTDScanner and kicks off both the scanning of
1057 // the internal subset and the scanning of the external subset, if any.
1058 //
1059 // When we get here the '<!DOCTYPE' part has already been scanned, which is
1060 // what told us that we had a doc type decl to parse.
scanDocTypeDecl()1061 void SGXMLScanner::scanDocTypeDecl()
1062 {
1063 // Just skips over it
1064 // REVISIT: Should we issue a warning
1065 static const XMLCh doctypeIE[] =
1066 {
1067 chOpenSquare, chCloseAngle, chNull
1068 };
1069 XMLCh nextCh = fReaderMgr.skipUntilIn(doctypeIE);
1070
1071 if (nextCh == chOpenSquare)
1072 fReaderMgr.skipPastChar(chCloseSquare);
1073
1074 fReaderMgr.skipPastChar(chCloseAngle);
1075 }
1076
1077 // This method is called to scan a start tag when we are processing
1078 // namespaces. This method is called after we've scanned the < of a
1079 // start tag. So we have to get the element name, then scan the attributes,
1080 // after which we are either going to see >, />, or attributes followed
1081 // by one of those sequences.
//
// Outline of the work done below, in order:
//  1. Read the element QName and do the raw (un-normalized) attribute scan.
//  2. For a non-root element, capture the parent's content model, its leaf
//     name/type vector and scope, which feed the lax/skip decision.
//  3. Add a level to the element stack, process externally supplied schema
//     locations (root only) and scan the raw attributes for namespace and
//     schema declarations.
//  4. Resolve the QName to a URI id and look up the element declaration,
//     switching grammars where needed and faulting in a placeholder
//     declaration when nothing is found.
//  5. Emit 'not defined' errors as appropriate, validate the element, and
//     record element, URI, scope and grammar on the element stack.
//  6. Build the final attribute list, activate identity constraints and
//     make the startElement / handleAttributesPSVI callouts.
//  7. If the tag was an empty element (isEmpty, filled in by rawAttrScan):
//     pop the stack, check the empty content, make the end-element callouts
//     and restore the grammar and validation flag. Otherwise send a partial
//     element PSVI (if a PSVI handler is installed) and push this element's
//     error state on fErrorStack.
//
// Parameters:
//  gotData - output flag. Set to true on entry; set to false only when the
//            element scanned here is an empty root element.
//
// Returns:
//  false if no valid element name could be read (an error is emitted and
//  the reader is asked to skip to the next '<'); true in all other cases.
scanStartTag(bool & gotData)1082 bool SGXMLScanner::scanStartTag(bool& gotData)
1083 {
1084 // Assume we will still have data until proven otherwise. It will only
1085 // ever be false if this is the root and its empty.
1086 gotData = true;
1087
1088 // Reset element content
1089 fContent.reset();
1090
1091 // The current position is after the open bracket, so we need to read in
1092 // in the element name.
1093 int prefixColonPos;
1094 if (!fReaderMgr.getQName(fQNameBuf, &prefixColonPos))
1095 {
1096 if (fQNameBuf.isEmpty())
1097 emitError(XMLErrs::ExpectedElementName);
1098 else
1099 emitError(XMLErrs::InvalidElementName, fQNameBuf.getRawBuffer());
1100 fReaderMgr.skipToChar(chOpenAngle);
1101 return false;
1102 }
1103
1104 // See if its the root element
1105 const bool isRoot = fElemStack.isEmpty();
1106
1107 // Skip any whitespace after the name
1108 fReaderMgr.skipPastSpaces();
1109
1110 // First we have to do the rawest attribute scan. We don't do any
1111 // normalization of them at all, since we don't know yet what type they
1112 // might be (since we need the element decl in order to do that.)
1113 const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
1114 bool isEmpty;
1115 XMLSize_t attCount = rawAttrScan
1116 (
1117 qnameRawBuf
1118 , *fRawAttrList
1119 , isEmpty
1120 );
1121
1122 // save the contentleafname and currentscope before addlevel, for later use
1123 ContentLeafNameTypeVector* cv = 0;
1124 XMLContentModel* cm = 0;
1125 unsigned int currentScope = Grammar::TOP_LEVEL_SCOPE;
1126 bool laxThisOne = false;
// The stack top is still the parent here, because addLevel() has not been
// called yet for the element being scanned.
1127 if (!isRoot)
1128 {
1129 // schema validator will have correct type if validating
1130 SchemaElementDecl* tempElement = (SchemaElementDecl*)
1131 fElemStack.topElement()->fThisElement;
1132 SchemaElementDecl::ModelTypes modelType = tempElement->getModelType();
1133 ComplexTypeInfo *currType = 0;
1134
1135 if (fValidate)
1136 {
1137 currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
1138 if (currType)
1139 modelType = (SchemaElementDecl::ModelTypes)currType->getContentType();
1140 else // something must have gone wrong
1141 modelType = SchemaElementDecl::Any;
1142 }
1143 else
1144 {
1145 currType = tempElement->getComplexTypeInfo();
1146 }
1147
1148 if ((modelType == SchemaElementDecl::Mixed_Simple)
1149 || (modelType == SchemaElementDecl::Mixed_Complex)
1150 || (modelType == SchemaElementDecl::Children))
1151 {
// NOTE(review): currType and cm are dereferenced without a null check.
// When validating, a null currType forces modelType to Any above, so this
// branch is not taken; when not validating, currType comes straight from
// the parent's decl - presumably non-null for these model types, confirm.
1152 cm = currType->getContentModel();
1153 cv = cm->getContentLeafNameTypeVector();
1154 currentScope = fElemStack.getCurrentScope();
1155 }
1156 else if (modelType == SchemaElementDecl::Any) {
1157 laxThisOne = true;
1158 }
1159 }
1160
1161 // Now, since we might have to update the namespace map for this element,
1162 // but we don't have the element decl yet, we just tell the element stack
1163 // to expand up to get ready.
1164 XMLSize_t elemDepth = fElemStack.addLevel();
1165 fElemStack.setValidationFlag(fValidate);
1166 fElemStack.setPrefixColonPos(prefixColonPos);
1167
1168 // Check if there is any external schema location specified, and if we are at root,
1169 // go through them first before scanning those specified in the instance document
1170 if (isRoot
1171 && (fExternalSchemaLocation || fExternalNoNamespaceSchemaLocation)) {
1172
1173 if (fExternalSchemaLocation)
1174 parseSchemaLocation(fExternalSchemaLocation, true);
1175 if (fExternalNoNamespaceSchemaLocation)
1176 resolveSchemaGrammar(fExternalNoNamespaceSchemaLocation, XMLUni::fgZeroLenString, true);
1177 }
1178
1179 // Make an initial pass through the list and find any xmlns attributes or
1180 // schema attributes.
1181 if (attCount)
1182 scanRawAttrListforNameSpaces(attCount);
1183
1184 // Resolve the qualified name to a URI and name so that we can look up
1185 // the element decl for this element. We have now update the prefix to
1186 // namespace map so we should get the correct element now.
1187 unsigned int uriId = resolveQNameWithColon
1188 (
1189 qnameRawBuf
1190 , fPrefixBuf
1191 , ElemStack::Mode_Element
1192 , prefixColonPos
1193 );
1194
1195 //if schema, check if we should lax or skip the validation of this element
// parentValidation remembers the validation flag as it was on entry; fValidate
// may be switched off for this element further down, but the decision to
// register this element as a child of its parent uses the saved value.
1196 bool parentValidation = fValidate;
1197 if (cv) {
1198 QName element(fPrefixBuf.getRawBuffer(), &qnameRawBuf[prefixColonPos + 1], uriId, fMemoryManager);
1199 // elementDepth will be > 0, as cv is only constructed if element is not
1200 // root.
1201 laxThisOne = laxElementValidation(&element, cv, cm, elemDepth - 1);
1202 }
1203
1204 // Look up the element now in the grammar. This will get us back a
1205 // generic element decl object. We tell him to fault one in if he does
1206 // not find it.
1207 XMLElementDecl* elemDecl = 0;
1208 bool wasAdded = false;
1209 const XMLCh* nameRawBuf = &qnameRawBuf[prefixColonPos + 1];
1210 const XMLCh* original_uriStr = fGrammar->getTargetNamespace();
1211
// Case 1: the element name resolved to something other than the empty
// namespace.
1212 if (uriId != fEmptyNamespaceId) {
1213
1214 // Check in current grammar before switching if necessary
1215 elemDecl = fGrammar->getElemDecl
1216 (
1217 uriId
1218 , nameRawBuf
1219 , qnameRawBuf
1220 , currentScope
1221 );
1222 if(!elemDecl)
1223 {
1224 // look in the list of undeclared elements, as would have been done
1225 // before we made grammars stateless:
1226 elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, currentScope);
1227 }
1228 // this is initialized correctly only if there is
1229 // no element decl. The other uses in this scope will only
1230 // be encountered if there continues to be no element decl--which
1231 // implies that this will have been initialized correctly.
1232 unsigned int orgGrammarUri = uriId;
1233 if (!elemDecl && ( orgGrammarUri = fURIStringPool->getId(original_uriStr)) != uriId) {
1234 // not found, switch to the specified grammar
1235 const XMLCh* uriStr = getURIText(uriId);
1236 bool errorCondition = !switchGrammar(uriStr) && fValidate;
1237 if (errorCondition && !laxThisOne)
1238 {
1239 fValidator->emitError
1240 (
1241 XMLValid::GrammarNotFound
1242 ,uriStr
1243 );
1244 }
1245
1246 elemDecl = fGrammar->getElemDecl
1247 (
1248 uriId
1249 , nameRawBuf
1250 , qnameRawBuf
1251 , currentScope
1252 );
1253 }
1254
1255 if (!elemDecl && currentScope != Grammar::TOP_LEVEL_SCOPE) {
1256 // if not found, then it may be a reference, try TOP_LEVEL_SCOPE
1257 elemDecl = fGrammar->getElemDecl
1258 (
1259 uriId
1260 , nameRawBuf
1261 , qnameRawBuf
1262 , Grammar::TOP_LEVEL_SCOPE
1263 );
1264 if(!elemDecl)
1265 {
1266 // look in the list of undeclared elements, as would have been done
1267 // before we made grammars stateless:
1268 elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, (int)Grammar::TOP_LEVEL_SCOPE);
1269 }
1270 if(!elemDecl) {
1271 // still not found in specified uri
1272 // try emptyNamespace see if element should be un-qualified.
1273 // Use a temp variable until we decide this is the case
1274 XMLElementDecl* tempElemDecl = fGrammar->getElemDecl
1275 (
1276 fEmptyNamespaceId
1277 , nameRawBuf
1278 , qnameRawBuf
1279 , currentScope
1280 );
1281 if (tempElemDecl && tempElemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) {
1282 fValidator->emitError
1283 (
1284 XMLValid::ElementNotUnQualified
1285 , qnameRawBuf
1286 );
1287 elemDecl = tempElemDecl;
1288 }
1289 }
1290 }
1291
1292 if (!elemDecl) {
1293 // still not found, fault this in and issue error later
1294 // switch back to original grammar first (if necessary)
1295 if(orgGrammarUri != uriId)
1296 {
1297 switchGrammar(original_uriStr);
1298 }
// The placeholder decl is created with model type Any and top level scope,
// but is stored in the non-declared pool under currentScope, which is the
// key the getByKey() lookups above use first.
1299 elemDecl = new (fMemoryManager) SchemaElementDecl
1300 (
1301 fPrefixBuf.getRawBuffer()
1302 , nameRawBuf
1303 , uriId
1304 , SchemaElementDecl::Any
1305 , Grammar::TOP_LEVEL_SCOPE
1306 , fMemoryManager
1307 );
1308 elemDecl->setId(fElemNonDeclPool->put((void*)elemDecl->getBaseName(), uriId, currentScope, (SchemaElementDecl*)elemDecl));
1309 wasAdded = true;
1310 }
1311 }
// Case 2: the element name resolved to the empty namespace. elemDecl was
// initialized to 0 above and is only assigned inside the branch just
// closed, so the !elemDecl test is always true when this is reached.
1312 else if (!elemDecl)
1313 {
1314 //the element has no prefix,
1315 //thus it is either a non-qualified element defined in current targetNS
1316 //or an element that is defined in the globalNS
1317
1318 //try unqualifed first
1319 elemDecl = fGrammar->getElemDecl
1320 (
1321 uriId
1322 , nameRawBuf
1323 , qnameRawBuf
1324 , currentScope
1325 );
1326 if(!elemDecl)
1327 {
1328 // look in the list of undeclared elements, as would have been done
1329 // before we made grammars stateless:
1330 elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, currentScope);
1331 }
1332 // this is initialized correctly only if there is
1333 // no element decl. The other uses in this scope will only
1334 // be encountered if there continues to be no element decl--which
1335 // implies that this will have been initialized correctly.
1336 unsigned int orgGrammarUri = fEmptyNamespaceId;
1337 if (!elemDecl && (orgGrammarUri = fURIStringPool->getId(original_uriStr)) != fEmptyNamespaceId) {
1338 //not found, switch grammar and try globalNS
1339 bool errorCondition = !switchGrammar(XMLUni::fgZeroLenString) && fValidate;
1340 if (errorCondition && !laxThisOne)
1341 {
1342 fValidator->emitError
1343 (
1344 XMLValid::GrammarNotFound
1345 , XMLUni::fgZeroLenString
1346 );
1347 }
1348
1349 elemDecl = fGrammar->getElemDecl
1350 (
1351 uriId
1352 , nameRawBuf
1353 , qnameRawBuf
1354 , currentScope
1355 );
1356 }
1357
1358 if (!elemDecl && currentScope != Grammar::TOP_LEVEL_SCOPE) {
1359 // if not found, then it may be a reference, try TOP_LEVEL_SCOPE
1360 elemDecl = fGrammar->getElemDecl
1361 (
1362 uriId
1363 , nameRawBuf
1364 , qnameRawBuf
1365 , Grammar::TOP_LEVEL_SCOPE
1366 );
1367 if(!elemDecl)
1368 {
1369 // look in the list of undeclared elements, as would have been done
1370 // before we made grammars stateless:
1371 elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, (int)Grammar::TOP_LEVEL_SCOPE);
1372 }
1373 if (!elemDecl && orgGrammarUri != fEmptyNamespaceId) {
1374 // still Not found in specified uri
1375 // go to original Grammar again to see if element needs to be fully qualified.
1376 bool errorCondition = !switchGrammar(original_uriStr) && fValidate;
1377 if (errorCondition && !laxThisOne)
1378 {
1379 fValidator->emitError
1380 (
1381 XMLValid::GrammarNotFound
1382 ,original_uriStr
1383 );
1384 }
1385
1386 // Use a temp variable until we decide this is the case
1387 XMLElementDecl* tempElemDecl = fGrammar->getElemDecl
1388 (
1389 orgGrammarUri
1390 , nameRawBuf
1391 , qnameRawBuf
1392 , currentScope
1393 );
1394 if (tempElemDecl && tempElemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) {
1395 fValidator->emitError
1396 (
1397 XMLValid::ElementNotQualified
1398 , qnameRawBuf
1399 );
1400 elemDecl=tempElemDecl;
1401 }
1402 }
1403 }
1404
1405 if (!elemDecl) {
1406 // still not found, fault this in and issue error later
1407 // switch back to original grammar first (if necessary)
1408 if(orgGrammarUri != fEmptyNamespaceId)
1409 {
1410 switchGrammar(original_uriStr);
1411 }
1412 elemDecl = new (fMemoryManager) SchemaElementDecl
1413 (
1414 fPrefixBuf.getRawBuffer()
1415 , nameRawBuf
1416 , uriId
1417 , SchemaElementDecl::Any
1418 , Grammar::TOP_LEVEL_SCOPE
1419 , fMemoryManager
1420 );
1421 elemDecl->setId(fElemNonDeclPool->put((void*)elemDecl->getBaseName(), uriId, currentScope, (SchemaElementDecl*)elemDecl));
1422 wasAdded = true;
1423 }
1424 }
1425
1426 // this info needed for DOMTypeInfo
1427 fPSVIElemContext.fErrorOccurred = false;
1428
1429 // We do something different here according to whether we found the
1430 // element or not.
// bXsiTypeSet is read from the schema validator; when it is set, the
// 'element not defined' errors below are suppressed.
1431 bool bXsiTypeSet= (fValidator)?((SchemaValidator*)fValidator)->getIsXsiTypeSet():false;
1432 if (wasAdded)
1433 {
1434 if (laxThisOne && !bXsiTypeSet) {
1435 fValidate = false;
1436 fElemStack.setValidationFlag(fValidate);
1437 }
1438
1439 // If validating then emit an error
1440 if (fValidate)
1441 {
1442 // This is to tell the reuse Validator that this element was
1443 // faulted-in, was not an element in the grammar pool originally
1444 elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);
1445
1446 if(!bXsiTypeSet)
1447 {
1448 fValidator->emitError
1449 (
1450 XMLValid::ElementNotDefined
1451 , elemDecl->getFullName()
1452 );
1453 fPSVIElemContext.fErrorOccurred = true;
1454 }
1455 }
1456 }
1457 else
1458 {
1459 // If its not marked declared and validating, then emit an error
1460 if (!elemDecl->isDeclared()) {
1461 if(elemDecl->getCreateReason() == XMLElementDecl::NoReason) {
1462 if(!bXsiTypeSet)
1463 fPSVIElemContext.fErrorOccurred = true;
1464 }
1465 if (laxThisOne) {
1466 fValidate = false;
1467 fElemStack.setValidationFlag(fValidate);
1468 }
1469
1470 if (fValidate && !bXsiTypeSet)
1471 {
1472 fValidator->emitError
1473 (
1474 XMLValid::ElementNotDefined
1475 , elemDecl->getFullName()
1476 );
1477 }
1478 }
1479 }
1480
1481
1482 // Now we can update the element stack to set the current element
1483 // decl. We expanded the stack above, but couldn't store the element
1484 // decl because we didn't know it yet.
1485 fElemStack.setElement(elemDecl, fReaderMgr.getCurrentReaderNum());
1486 fElemStack.setCurrentURI(uriId);
1487
1488 if (isRoot)
1489 {
1490 fRootGrammar = fGrammar;
1491 fRootElemName = XMLString::replicate(qnameRawBuf, fMemoryManager);
1492 }
1493
1494 if (fPSVIHandler)
1495 {
1496 fPSVIElemContext.fElemDepth++;
1497
1498 if (elemDecl->isDeclared())
1499 {
1500 fPSVIElemContext.fNoneValidationDepth = fPSVIElemContext.fElemDepth;
1501 }
1502 else
1503 {
1504 fPSVIElemContext.fFullValidationDepth = fPSVIElemContext.fElemDepth;
1505
1506 /******
1507 * While we report an error for historical reasons, this should
1508 * actually result in lax assessment - NG.
1509 if (isRoot && fValidate)
1510 fPSVIElemContext.fErrorOccurred = true;
1511 ******/
1512 }
1513 }
1514
1515 // Validate the element
1516 if (fValidate)
1517 {
1518 fValidator->validateElement(elemDecl);
1519 if (((SchemaValidator*) fValidator)->getErrorOccurred())
1520 fPSVIElemContext.fErrorOccurred = true;
1521 }
1522
1523 // squirrel away the element's QName, so that we can do an efficient
1524 // end-tag match
1525 fElemStack.setCurrentSchemaElemName(fQNameBuf.getRawBuffer());
1526
1527 ComplexTypeInfo* typeinfo = (fValidate)
1528 ? ((SchemaValidator*)fValidator)->getCurrentTypeInfo()
1529 : ((SchemaElementDecl*) elemDecl)->getComplexTypeInfo();
1530
1531 if (typeinfo) {
1532 currentScope = typeinfo->getScopeDefined();
1533
1534 // switch grammar if the typeinfo has a different grammar (happens when there is xsi:type)
// The text before the first comma of the type name is used as the grammar
// URI. A comma at index 0 selects the grammar for the empty URI; any other
// result of the search leaves the current grammar untouched.
1535 XMLCh* typeName = typeinfo->getTypeName();
1536 const int comma = XMLString::indexOf(typeName, chComma);
1537 if (comma > 0) {
1538 XMLBuffer prefixBuf(comma+1, fMemoryManager);
1539 prefixBuf.append(typeName, comma);
1540 const XMLCh* uriStr = prefixBuf.getRawBuffer();
1541
1542 bool errorCondition = !switchGrammar(uriStr) && fValidate;
1543 if (errorCondition && !laxThisOne)
1544 {
1545 fValidator->emitError
1546 (
1547 XMLValid::GrammarNotFound
1548 , prefixBuf.getRawBuffer()
1549 );
1550 }
1551 }
1552 else if (comma == 0) {
1553 bool errorCondition = !switchGrammar(XMLUni::fgZeroLenString) && fValidate;
1554 if (errorCondition && !laxThisOne)
1555 {
1556 fValidator->emitError
1557 (
1558 XMLValid::GrammarNotFound
1559 , XMLUni::fgZeroLenString
1560 );
1561 }
1562 }
1563 }
1564 fElemStack.setCurrentScope(currentScope);
1565
1566 // Set element next state
// fElemState and fElemLoopState are indexed by element depth, so they are
// grown first when this depth is at or beyond their current size.
1567 if (elemDepth >= fElemStateSize) {
1568 resizeElemState();
1569 }
1570
1571 fElemState[elemDepth] = 0;
1572 fElemLoopState[elemDepth] = 0;
1573 fElemStack.setCurrentGrammar(fGrammar);
1574
1575 // If this is the first element and we are validating, check the root
1576 // element.
// NOTE: despite the wording above, the branch below runs for non-root
// elements only, and it tests the validation flag as it was on entry to
// this method (parentValidation), not the possibly updated fValidate.
1577 if (!isRoot && parentValidation)
1578 {
1579 // If the element stack is not empty, then add this element as a
1580 // child of the previous top element. If its empty, this is the root
1581 // elem and is not the child of anything.
1582 fElemStack.addChild(elemDecl->getElementName(), true);
1583 }
1584
1585 // PSVI handling: must reset this, even if no attributes...
1586 if(getPSVIHandler())
1587 fPSVIAttrList->reset();
1588
1589 // Now lets get the fAttrList filled in. This involves faulting in any
1590 // defaulted and fixed attributes and normalizing the values of any that
1591 // we got explicitly.
1592 //
1593 // We update the attCount value with the total number of attributes, but
1594 // it goes in with the number of values we got during the raw scan of
1595 // explictly provided attrs above.
1596 attCount = buildAttList(*fRawAttrList, attCount, elemDecl, *fAttrList);
1597
1598 if(attCount)
1599 {
1600 // clean up after ourselves:
1601 // clear the map used to detect duplicate attributes
1602 fUndeclaredAttrRegistry->removeAll();
1603 }
1604
1605 // activate identity constraints
1606 if (toCheckIdentityConstraint())
1607 {
1608 fICHandler->activateIdentityConstraint
1609 (
1610 (SchemaElementDecl*) elemDecl
1611 , (int) elemDepth
1612 , uriId
1613 , fPrefixBuf.getRawBuffer()
1614 , *fAttrList
1615 , attCount
1616 , fValidationContext
1617 );
1618
1619 }
1620
1621 // Since the element may have default values, call start tag now regardless if it is empty or not
1622 // If we have a document handler, then tell it about this start tag
1623 if (fDocHandler)
1624 {
1625 fDocHandler->startElement
1626 (
1627 *elemDecl
1628 , uriId
1629 , fPrefixBuf.getRawBuffer()
1630 , *fAttrList
1631 , attCount
1632 , false
1633 , isRoot
1634 );
1635 } // may be where we output something...
1636
1637 // if we have a PSVIHandler, now's the time to call
1638 // its handleAttributesPSVI method:
1639 if(fPSVIHandler)
1640 {
1641 QName *eName = elemDecl->getElementName();
1642 fPSVIHandler->handleAttributesPSVI
1643 (
1644 eName->getLocalPart()
1645 , fURIStringPool->getValueForId(eName->getURI())
1646 , fPSVIAttrList
1647 );
1648 }
1649
1650 // If empty, validate content right now if we are validating and then
1651 // pop the element stack top. Else, we have to update the current stack
1652 // top's namespace mapping elements.
1653 if (isEmpty)
1654 {
1655 // Pop the element stack back off since it'll never be used now
// The pop happens before the content check, so the grammar and validation
// flag read back from fElemStack at the end of this branch are taken from
// whatever level is on top after the pop (presumably the parent's).
1656 fElemStack.popTop();
1657
1658 // reset current type info
1659 DatatypeValidator* psviMemberType = 0;
1660 if (fGrammarType == Grammar::SchemaGrammarType)
1661 {
1662 if (fValidate && elemDecl->isDeclared())
1663 {
1664 fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
1665 if(!fPSVIElemContext.fCurrentTypeInfo)
1666 fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
1667 else
1668 fPSVIElemContext.fCurrentDV = 0;
1669 if(fPSVIHandler)
1670 {
1671 fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) fValidator)->getNormalizedValue();
1672
1673 if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString))
1674 fPSVIElemContext.fNormalizedValue = 0;
1675 }
1676 }
1677 else
1678 {
1679 fPSVIElemContext.fCurrentDV = 0;
1680 fPSVIElemContext.fCurrentTypeInfo = 0;
1681 fPSVIElemContext.fNormalizedValue = 0;
1682 }
1683 }
1684
1685 // If validating, then insure that its legal to have no content
1686 if (fValidate)
1687 {
1688 XMLSize_t failure;
1689 bool res = fValidator->checkContent(elemDecl, 0, 0, &failure);
1690 if (!res)
1691 {
1692 // REVISIT: in the case of xsi:type, this may
1693 // return the wrong string...
1694 fValidator->emitError
1695 (
1696 XMLValid::ElementNotValidForContent
1697 , elemDecl->getFullName()
1698 , elemDecl->getFormattedContentModel()
1699 );
1700 }
1701
1702 if (((SchemaValidator*) fValidator)->getErrorOccurred())
1703 fPSVIElemContext.fErrorOccurred = true;
1704 // note that if we're empty, won't be a current DV
1705 else
1706 {
1707 if (fPSVIHandler)
1708 {
1709 fPSVIElemContext.fIsSpecified = ((SchemaValidator*) fValidator)->getIsElemSpecified();
1710 if(fPSVIElemContext.fIsSpecified)
1711 fPSVIElemContext.fNormalizedValue = ((SchemaElementDecl *)elemDecl)->getDefaultValue();
1712 }
1713 if (fPSVIElemContext.fCurrentDV && fPSVIElemContext.fCurrentDV->getType() == DatatypeValidator::Union)
1714 psviMemberType = fValidationContext->getValidatingMemberType();
1715 }
1716
1717 // call matchers and de-activate context
1718 if (toCheckIdentityConstraint())
1719 {
1720 fICHandler->deactivateContext
1721 (
1722 (SchemaElementDecl *) elemDecl
1723 , fContent.getRawBuffer()
1724 , fValidationContext
1725 , fPSVIElemContext.fCurrentDV
1726 );
1727 }
1728
1729 }
1730 else if (fGrammarType == Grammar::SchemaGrammarType) {
1731 ((SchemaValidator*)fValidator)->resetNillable();
1732 }
1733
1734 if (fPSVIHandler)
1735 {
1736 endElementPSVI
1737 (
1738 (SchemaElementDecl*)elemDecl, psviMemberType
1739 );
1740 }
1741
1742 // If we have a doc handler, tell it about the end tag
1743 if (fDocHandler)
1744 {
1745 fDocHandler->endElement
1746 (
1747 *elemDecl
1748 , uriId
1749 , isRoot
1750 , fPrefixBuf.getRawBuffer()
1751 );
1752 }
1753
1754 // If the elem stack is empty, then it was an empty root
1755 if (isRoot)
1756 gotData = false;
1757 else
1758 {
1759 // Restore the grammar
1760 fGrammar = fElemStack.getCurrentGrammar();
1761 fGrammarType = fGrammar->getGrammarType();
1762 fValidator->setGrammar(fGrammar);
1763
1764 // Restore the validation flag
1765 fValidate = fElemStack.getValidationFlag();
1766 }
1767 }
1768 else // not empty
1769 {
1770
1771 // send a partial element psvi
1772 if (fPSVIHandler)
1773 {
1774
1775 ComplexTypeInfo* curTypeInfo = 0;
1776 DatatypeValidator* curDV = 0;
1777 XSTypeDefinition* typeDef = 0;
1778
1779 if (fValidate && elemDecl->isDeclared())
1780 {
1781 curTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
1782
1783 if (curTypeInfo)
1784 {
1785 typeDef = (XSTypeDefinition*) fModel->getXSObject(curTypeInfo);
1786 }
1787 else
1788 {
1789 curDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
1790
1791 if (curDV)
1792 {
1793 typeDef = (XSTypeDefinition*) fModel->getXSObject(curDV);
1794 }
1795 }
1796 }
1797
1798 fPSVIElement->reset
1799 (
1800 PSVIElement::VALIDITY_NOTKNOWN
1801 , PSVIElement::VALIDATION_NONE
1802 , fRootElemName
1803 , ((SchemaValidator*) fValidator)->getIsElemSpecified()
1804 , (elemDecl->isDeclared()) ? (XSElementDeclaration*) fModel->getXSObject(elemDecl) : 0
1805 , typeDef
1806 , 0 //memberType
1807 , fModel
1808 , ((SchemaElementDecl*)elemDecl)->getDefaultValue()
1809 , 0
1810 , 0
1811 , 0
1812 );
1813
1814
1815 fPSVIHandler->handlePartialElementPSVI
1816 (
1817 elemDecl->getBaseName()
1818 , fURIStringPool->getValueForId(elemDecl->getURI())
1819 , fPSVIElement
1820 );
1821
1822 }
1823
// Record this element's error state; the end-tag handling earlier in this
// file pops fErrorStack and merges the value into the enclosing element's.
1824 fErrorStack->push(fPSVIElemContext.fErrorOccurred);
1825 }
1826
1827 return true;
1828 }
1829
1830
1831 // ---------------------------------------------------------------------------
1832 // SGXMLScanner: Grammar preparsing
1833 // ---------------------------------------------------------------------------
loadGrammar(const InputSource & src,const short grammarType,const bool toCache)1834 Grammar* SGXMLScanner::loadGrammar(const InputSource& src
1835 , const short grammarType
1836 , const bool toCache)
1837 {
1838 Grammar* loadedGrammar = 0;
1839
1840 ReaderMgrResetType resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
1841
1842 try
1843 {
1844 fGrammarResolver->cacheGrammarFromParse(false);
1845 // if the new grammar has to be cached, better use the already cached
1846 // grammars, or the an exception will be thrown when caching an already
1847 // cached grammar
1848 fGrammarResolver->useCachedGrammarInParse(toCache);
1849 fRootGrammar = 0;
1850
1851 if (fValScheme == Val_Auto) {
1852 fValidate = true;
1853 }
1854
1855 // Reset some status flags
1856 fInException = false;
1857 fStandalone = false;
1858 fErrorCount = 0;
1859 fHasNoDTD = true;
1860 fSeeXsi = false;
1861
1862 if (grammarType == Grammar::SchemaGrammarType) {
1863 loadedGrammar = loadXMLSchemaGrammar(src, toCache);
1864 }
1865 }
1866 // NOTE:
1867 //
1868 // In all of the error processing below, the emitError() call MUST come
1869 // before the flush of the reader mgr, or it will fail because it tries
1870 // to find out the position in the XML source of the error.
1871 catch(const XMLErrs::Codes)
1872 {
1873 // This is a 'first failure' exception, so fall through
1874 }
1875 catch(const XMLValid::Codes)
1876 {
1877 // This is a 'first fatal error' type exit, so fall through
1878 }
1879 catch(const XMLException& excToCatch)
1880 {
1881 // Emit the error and catch any user exception thrown from here. Make
1882 // sure in all cases we flush the reader manager.
1883 fInException = true;
1884 try
1885 {
1886 if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
1887 emitError
1888 (
1889 XMLErrs::XMLException_Warning
1890 , excToCatch.getCode()
1891 , excToCatch.getMessage()
1892 );
1893 else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
1894 emitError
1895 (
1896 XMLErrs::XMLException_Fatal
1897 , excToCatch.getCode()
1898 , excToCatch.getMessage()
1899 );
1900 else
1901 emitError
1902 (
1903 XMLErrs::XMLException_Error
1904 , excToCatch.getCode()
1905 , excToCatch.getMessage()
1906 );
1907 }
1908 catch(const OutOfMemoryException&)
1909 {
1910 // This is a special case for out-of-memory
1911 // conditions, because resetting the ReaderMgr
1912 // can be problematic.
1913 resetReaderMgr.release();
1914
1915 throw;
1916 }
1917 }
1918 catch(const OutOfMemoryException&)
1919 {
1920 // This is a special case for out-of-memory
1921 // conditions, because resetting the ReaderMgr
1922 // can be problematic.
1923 resetReaderMgr.release();
1924
1925 throw;
1926 }
1927
1928 return loadedGrammar;
1929 }
1930
resetCachedGrammar()1931 void SGXMLScanner::resetCachedGrammar ()
1932 {
1933 fCachedSchemaInfoList->removeAll ();
1934 }
1935
1936 // ---------------------------------------------------------------------------
1937 // SGXMLScanner: Private helper methods
1938 // ---------------------------------------------------------------------------
1939 // This method handles the common initialization, to avoid having to do
1940 // it redundantly in multiple constructors.
commonInit()1941 void SGXMLScanner::commonInit()
1942 {
1943 // Create the element state array
1944 fElemState = (unsigned int*) fMemoryManager->allocate
1945 (
1946 fElemStateSize * sizeof(unsigned int)
1947 ); //new unsigned int[fElemStateSize];
1948 fElemLoopState = (unsigned int*) fMemoryManager->allocate
1949 (
1950 fElemStateSize * sizeof(unsigned int)
1951 ); //new unsigned int[fElemStateSize];
1952
1953 // And we need one for the raw attribute scan. This just stores key/
1954 // value string pairs (prior to any processing.)
1955 fRawAttrList = new (fMemoryManager) RefVectorOf<KVStringPair>(32, true, fMemoryManager);
1956 fRawAttrColonList = (int*) fMemoryManager->allocate
1957 (
1958 fRawAttrColonListSize * sizeof(int)
1959 );
1960
1961 // Create the Validator and init them
1962 fSchemaValidator = new (fMemoryManager) SchemaValidator(0, fMemoryManager);
1963 initValidator(fSchemaValidator);
1964
1965 // Create IdentityConstraint info
1966 fICHandler = new (fMemoryManager) IdentityConstraintHandler(this, fMemoryManager);
1967
1968 // Add the default entity entries for the character refs that must always
1969 // be present.
1970 fEntityTable = new (fMemoryManager) ValueHashTableOf<XMLCh>(11, fMemoryManager);
1971 fEntityTable->put((void*) XMLUni::fgAmp, chAmpersand);
1972 fEntityTable->put((void*) XMLUni::fgLT, chOpenAngle);
1973 fEntityTable->put((void*) XMLUni::fgGT, chCloseAngle);
1974 fEntityTable->put((void*) XMLUni::fgQuot, chDoubleQuote);
1975 fEntityTable->put((void*) XMLUni::fgApos, chSingleQuote);
1976 fElemNonDeclPool = new (fMemoryManager) RefHash3KeysIdPool<SchemaElementDecl>(29, true, 128, fMemoryManager);
1977 fAttDefRegistry = new (fMemoryManager) RefHashTableOf<unsigned int, PtrHasher>
1978 (
1979 131, false, fMemoryManager
1980 );
1981 fUndeclaredAttrRegistry = new (fMemoryManager) Hash2KeysSetOf<StringHasher>(7, fMemoryManager);
1982 fPSVIAttrList = new (fMemoryManager) PSVIAttributeList(fMemoryManager);
1983
1984 fSchemaInfoList = new (fMemoryManager) RefHash2KeysTableOf<SchemaInfo>(29, fMemoryManager);
1985 fCachedSchemaInfoList = new (fMemoryManager) RefHash2KeysTableOf<SchemaInfo>(29, fMemoryManager);
1986
1987 if (fValidator)
1988 {
1989 if (!fValidator->handlesSchema())
1990 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
1991 }
1992 else
1993 {
1994 fValidator = fSchemaValidator;
1995 }
1996 }
1997
cleanUp()1998 void SGXMLScanner::cleanUp()
1999 {
2000 fMemoryManager->deallocate(fElemState); //delete [] fElemState;
2001 fMemoryManager->deallocate(fElemLoopState); //delete [] fElemLoopState;
2002 delete fSchemaGrammar;
2003 delete fEntityTable;
2004 delete fRawAttrList;
2005 fMemoryManager->deallocate(fRawAttrColonList);
2006 delete fSchemaValidator;
2007 delete fICHandler;
2008 delete fElemNonDeclPool;
2009 delete fAttDefRegistry;
2010 delete fUndeclaredAttrRegistry;
2011 delete fPSVIAttrList;
2012 if (fPSVIElement)
2013 delete fPSVIElement;
2014
2015 if (fErrorStack)
2016 delete fErrorStack;
2017
2018 delete fSchemaInfoList;
2019 delete fCachedSchemaInfoList;
2020 }
2021
resizeElemState()2022 void SGXMLScanner::resizeElemState() {
2023
2024 unsigned int newSize = fElemStateSize * 2;
2025 unsigned int* newElemState = (unsigned int*) fMemoryManager->allocate
2026 (
2027 newSize * sizeof(unsigned int)
2028 ); //new unsigned int[newSize];
2029 unsigned int* newElemLoopState = (unsigned int*) fMemoryManager->allocate
2030 (
2031 newSize * sizeof(unsigned int)
2032 ); //new unsigned int[newSize];
2033
2034 // Copy the existing values
2035 unsigned int index = 0;
2036 for (; index < fElemStateSize; index++)
2037 {
2038 newElemState[index] = fElemState[index];
2039 newElemLoopState[index] = fElemLoopState[index];
2040 }
2041
2042 for (; index < newSize; index++)
2043 newElemLoopState[index] = newElemState[index] = 0;
2044
2045 // Delete the old array and udpate our members
2046 fMemoryManager->deallocate(fElemState); //delete [] fElemState;
2047 fMemoryManager->deallocate(fElemLoopState); //delete [] fElemLoopState;
2048 fElemState = newElemState;
2049 fElemLoopState = newElemLoopState;
2050 fElemStateSize = newSize;
2051 }
2052
resizeRawAttrColonList()2053 void SGXMLScanner::resizeRawAttrColonList() {
2054
2055 unsigned int newSize = fRawAttrColonListSize * 2;
2056 int* newRawAttrColonList = (int*) fMemoryManager->allocate
2057 (
2058 newSize * sizeof(int)
2059 ); //new int[newSize];
2060
2061 // Copy the existing values
2062 unsigned int index = 0;
2063 for (; index < fRawAttrColonListSize; index++)
2064 newRawAttrColonList[index] = fRawAttrColonList[index];
2065
2066 // Delete the old array and udpate our members
2067 fMemoryManager->deallocate(fRawAttrColonList); //delete [] fRawAttrColonList;
2068 fRawAttrColonList = newRawAttrColonList;
2069 fRawAttrColonListSize = newSize;
2070 }
2071
2072 // This method is called from scanStartTag() to build up the list of
2073 // XMLAttr objects that will be passed out in the start tag callout. We
2074 // get the key/value pairs from the raw scan of explicitly provided attrs,
2075 // which have not been normalized. And we get the element declaration from
2076 // which we will get any defaulted or fixed attribute defs and add those
2077 // in as well.
2078 XMLSize_t
buildAttList(const RefVectorOf<KVStringPair> & providedAttrs,const XMLSize_t attCount,XMLElementDecl * elemDecl,RefVectorOf<XMLAttr> & toFill)2079 SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs
2080 , const XMLSize_t attCount
2081 , XMLElementDecl* elemDecl
2082 , RefVectorOf<XMLAttr>& toFill)
2083 {
2084 // Ask the element to clear the 'provided' flag on all of the att defs
2085 // that it owns, and to return us a boolean indicating whether it has
2086 // any defs.
2087 DatatypeValidator *currDV = 0;
2088 ComplexTypeInfo *currType = 0;
2089
2090 if (fValidate)
2091 {
2092 currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
2093 if (!currType) {
2094 currDV = ((SchemaValidator*)fValidator)->getCurrentDatatypeValidator();
2095 }
2096 }
2097
2098 const bool hasDefs = (currType && fValidate)
2099 ? currType->hasAttDefs()
2100 : elemDecl->hasAttDefs();
2101
2102 fElemCount++;
2103
2104 // If there are no expliclitily provided attributes and there are no
2105 // defined attributes for the element, the we don't have anything to do.
2106 // So just return zero in this case.
2107 if (!hasDefs && !attCount)
2108 return 0;
2109
2110 // Keep up with how many attrs we end up with total
2111 XMLSize_t retCount = 0;
2112
2113 // And get the current size of the output vector. This lets us use
2114 // existing elements until we fill it, then start adding new ones.
2115 const XMLSize_t curAttListSize = toFill.size();
2116
2117 // We need a buffer into which raw scanned attribute values will be
2118 // normalized.
2119 XMLBufBid bbNormal(&fBufMgr);
2120 XMLBuffer& normBuf = bbNormal.getBuffer();
2121
2122 XMLBufBid bbPrefix(&fBufMgr);
2123 XMLBuffer& prefixBuf = bbPrefix.getBuffer();
2124
2125 // Loop through our explicitly provided attributes, which are in the raw
2126 // scanned form, and build up XMLAttr objects.
2127 XMLSize_t index;
2128 const XMLCh* prefPtr, *suffPtr;
2129 for (index = 0; index < attCount; index++)
2130 {
2131 PSVIItem::VALIDITY_STATE attrValid = PSVIItem::VALIDITY_VALID;
2132 PSVIItem::ASSESSMENT_TYPE attrAssessed = PSVIItem::VALIDATION_FULL;
2133 const KVStringPair* curPair = providedAttrs.elementAt(index);
2134
2135 // We have to split the name into its prefix and name parts. Then
2136 // we map the prefix to its URI.
2137 const XMLCh* const namePtr = curPair->getKey();
2138
2139 const int colonInd = fRawAttrColonList[index];
2140 unsigned int uriId;
2141 if (colonInd != -1)
2142 {
2143 prefixBuf.set(namePtr, colonInd);
2144 prefPtr = prefixBuf.getRawBuffer();
2145 suffPtr = namePtr + colonInd + 1;
2146 // Map the prefix to a URI id
2147 uriId = resolvePrefix(prefPtr, ElemStack::Mode_Attribute);
2148 }
2149 else
2150 {
2151 // No colon, so we just have a name with no prefix
2152 prefPtr = XMLUni::fgZeroLenString;
2153 suffPtr = namePtr;
2154 // an empty prefix is always the empty namespace, when dealing with attributes
2155 uriId = fEmptyNamespaceId;
2156 }
2157
2158 // If the uri comes back as the xmlns or xml URI or its just a name
2159 // and that name is 'xmlns', then we handle it specially. So set a
2160 // boolean flag that lets us quickly below know which we are dealing
2161 // with.
2162 const bool isNSAttr = (uriId == fEmptyNamespaceId)?
2163 XMLString::equals(suffPtr, XMLUni::fgXMLNSString) :
2164 (uriId == fXMLNSNamespaceId || XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI));
2165
2166 // If its not a special case namespace attr of some sort, then we
2167 // do normal checking and processing.
2168 XMLAttDef::AttTypes attType = XMLAttDef::CData;
2169 DatatypeValidator *attrValidator = 0;
2170 PSVIAttribute *psviAttr = 0;
2171 bool otherXSI = false;
2172
2173 if (isNSAttr)
2174 {
2175 if(!fUndeclaredAttrRegistry->putIfNotPresent(suffPtr, uriId))
2176 {
2177 emitError
2178 (
2179 XMLErrs::AttrAlreadyUsedInSTag
2180 , namePtr
2181 , elemDecl->getFullName()
2182 );
2183 fPSVIElemContext.fErrorOccurred = true;
2184 }
2185 else
2186 {
2187 bool ValueValidate = false;
2188 bool tokenizeBuffer = false;
2189
2190 if (uriId == fXMLNSNamespaceId)
2191 {
2192 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI);
2193 }
2194 else if (XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI))
2195 {
2196 if (XMLString::equals(suffPtr, SchemaSymbols::fgATT_NILL))
2197 {
2198 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_BOOLEAN);
2199
2200 ValueValidate = true;
2201 }
2202 else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_SCHEMALOCATION))
2203 {
2204 // use anyURI as the validator
2205 // tokenize the data and use the anyURI data for each piece
2206 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI);
2207 //We should validate each value in the schema location however
2208 //this lead to a performance degradation of around 4%. Since
2209 //the first value of each pair needs to match what is in the
2210 //schema document and the second value needs to be valid in
2211 //order to open the document we won't validate it. Need to
2212 //do performance analysis of the anyuri datatype.
2213 //ValueValidate = true;
2214 ValueValidate = false;
2215 tokenizeBuffer = true;
2216 }
2217 else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_NONAMESPACESCHEMALOCATION))
2218 {
2219 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI);
2220 //We should validate this value however
2221 //this lead to a performance degradation of around 4%. Since
2222 //the value needs to be valid in
2223 //order to open the document we won't validate it. Need to
2224 //do performance analysis of the anyuri datatype.
2225 //ValueValidate = true;
2226 ValueValidate = false;
2227 }
2228 else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_TYPE))
2229 {
2230 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_QNAME);
2231
2232 ValueValidate = true;
2233 }
2234 else {
2235 otherXSI = true;
2236 }
2237 }
2238
2239 if (!otherXSI) {
2240 normalizeAttRawValue
2241 (
2242 namePtr
2243 , curPair->getValue()
2244 , normBuf
2245 );
2246
2247 if (fValidate && attrValidator && ValueValidate)
2248 {
2249 ((SchemaValidator*) fValidator)->normalizeWhiteSpace(attrValidator, normBuf.getRawBuffer(), normBuf, true);
2250
2251 ValidationContext* const theContext =
2252 getValidationContext();
2253
2254 if (theContext)
2255 {
2256 try
2257 {
2258 if (tokenizeBuffer) {
2259 XMLStringTokenizer tokenizer(normBuf.getRawBuffer(), fMemoryManager);
2260 while (tokenizer.hasMoreTokens()) {
2261 attrValidator->validate(
2262 tokenizer.nextToken(),
2263 theContext,
2264 fMemoryManager);
2265 }
2266 }
2267 else {
2268 attrValidator->validate(
2269 normBuf.getRawBuffer(),
2270 theContext,
2271 fMemoryManager);
2272 }
2273 }
2274 catch (const XMLException& idve)
2275 {
2276 fValidator->emitError (XMLValid::DatatypeError, idve.getCode(), idve.getMessage());
2277 }
2278 }
2279 }
2280
2281 if(getPSVIHandler() && fGrammarType == Grammar::SchemaGrammarType)
2282 {
2283 psviAttr = fPSVIAttrList->getPSVIAttributeToFill(suffPtr, fURIStringPool->getValueForId(uriId));
2284 XSSimpleTypeDefinition *validatingType = (attrValidator)
2285 ? (XSSimpleTypeDefinition *)fModel->getXSObject(attrValidator)
2286 : 0;
2287 // no attribute declarations for these...
2288 psviAttr->reset(
2289 fRootElemName
2290 , PSVIItem::VALIDITY_NOTKNOWN
2291 , PSVIItem::VALIDATION_NONE
2292 , validatingType
2293 , 0
2294 , 0
2295 , false
2296 , 0
2297 , attrValidator
2298 );
2299 }
2300 }
2301 }
2302 }
2303
2304 if (!isNSAttr || otherXSI)
2305 {
2306 // Some checking for attribute wild card first (for schema)
2307 bool laxThisOne = false;
2308 bool skipThisOne = false;
2309
2310 XMLAttDef* attDefForWildCard = 0;
2311 XMLAttDef* attDef = 0;
2312
2313 if (fGrammarType == Grammar::SchemaGrammarType) {
2314
2315 //retrieve the att def
2316 SchemaAttDef* attWildCard = 0;
2317 if (currType) {
2318 attDef = currType->getAttDef(suffPtr, uriId);
2319 attWildCard = currType->getAttWildCard();
2320 }
2321 else if (!currDV) { // check explicitly-set wildcard
2322 attWildCard = ((SchemaElementDecl*)elemDecl)->getAttWildCard();
2323 }
2324
2325 // if not found or faulted in - check for a matching wildcard attribute
2326 // if no matching wildcard attribute, check (un)qualifed cases and flag
2327 // appropriate errors
2328 if (!attDef || (attDef->getCreateReason() == XMLAttDef::JustFaultIn)) {
2329
2330 if (attWildCard) {
2331 //if schema, see if we should lax or skip the validation of this attribute
2332 if (anyAttributeValidation(attWildCard, uriId, skipThisOne, laxThisOne)) {
2333
2334 if(!skipThisOne)
2335 {
2336 SchemaGrammar* sGrammar = (SchemaGrammar*) fGrammarResolver->getGrammar(getURIText(uriId));
2337 if (sGrammar && sGrammar->getGrammarType() == Grammar::SchemaGrammarType) {
2338 RefHashTableOf<XMLAttDef>* attRegistry = sGrammar->getAttributeDeclRegistry();
2339 if (attRegistry) {
2340 attDefForWildCard = attRegistry->get(suffPtr);
2341 }
2342 }
2343 }
2344 }
2345 }
2346 else if (currType) {
2347 // not found, see if the attDef should be qualified or not
2348 if (uriId == fEmptyNamespaceId) {
2349 attDef = currType->getAttDef(suffPtr, fURIStringPool->getId(fGrammar->getTargetNamespace()));
2350 if (fValidate
2351 && attDef
2352 && attDef->getCreateReason() != XMLAttDef::JustFaultIn) {
2353 // the attribute should be qualified
2354 fValidator->emitError
2355 (
2356 XMLValid::AttributeNotQualified
2357 , attDef->getFullName()
2358 );
2359 fPSVIElemContext.fErrorOccurred = true;
2360 if (getPSVIHandler())
2361 {
2362 attrValid = PSVIItem::VALIDITY_INVALID;
2363 }
2364 }
2365 }
2366 else {
2367 attDef = currType->getAttDef(suffPtr, fEmptyNamespaceId);
2368 if (fValidate
2369 && attDef
2370 && attDef->getCreateReason() != XMLAttDef::JustFaultIn) {
2371 // the attribute should be qualified
2372 fValidator->emitError
2373 (
2374 XMLValid::AttributeNotUnQualified
2375 , attDef->getFullName()
2376 );
2377 fPSVIElemContext.fErrorOccurred = true;
2378 if (getPSVIHandler())
2379 {
2380 attrValid = PSVIItem::VALIDITY_INVALID;
2381 }
2382 }
2383 }
2384 }
2385 }
2386 }
2387
2388 // now need to prepare for duplicate detection
2389 if(attDef)
2390 {
2391 unsigned int *curCountPtr = fAttDefRegistry->get(attDef);
2392 if(!curCountPtr)
2393 {
2394 curCountPtr = getNewUIntPtr();
2395 *curCountPtr = fElemCount;
2396 fAttDefRegistry->put(attDef, curCountPtr);
2397 }
2398 else if(*curCountPtr < fElemCount)
2399 *curCountPtr = fElemCount;
2400 else
2401 {
2402 emitError
2403 (
2404 XMLErrs::AttrAlreadyUsedInSTag
2405 , attDef->getFullName()
2406 , elemDecl->getFullName()
2407 );
2408 fPSVIElemContext.fErrorOccurred = true;
2409 }
2410 }
2411 else
2412 {
2413 if(!fUndeclaredAttrRegistry->putIfNotPresent(suffPtr, uriId))
2414 {
2415 emitError
2416 (
2417 XMLErrs::AttrAlreadyUsedInSTag
2418 , namePtr
2419 , elemDecl->getFullName()
2420 );
2421 fPSVIElemContext.fErrorOccurred = true;
2422 }
2423 }
2424
2425 // if we've found either an attDef or an attDefForWildCard,
2426 // then we're doing full validation and it may still be valid.
2427 if(!attDef && !attDefForWildCard)
2428 {
2429 if(!laxThisOne && !skipThisOne)
2430 {
2431 fPSVIElemContext.fErrorOccurred = true;
2432 }
2433 if(getPSVIHandler())
2434 {
2435 if(!laxThisOne && !skipThisOne)
2436 {
2437 attrValid = PSVIItem::VALIDITY_INVALID;
2438 }
2439 else if(laxThisOne)
2440 {
2441 attrValid = PSVIItem::VALIDITY_NOTKNOWN;
2442 attrAssessed = PSVIItem::VALIDATION_PARTIAL;
2443 }
2444 else
2445 {
2446 attrValid = PSVIItem::VALIDITY_NOTKNOWN;
2447 attrAssessed = PSVIItem::VALIDATION_NONE;
2448 }
2449 }
2450 }
2451
2452 bool errorCondition = fValidate && !attDefForWildCard && !attDef;
2453 if (errorCondition && !skipThisOne && !laxThisOne)
2454 {
2455 //
2456 // Its not valid for this element, so issue an error if we are
2457 // validating.
2458 //
2459 XMLBufBid bbMsg(&fBufMgr);
2460 XMLBuffer& bufMsg = bbMsg.getBuffer();
2461 if (uriId != fEmptyNamespaceId) {
2462 XMLBufBid bbURI(&fBufMgr);
2463 XMLBuffer& bufURI = bbURI.getBuffer();
2464
2465 getURIText(uriId, bufURI);
2466
2467 bufMsg.append(chOpenCurly);
2468 bufMsg.append(bufURI.getRawBuffer());
2469 bufMsg.append(chCloseCurly);
2470 }
2471 bufMsg.append(suffPtr);
2472 fValidator->emitError
2473 (
2474 XMLValid::AttNotDefinedForElement
2475 , bufMsg.getRawBuffer()
2476 , elemDecl->getFullName()
2477 );
2478 }
2479
2480 // Now normalize the raw value since we have the attribute type. We
2481 // don't care about the return status here. If it failed, an error
2482 // was issued, which is all we care about.
2483 if (attDefForWildCard) {
2484 normalizeAttValue(
2485 attDefForWildCard, namePtr, curPair->getValue(), normBuf
2486 );
2487
2488 // If we found an attdef for this one, then lets validate it.
2489 const XMLCh* xsNormalized = normBuf.getRawBuffer();
2490 DatatypeValidator* tempDV = ((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator();
2491 if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
2492 {
2493 // normalize the attribute according to schema whitespace facet
2494 ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf, true);
2495 xsNormalized = fWSNormalizeBuf.getRawBuffer();
2496 if (fNormalizeData && fValidate) {
2497 normBuf.set(xsNormalized);
2498 }
2499 }
2500
2501 if (fValidate ) {
2502 fValidator->validateAttrValue(
2503 attDefForWildCard, xsNormalized, false, elemDecl
2504 );
2505 attrValidator = ((SchemaValidator *)fValidator)->getMostRecentAttrValidator();
2506 if(((SchemaValidator *)fValidator)->getErrorOccurred())
2507 {
2508 fPSVIElemContext.fErrorOccurred = true;
2509 if(getPSVIHandler())
2510 attrValid = PSVIItem::VALIDITY_INVALID;
2511 }
2512 }
2513 else { // no decl; default DOMTypeInfo to anySimpleType
2514 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
2515 }
2516
2517 // Save the type for later use
2518 attType = attDefForWildCard->getType();
2519 }
2520 else {
2521 normalizeAttValue(
2522 attDef, namePtr, curPair->getValue(), normBuf
2523 );
2524
2525 // If we found an attdef for this one, then lets validate it.
2526 if (attDef)
2527 {
2528 const XMLCh* xsNormalized = normBuf.getRawBuffer();
2529 if (fGrammarType == Grammar::SchemaGrammarType)
2530 {
2531 DatatypeValidator* tempDV = ((SchemaAttDef*) attDef)->getDatatypeValidator();
2532 if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
2533 {
2534 // normalize the attribute according to schema whitespace facet
2535 ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf, true);
2536 xsNormalized = fWSNormalizeBuf.getRawBuffer();
2537 if (fNormalizeData && fValidate && !skipThisOne) {
2538 normBuf.set(xsNormalized);
2539 }
2540 }
2541 }
2542
2543 if (fValidate && !skipThisOne)
2544 {
2545 fValidator->validateAttrValue(
2546 attDef, xsNormalized, false, elemDecl
2547 );
2548 attrValidator = ((SchemaValidator *)fValidator)->getMostRecentAttrValidator();
2549 if(((SchemaValidator *)fValidator)->getErrorOccurred())
2550 {
2551 fPSVIElemContext.fErrorOccurred = true;
2552 if(getPSVIHandler())
2553 attrValid = PSVIItem::VALIDITY_INVALID;
2554 }
2555 }
2556 else {
2557 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
2558 }
2559 }
2560 else {
2561 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
2562 }
2563
2564 // Save the type for later use
2565 if (attDef)
2566 {
2567 attType = attDef->getType();
2568 }
2569 }
2570
2571 // now fill in the PSVIAttributes entry for this attribute:
2572 if(getPSVIHandler())
2573 {
2574 psviAttr = fPSVIAttrList->getPSVIAttributeToFill(suffPtr, fURIStringPool->getValueForId(uriId));
2575 SchemaAttDef *actualAttDef = 0;
2576 if(attDef)
2577 actualAttDef = (SchemaAttDef *)attDef;
2578 else if (attDefForWildCard)
2579 actualAttDef = (SchemaAttDef *)attDefForWildCard;
2580 if(actualAttDef)
2581 {
2582 XSAttributeDeclaration *attrDecl = (XSAttributeDeclaration *)fModel->getXSObject(actualAttDef);
2583 DatatypeValidator * attrDataType = actualAttDef->getDatatypeValidator();
2584 XSSimpleTypeDefinition *validatingType = (XSSimpleTypeDefinition *)fModel->getXSObject(attrDataType);
2585 if(attrValid != PSVIItem::VALIDITY_VALID)
2586 {
2587 psviAttr->reset
2588 (
2589 fRootElemName
2590 , attrValid
2591 , attrAssessed
2592 , validatingType
2593 , 0
2594 , actualAttDef->getValue()
2595 , false
2596 , attrDecl
2597 , 0
2598 );
2599 }
2600 else
2601 {
2602 XSSimpleTypeDefinition *memberType = 0;
2603 if(validatingType->getVariety() == XSSimpleTypeDefinition::VARIETY_UNION)
2604 memberType = (XSSimpleTypeDefinition *)fModel->getXSObject(attrValidator);
2605 psviAttr->reset
2606 (
2607 fRootElemName
2608 , attrValid
2609 , attrAssessed
2610 , validatingType
2611 , memberType
2612 , actualAttDef->getValue()
2613 , false
2614 , attrDecl
2615 , (memberType)?attrValidator:attrDataType
2616 );
2617 }
2618 }
2619 else
2620 {
2621 psviAttr->reset
2622 (
2623 fRootElemName
2624 , attrValid
2625 , attrAssessed
2626 , 0
2627 , 0
2628 , 0
2629 , false
2630 , 0
2631 , 0
2632 );
2633 }
2634 }
2635 }
2636
2637 // Add this attribute to the attribute list that we use to pass them
2638 // to the handler. We reuse its existing elements but expand it as
2639 // required.
2640 XMLAttr* curAttr;
2641 if (retCount >= curAttListSize)
2642 {
2643 curAttr = new (fMemoryManager) XMLAttr
2644 (
2645 uriId
2646 , suffPtr
2647 , prefPtr
2648 , normBuf.getRawBuffer()
2649 , attType
2650 , true
2651 , fMemoryManager
2652 );
2653 toFill.addElement(curAttr);
2654 }
2655 else
2656 {
2657 curAttr = toFill.elementAt(retCount);
2658 curAttr->set
2659 (
2660 uriId
2661 , suffPtr
2662 , prefPtr
2663 , normBuf.getRawBuffer()
2664 , attType
2665 );
2666 curAttr->setSpecified(true);
2667 }
2668 if(psviAttr)
2669 psviAttr->setValue(curAttr->getValue());
2670
2671 // Bump the count of attrs in the list
2672 retCount++;
2673 }
2674
2675 // Now, if there are any attributes declared by this element, let's
2676 // go through them and make sure that any required ones are provided,
2677 // and fault in any fixed ones and defaulted ones that are not provided
2678 // literally.
2679 if (hasDefs)
2680 {
2681 // Check after all specified attrs are scanned
2682 // (1) report error for REQUIRED attrs that are missing (V_TAGc)
2683 // (2) add default attrs if missing (FIXED and NOT_FIXED)
2684
2685 XMLAttDefList& attDefList = getAttDefList(currType, elemDecl);
2686
2687 for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++)
2688 {
2689 // Get the current att def, for convenience and its def type
2690 XMLAttDef *curDef = &attDefList.getAttDef(i);
2691 const XMLAttDef::DefAttTypes defType = curDef->getDefaultType();
2692
2693 unsigned int *attCountPtr = fAttDefRegistry->get(curDef);
2694 if (!attCountPtr || *attCountPtr < fElemCount)
2695 { // did not occur
2696 // note that since there is no attribute information
2697 // item present, there is no PSVI infoset to augment here *except*
2698 // that the element is invalid
2699
2700 //the attribute is not provided
2701 if (fValidate)
2702 {
2703 // If we are validating and its required, then an error
2704 if ((defType == XMLAttDef::Required) ||
2705 (defType == XMLAttDef::Required_And_Fixed) )
2706
2707 {
2708 fValidator->emitError
2709 (
2710 XMLValid::RequiredAttrNotProvided
2711 , curDef->getFullName()
2712 );
2713 fPSVIElemContext.fErrorOccurred = true;
2714 }
2715 else if ((defType == XMLAttDef::Default) ||
2716 (defType == XMLAttDef::Fixed) )
2717 {
2718 if (fStandalone && curDef->isExternal())
2719 {
2720 // XML 1.0 Section 2.9
2721 // Document is standalone, so attributes must not be defaulted.
2722 fValidator->emitError(XMLValid::NoDefAttForStandalone, curDef->getFullName(), elemDecl->getFullName());
2723 }
2724 }
2725 }
2726
2727 // Fault in the value if needed, and bump the att count.
2728 if ((defType == XMLAttDef::Default)
2729 || (defType == XMLAttDef::Fixed))
2730 {
2731 // Let the validator pass judgement on the attribute value
2732 if (fValidate)
2733 {
2734 fValidator->validateAttrValue
2735 (
2736 curDef
2737 , curDef->getValue()
2738 , false
2739 , elemDecl
2740 );
2741 }
2742
2743 XMLAttr* curAtt;
2744 if (retCount >= curAttListSize)
2745 {
2746 curAtt = new (fMemoryManager) XMLAttr(fMemoryManager);
2747 fValidator->faultInAttr(*curAtt, *curDef);
2748 fAttrList->addElement(curAtt);
2749 }
2750 else
2751 {
2752 curAtt = fAttrList->elementAt(retCount);
2753 fValidator->faultInAttr(*curAtt, *curDef);
2754 }
2755
2756 // Indicate it was not explicitly specified and bump count
2757 curAtt->setSpecified(false);
2758 retCount++;
2759 if(getPSVIHandler())
2760 {
2761 QName *attName = ((SchemaAttDef *)curDef)->getAttName();
2762 PSVIAttribute *defAttrToFill = fPSVIAttrList->getPSVIAttributeToFill
2763 (
2764 attName->getLocalPart(), fURIStringPool->getValueForId( attName->getURI())
2765 );
2766 XSAttributeDeclaration *defAttrDecl = (XSAttributeDeclaration *)fModel->getXSObject((void *)curDef);
2767 DatatypeValidator * attrDataType = ((SchemaAttDef *)curDef)->getDatatypeValidator();
2768 XSSimpleTypeDefinition *defAttrType =
2769 (XSSimpleTypeDefinition*)fModel->getXSObject(attrDataType);
2770 // would have occurred during validation of default value
2771 if(((SchemaValidator *)fValidator)->getErrorOccurred())
2772 {
2773 defAttrToFill->reset(
2774 fRootElemName
2775 , PSVIItem::VALIDITY_INVALID
2776 , PSVIItem::VALIDATION_FULL
2777 , defAttrType
2778 , 0
2779 , curDef->getValue()
2780 , true
2781 , defAttrDecl
2782 , 0
2783 );
2784 }
2785 else
2786 {
2787 XSSimpleTypeDefinition *defAttrMemberType = 0;
2788 if(defAttrType->getVariety() == XSSimpleTypeDefinition::VARIETY_UNION)
2789 {
2790 defAttrMemberType = (XSSimpleTypeDefinition *)fModel->getXSObject
2791 (
2792 ((SchemaValidator*)fValidator)->getMostRecentAttrValidator()
2793 );
2794 }
2795 defAttrToFill->reset
2796 (
2797 fRootElemName
2798 , PSVIItem::VALIDITY_VALID
2799 , PSVIItem::VALIDATION_FULL
2800 , defAttrType
2801 , defAttrMemberType
2802 , curDef->getValue()
2803 , true
2804 , defAttrDecl
2805 , (defAttrMemberType)?((SchemaValidator *)fValidator)->getMostRecentAttrValidator():attrDataType
2806 );
2807 }
2808 defAttrToFill->setValue(curDef->getValue());
2809 }
2810 }
2811 }
2812 else if (attCountPtr)
2813 {
2814 //attribute is provided
2815 // (schema) report error for PROHIBITED attrs that are present (V_TAGc)
2816 if (defType == XMLAttDef::Prohibited && fValidate)
2817 {
2818 fValidator->emitError
2819 (
2820 XMLValid::ProhibitedAttributePresent
2821 , curDef->getFullName()
2822 );
2823 fPSVIElemContext.fErrorOccurred = true;
2824 if (getPSVIHandler())
2825 {
2826 QName *attQName = ((SchemaAttDef *)curDef)->getAttName();
2827 // bad luck...
2828 PSVIAttribute *prohibitedAttr = fPSVIAttrList->getAttributePSVIByName
2829 (
2830 attQName->getLocalPart(),
2831 fURIStringPool->getValueForId(attQName->getURI())
2832 );
2833 prohibitedAttr->updateValidity(PSVIItem::VALIDITY_INVALID);
2834 }
2835 }
2836 }
2837 }
2838 }
2839
2840 return retCount;
2841 }
2842
2843
2844 // This method will take a raw attribute value and normalize it according to
2845 // the rules of the attribute type. It will put the resulting value into the
2846 // passed buffer.
2847 //
2848 // This code assumes that escaped characters in the original value (via char
2849 // refs) are prefixed by a 0xFFFF character. This is because some characters
2850 // are legal if escaped only. And some escape chars are not subject to
2851 // normalization rules.
normalizeAttValue(const XMLAttDef * const attDef,const XMLCh * const attName,const XMLCh * const value,XMLBuffer & toFill)2852 bool SGXMLScanner::normalizeAttValue( const XMLAttDef* const attDef
2853 , const XMLCh* const attName
2854 , const XMLCh* const value
2855 , XMLBuffer& toFill)
2856 {
2857 // A simple state value for a whitespace processing state machine
2858 enum States
2859 {
2860 InWhitespace
2861 , InContent
2862 };
2863
2864 // Get the type and name
2865 const XMLAttDef::AttTypes type = (attDef)
2866 ?attDef->getType()
2867 :XMLAttDef::CData;
2868
2869 // Assume its going to go fine, and empty the target buffer in preperation
2870 bool retVal = true;
2871 toFill.reset();
2872
2873 // Get attribute def - to check to see if it's declared externally or not
2874 bool isAttExternal = (attDef)
2875 ?attDef->isExternal()
2876 :false;
2877
2878 // Loop through the chars of the source value and normalize it according
2879 // to the type.
2880 States curState = InContent;
2881 bool firstNonWS = false;
2882 XMLCh nextCh;
2883 const XMLCh* srcPtr = value;
2884
2885 if (type == XMLAttDef::CData || type > XMLAttDef::Notation) {
2886 while (*srcPtr) {
2887 // Get the next character from the source. We have to watch for
2888 // escaped characters (which are indicated by a 0xFFFF value followed
2889 // by the char that was escaped.)
2890 nextCh = *srcPtr;
2891
2892 // Do we have an escaped character ?
2893 if (nextCh == 0xFFFF)
2894 {
2895 nextCh = *++srcPtr;
2896 }
2897 else if ( (nextCh <= 0x0D) && (nextCh == 0x09 || nextCh == 0x0A || nextCh == 0x0D) ) {
2898 // Check Validity Constraint for Standalone document declaration
2899 // XML 1.0, Section 2.9
2900 if (fStandalone && fValidate && isAttExternal)
2901 {
2902 // Can't have a standalone document declaration of "yes" if attribute
2903 // values are subject to normalisation
2904 fValidator->emitError(XMLValid::NoAttNormForStandalone, attName);
2905 }
2906 nextCh = chSpace;
2907 }
2908 else if (nextCh == chOpenAngle) {
2909 // If its not escaped, then make sure its not a < character, which is
2910 // not allowed in attribute values.
2911 emitError(XMLErrs::BracketInAttrValue, attName);
2912 retVal = false;
2913 }
2914
2915 // Add this char to the target buffer
2916 toFill.append(nextCh);
2917
2918 // And move up to the next character in the source
2919 srcPtr++;
2920 }
2921 }
2922 else {
2923 while (*srcPtr)
2924 {
2925 // Get the next character from the source. We have to watch for
2926 // escaped characters (which are indicated by a 0xFFFF value followed
2927 // by the char that was escaped.)
2928 nextCh = *srcPtr;
2929
2930 // Do we have an escaped character ?
2931 if (nextCh == 0xFFFF)
2932 {
2933 nextCh = *++srcPtr;
2934 }
2935 else if (nextCh == chOpenAngle) {
2936 // If its not escaped, then make sure its not a < character, which is
2937 // not allowed in attribute values.
2938 emitError(XMLErrs::BracketInAttrValue, attName);
2939 retVal = false;
2940 }
2941
2942 if (curState == InWhitespace)
2943 {
2944 if (!fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
2945 {
2946 if (firstNonWS)
2947 toFill.append(chSpace);
2948 curState = InContent;
2949 firstNonWS = true;
2950 }
2951 else
2952 {
2953 srcPtr++;
2954 continue;
2955 }
2956 }
2957 else if (curState == InContent)
2958 {
2959 if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
2960 {
2961 curState = InWhitespace;
2962 srcPtr++;
2963
2964 // Check Validity Constraint for Standalone document declaration
2965 // XML 1.0, Section 2.9
2966 if (fStandalone && fValidate && isAttExternal)
2967 {
2968 if (!firstNonWS || (nextCh != chSpace) || (!*srcPtr) || fReaderMgr.getCurrentReader()->isWhitespace(*srcPtr))
2969 {
2970 // Can't have a standalone document declaration of "yes" if attribute
2971 // values are subject to normalisation
2972 fValidator->emitError(XMLValid::NoAttNormForStandalone, attName);
2973 }
2974 }
2975 continue;
2976 }
2977 firstNonWS = true;
2978 }
2979
2980 // Add this char to the target buffer
2981 toFill.append(nextCh);
2982
2983 // And move up to the next character in the source
2984 srcPtr++;
2985 }
2986 }
2987
2988 return retVal;
2989 }
2990
2991 // This method will just normalize the input value as CDATA without
2992 // any standalone checking.
normalizeAttRawValue(const XMLCh * const attrName,const XMLCh * const value,XMLBuffer & toFill)2993 bool SGXMLScanner::normalizeAttRawValue( const XMLCh* const attrName
2994 , const XMLCh* const value
2995 , XMLBuffer& toFill)
2996 {
2997 // Assume its going to go fine, and empty the target buffer in preperation
2998 bool retVal = true;
2999 toFill.reset();
3000
3001 // Loop through the chars of the source value and normalize it according
3002 // to the type.
3003 bool escaped;
3004 XMLCh nextCh;
3005 const XMLCh* srcPtr = value;
3006 while (*srcPtr)
3007 {
3008 // Get the next character from the source. We have to watch for
3009 // escaped characters (which are indicated by a 0xFFFF value followed
3010 // by the char that was escaped.)
3011 nextCh = *srcPtr;
3012 escaped = (nextCh == 0xFFFF);
3013 if (escaped)
3014 nextCh = *++srcPtr;
3015
3016 // If its not escaped, then make sure its not a < character, which is
3017 // not allowed in attribute values.
3018 if (!escaped && (*srcPtr == chOpenAngle))
3019 {
3020 emitError(XMLErrs::BracketInAttrValue, attrName);
3021 retVal = false;
3022 }
3023
3024 if (!escaped)
3025 {
3026 // NOTE: Yes this is a little redundant in that a 0x20 is
3027 // replaced with an 0x20. But its faster to do this (I think)
3028 // than checking for 9, A, and D separately.
3029 if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
3030 nextCh = chSpace;
3031 }
3032
3033 // Add this char to the target buffer
3034 toFill.append(nextCh);
3035
3036 // And move up to the next character in the source
3037 srcPtr++;
3038 }
3039 return retVal;
3040 }
3041
3042 // This method will reset the scanner data structures, and related plugged
3043 // in stuff, for a new scan session. We get the input source for the primary
3044 // XML entity, create the reader for it, and push it on the stack so that
3045 // upon successful return from here we are ready to go.
scanReset(const InputSource & src)3046 void SGXMLScanner::scanReset(const InputSource& src)
3047 {
3048
3049 // This call implicitly tells us that we are going to reuse the scanner
3050 // if it was previously used. So tell the validator to reset itself.
3051 //
3052 // But, if the fUseCacheGrammar flag is set, then don't reset it.
3053 //
3054 // NOTE: The ReaderMgr is flushed on the way out, because that is
3055 // required to insure that files are closed.
3056 fGrammarResolver->cacheGrammarFromParse(fToCacheGrammar);
3057 fGrammarResolver->useCachedGrammarInParse(fUseCachedGrammar);
3058
3059 // Clear transient schema info list.
3060 //
3061 fSchemaInfoList->removeAll ();
3062
3063 // fModel may need updating, as fGrammarResolver could have cleaned it
3064 if(fModel && getPSVIHandler())
3065 fModel = fGrammarResolver->getXSModel();
3066
3067 // Create dummy schema grammar
3068 if (!fSchemaGrammar) {
3069 fSchemaGrammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager);
3070 }
3071
3072 fGrammar = fSchemaGrammar;
3073 fGrammarType = Grammar::DTDGrammarType;
3074 fRootGrammar = 0;
3075
3076 fValidator->setGrammar(fGrammar);
3077 if (fValidatorFromUser) {
3078
3079 ((SchemaValidator*) fValidator)->setErrorReporter(fErrorReporter);
3080 ((SchemaValidator*) fValidator)->setGrammarResolver(fGrammarResolver);
3081 ((SchemaValidator*) fValidator)->setExitOnFirstFatal(fExitOnFirstFatal);
3082 }
3083
3084 // Reset validation
3085 fValidate = (fValScheme == Val_Always) ? true : false;
3086
3087 // And for all installed handlers, send reset events. This gives them
3088 // a chance to flush any cached data.
3089 if (fDocHandler)
3090 fDocHandler->resetDocument();
3091 if (fEntityHandler)
3092 fEntityHandler->resetEntities();
3093 if (fErrorReporter)
3094 fErrorReporter->resetErrors();
3095
3096 // Clear out the id reference list
3097 resetValidationContext();
3098
3099 // Reset the Root Element Name
3100 fMemoryManager->deallocate(fRootElemName);//delete [] fRootElemName;
3101 fRootElemName = 0;
3102
3103 // Reset IdentityConstraints
3104 if (fICHandler)
3105 fICHandler->reset();
3106
3107 // Reset the element stack, and give it the latest ids for the special
3108 // URIs it has to know about.
3109 fElemStack.reset
3110 (
3111 fEmptyNamespaceId
3112 , fUnknownNamespaceId
3113 , fXMLNamespaceId
3114 , fXMLNSNamespaceId
3115 );
3116
3117 if (!fSchemaNamespaceId)
3118 fSchemaNamespaceId = fURIStringPool->addOrFind(SchemaSymbols::fgURI_XSI);
3119
3120 // Reset some status flags
3121 fInException = false;
3122 fStandalone = false;
3123 fErrorCount = 0;
3124 fHasNoDTD = true;
3125 fSeeXsi = false;
3126 fDoNamespaces = true;
3127 fDoSchema = true;
3128
3129 // Reset PSVI context
3130 // Note that we always need this around for DOMTypeInfo
3131 if (!fPSVIElement)
3132 fPSVIElement = new (fMemoryManager) PSVIElement(fMemoryManager);
3133
3134 if (!fErrorStack)
3135 {
3136 fErrorStack = new (fMemoryManager) ValueStackOf<bool>(8, fMemoryManager);
3137 }
3138 else
3139 {
3140 fErrorStack->removeAllElements();
3141 }
3142
3143 resetPSVIElemContext();
3144
3145 // Reset the validators
3146 fSchemaValidator->reset();
3147 fSchemaValidator->setErrorReporter(fErrorReporter);
3148 fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal);
3149 fSchemaValidator->setGrammarResolver(fGrammarResolver);
3150 if (fValidatorFromUser)
3151 fValidator->reset();
3152
3153 // Handle the creation of the XML reader object for this input source.
3154 // This will provide us with transcoding and basic lexing services.
3155 XMLReader* newReader = fReaderMgr.createReader
3156 (
3157 src
3158 , true
3159 , XMLReader::RefFrom_NonLiteral
3160 , XMLReader::Type_General
3161 , XMLReader::Source_External
3162 , fCalculateSrcOfs
3163 , fLowWaterMark
3164 );
3165
3166 if (!newReader) {
3167 if (src.getIssueFatalErrorIfNotFound())
3168 ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId(), fMemoryManager);
3169 else
3170 ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId(), fMemoryManager);
3171 }
3172
3173 // Push this read onto the reader manager
3174 fReaderMgr.pushReader(newReader, 0);
3175
3176 // and reset security-related things if necessary:
3177 if(fSecurityManager != 0)
3178 {
3179 fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit();
3180 fEntityExpansionCount = 0;
3181 }
3182 fElemCount = 0;
3183 if(fUIntPoolRowTotal >= 32)
3184 { // 8 KB tied up with validating attributes...
3185 fAttDefRegistry->removeAll();
3186 recreateUIntPool();
3187 }
3188 else
3189 {
3190 // note that this will implicitly reset the values of the hashtables,
3191 // though their buckets will still be tied up
3192 resetUIntPool();
3193 }
3194 fUndeclaredAttrRegistry->removeAll();
3195 }
3196
3197
//  This method is called to hand the character data accumulated in the
//  passed buffer over to the document handler. When validation is off the
//  data is always reported as plain character data. When validation is on,
//  the content type of the element currently being validated decides
//  whether the data is reported as ignorable whitespace, reported as
//  character data (after schema whitespace normalization where that
//  applies), or rejected with a validity error. The buffer is reset once
//  the data has been dealt with.
sendCharData(XMLBuffer & toSend)3205 void SGXMLScanner::sendCharData(XMLBuffer& toSend)
3206 {
3207 // If no data in the buffer, then nothing to do
3208 if (toSend.isEmpty())
3209 return;
3210
3211 // We do different things according to whether we are validating or
3212 // not. If not, its always just characters; else, it depends on the
3213 // current element's content model.
3214 if (fValidate)
3215 {
3216 // Get the raw data we need for the callback
3217 const XMLCh* rawBuf = toSend.getRawBuffer();
3218 const XMLSize_t len = toSend.getLen();
3219
3220 // Get the character data opts for the current element
3221 XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData;
3222 // And see if the current element is a 'Children' style content model
3223 ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
3224 if(currType)
3225 {
3226 SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType();
3227 if(modelType == SchemaElementDecl::Children ||
3228 modelType == SchemaElementDecl::ElementOnlyEmpty)
3229 charOpts = XMLElementDecl::SpacesOk;
3230 else if(modelType == SchemaElementDecl::Empty)
3231 charOpts = XMLElementDecl::NoCharData;
3232 }
3233
3234 // should not be necessary once PSVI method on element decls
3235 // are removed
3236 if (charOpts == XMLElementDecl::NoCharData)
3237 {
3238 // They definitely cannot handle any type of char data
3239 fValidator->emitError(XMLValid::NoCharDataInCM);
3240 if (getPSVIHandler())
3241 {
3242 // REVISIT:
3243 // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
3244 }
3245 }
3246 else if (fReaderMgr.getCurrentReader()->isAllSpaces(rawBuf, len))
3247 {
3248 // Its all spaces. So, if they can take spaces, then send it
3249 // as ignorable whitespace. If they can handle any char data
3250 // send it as characters.
3251 if (charOpts == XMLElementDecl::SpacesOk) {
3252 if (fDocHandler)
3253 fDocHandler->ignorableWhitespace(rawBuf, len, false);
3254 }
3255 else if (charOpts == XMLElementDecl::AllCharData)
3256 {
3257 XMLSize_t xsLen;
3258 const XMLCh* xsNormalized;
3259 DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
3260 if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
3261 {
3262 // normalize the character according to schema whitespace facet
3263 ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf, fWSNormalizeBuf);
3264 xsNormalized = fWSNormalizeBuf.getRawBuffer();
3265 xsLen = fWSNormalizeBuf.getLen();
3266 }
3267 else {
3268 xsNormalized = rawBuf;
3269 xsLen = len;
3270 }
3271
3272 // tell the schema validation about the character data for checkContent later
3273 ((SchemaValidator*)fValidator)->setDatatypeBuffer(xsNormalized);
3274
3275 // call all active identity constraints
3276 if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) {
3277 fContent.append(xsNormalized, xsLen);
3278 }
3279
3280 if (fDocHandler) {
3281 if (fNormalizeData) {
3282 fDocHandler->docCharacters(xsNormalized, xsLen, false);
3283 }
3284 else {
3285 fDocHandler->docCharacters(rawBuf, len, false);
3286 }
3287 }
3288 }
3289 }
3290 else
3291 {
3292 // If they can take any char data, then send it. Otherwise, they
3293 // can only handle whitespace and can't handle this stuff so
3294 // issue an error.
3295 if (charOpts == XMLElementDecl::AllCharData)
3296 {
3297 XMLSize_t xsLen;
3298 const XMLCh *xsNormalized;
3299 DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
3300 if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
3301 {
3302 ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf, fWSNormalizeBuf);
3303 xsNormalized = fWSNormalizeBuf.getRawBuffer();
3304 xsLen = fWSNormalizeBuf.getLen();
3305 }
3306 else {
3307 xsNormalized = rawBuf;
3308 xsLen = len;
3309 }
3310
3311 // tell the schema validation about the character data for checkContent later
3312 ((SchemaValidator*)fValidator)->setDatatypeBuffer(xsNormalized);
3313
3314 // call all active identity constraints
3315 if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) {
3316 fContent.append(xsNormalized, xsLen);
3317 }
3318
3319 if (fDocHandler) {
3320 if (fNormalizeData) {
3321 fDocHandler->docCharacters(xsNormalized, xsLen, false);
3322 }
3323 else {
3324 fDocHandler->docCharacters(rawBuf, len, false);
3325 }
3326 }
3327 }
3328 else
3329 {
3330 fValidator->emitError(XMLValid::NoCharDataInCM);
3331 if (getPSVIHandler())
3332 {
3333 // REVISIT:
3334 // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
3335 }
3336 }
3337 }
3338 }
3339 else
3340 {
3341 // call all active identity constraints
3342 if (toCheckIdentityConstraint() && fICHandler->getMatcherCount())
3343 fContent.append(toSend.getRawBuffer(), toSend.getLen());
3344
3345 // Always assume its just char data if not validating
3346 if (fDocHandler)
3347 fDocHandler->docCharacters(toSend.getRawBuffer(), toSend.getLen(), false);
3348 }
3349
3350 // Reset buffer
3351 toSend.reset();
3352 }
3353
3354
3355
3356 // This method is called with a key/value string pair that represents an
3357 // xmlns="yyy" or xmlns:xxx="yyy" attribute. This method will update the
3358 // current top of the element stack based on this data. We know that when
3359 // we get here, that it is one of these forms, so we don't bother confirming
3360 // it.
3361 //
3362 // But we have to ensure
3363 // 1. xxx is not xmlns
3364 // 2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa
3365 // 3. yyy is not XMLUni::fgXMLNSURIName
3366 // 4. if xxx is not null, then yyy cannot be an empty string.
updateNSMap(const XMLCh * const attrName,const XMLCh * const attrValue)3367 void SGXMLScanner::updateNSMap(const XMLCh* const attrName
3368 , const XMLCh* const attrValue)
3369 {
3370 updateNSMap(attrName, attrValue, XMLString::indexOf(attrName, chColon));
3371 }
3372
updateNSMap(const XMLCh * const attrName,const XMLCh * const attrValue,const int colonOfs)3373 void SGXMLScanner::updateNSMap(const XMLCh* const attrName
3374 , const XMLCh* const attrValue
3375 , const int colonOfs)
3376 {
3377 // We need a buffer to normalize the attribute value into
3378 XMLBufBid bbNormal(&fBufMgr);
3379 XMLBuffer& normalBuf = bbNormal.getBuffer();
3380
3381 // Normalize the value into the passed buffer. In this case, we don't
3382 // care about the return value. An error was issued for the error, which
3383 // is all we care about here.
3384 normalizeAttRawValue(attrName, attrValue, normalBuf);
3385 XMLCh* namespaceURI = normalBuf.getRawBuffer();
3386
3387 // We either have the default prefix (""), or we point it into the attr
3388 // name parameter. Note that the xmlns is not the prefix we care about
3389 // here. To us, the 'prefix' is really the local part of the attrName
3390 // parameter.
3391 //
3392 // Check 1. xxx is not xmlns
3393 // 2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa
3394 // 3. yyy is not XMLUni::fgXMLNSURIName
3395 // 4. if xxx is not null, then yyy cannot be an empty string.
3396 const XMLCh* prefPtr = XMLUni::fgZeroLenString;
3397 if (colonOfs != -1) {
3398 prefPtr = &attrName[colonOfs + 1];
3399
3400 if (XMLString::equals(prefPtr, XMLUni::fgXMLNSString))
3401 emitError(XMLErrs::NoUseOfxmlnsAsPrefix);
3402 else if (XMLString::equals(prefPtr, XMLUni::fgXMLString)) {
3403 if (!XMLString::equals(namespaceURI, XMLUni::fgXMLURIName))
3404 emitError(XMLErrs::PrefixXMLNotMatchXMLURI);
3405 }
3406
3407 if (!namespaceURI)
3408 emitError(XMLErrs::NoEmptyStrNamespace, attrName);
3409 else if(!*namespaceURI && fXMLVersion == XMLReader::XMLV1_0)
3410 emitError(XMLErrs::NoEmptyStrNamespace, attrName);
3411 }
3412
3413 if (XMLString::equals(namespaceURI, XMLUni::fgXMLNSURIName))
3414 emitError(XMLErrs::NoUseOfxmlnsURI);
3415 else if (XMLString::equals(namespaceURI, XMLUni::fgXMLURIName)) {
3416 if (!XMLString::equals(prefPtr, XMLUni::fgXMLString))
3417 emitError(XMLErrs::XMLURINotMatchXMLPrefix);
3418 }
3419
3420 // Ok, we have to get the unique id for the attribute value, which is the
3421 // URI that this value should be mapped to. The validator has the
3422 // namespace string pool, so we ask him to find or add this new one. Then
3423 // we ask the element stack to add this prefix to URI Id mapping.
3424 fElemStack.addPrefix
3425 (
3426 prefPtr
3427 , fURIStringPool->addOrFind(namespaceURI)
3428 );
3429 }
3430
scanRawAttrListforNameSpaces(XMLSize_t attCount)3431 void SGXMLScanner::scanRawAttrListforNameSpaces(XMLSize_t attCount)
3432 {
3433 // Make an initial pass through the list and find any xmlns attributes or
3434 // schema attributes.
3435 // When we find one, send it off to be used to update the element stack's
3436 // namespace mappings.
3437 for (XMLSize_t index = 0; index < attCount; index++)
3438 {
3439 // each attribute has the prefix:suffix="value"
3440 const KVStringPair* curPair = fRawAttrList->elementAt(index);
3441 const XMLCh* rawPtr = curPair->getKey();
3442
3443 // If either the key begins with "xmlns:" or its just plain
3444 // "xmlns", then use it to update the map.
3445 if (!XMLString::compareNString(rawPtr, XMLUni::fgXMLNSColonString, 6)
3446 || XMLString::equals(rawPtr, XMLUni::fgXMLNSString))
3447 {
3448 const XMLCh* valuePtr = curPair->getValue();
3449
3450 updateNSMap(rawPtr, valuePtr, fRawAttrColonList[index]);
3451
3452 // if the schema URI is seen in the the valuePtr, set the boolean seeXsi
3453 if (XMLString::equals(valuePtr, SchemaSymbols::fgURI_XSI)) {
3454 fSeeXsi = true;
3455 }
3456 }
3457 }
3458
3459 // walk through the list again to deal with "xsi:...."
3460 if (fSeeXsi)
3461 {
3462 // Schema Xsi Type yyyy (e.g. xsi:type="yyyyy")
3463 XMLBufBid bbXsi(&fBufMgr);
3464 XMLBuffer& fXsiType = bbXsi.getBuffer();
3465
3466 for (XMLSize_t index = 0; index < attCount; index++)
3467 {
3468 // each attribute has the prefix:suffix="value"
3469 const KVStringPair* curPair = fRawAttrList->elementAt(index);
3470 const XMLCh* rawPtr = curPair->getKey();
3471 const XMLCh* prefPtr;
3472
3473 int colonInd = fRawAttrColonList[index];
3474
3475 if (colonInd != -1) {
3476 fURIBuf.set(rawPtr, colonInd);
3477 prefPtr = fURIBuf.getRawBuffer();
3478 }
3479 else {
3480 prefPtr = XMLUni::fgZeroLenString;
3481 }
3482
3483 // if schema URI has been seen, scan for the schema location and uri
3484 // and resolve the schema grammar; or scan for schema type
3485 if (resolvePrefix(prefPtr, ElemStack::Mode_Attribute) == fSchemaNamespaceId) {
3486
3487 const XMLCh* valuePtr = curPair->getValue();
3488 const XMLCh* suffPtr = &rawPtr[colonInd + 1];
3489
3490 if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_SCHEMALOCATION))
3491 parseSchemaLocation(valuePtr);
3492 else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_NONAMESPACESCHEMALOCATION))
3493 resolveSchemaGrammar(valuePtr, XMLUni::fgZeroLenString);
3494
3495 if( fValidator && fValidator->handlesSchema() )
3496 {
3497 if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_TYPE))
3498 {
3499 // normalize the attribute according to schema whitespace facet
3500 DatatypeValidator* tempDV = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_QNAME);
3501 ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, valuePtr, fXsiType, true);
3502 }
3503 else if (XMLString::equals(suffPtr, SchemaSymbols::fgATT_NILL))
3504 {
3505 // normalize the attribute according to schema whitespace facet
3506 XMLBuffer& fXsiNil = fBufMgr.bidOnBuffer();
3507 DatatypeValidator* tempDV = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_BOOLEAN);
3508 ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, valuePtr, fXsiNil, true);
3509 if(XMLString::equals(fXsiNil.getRawBuffer(), SchemaSymbols::fgATTVAL_TRUE))
3510 ((SchemaValidator*)fValidator)->setNillable(true);
3511 else if(XMLString::equals(fXsiNil.getRawBuffer(), SchemaSymbols::fgATTVAL_FALSE))
3512 ((SchemaValidator*)fValidator)->setNillable(false);
3513 else
3514 emitError(XMLErrs::InvalidAttValue, fXsiNil.getRawBuffer(), valuePtr);
3515 fBufMgr.releaseBuffer(fXsiNil);
3516 }
3517 }
3518 }
3519 }
3520
3521 if (fValidator && fValidator->handlesSchema()) {
3522 if (!fXsiType.isEmpty()) {
3523 int colonPos = -1;
3524 unsigned int uriId = resolveQName (
3525 fXsiType.getRawBuffer()
3526 , fPrefixBuf
3527 , ElemStack::Mode_Element
3528 , colonPos
3529 );
3530 ((SchemaValidator*)fValidator)->setXsiType(fPrefixBuf.getRawBuffer(), fXsiType.getRawBuffer() + colonPos + 1, uriId);
3531 }
3532 }
3533 }
3534 }
3535
parseSchemaLocation(const XMLCh * const schemaLocationStr,bool ignoreLoadSchema)3536 void SGXMLScanner::parseSchemaLocation(const XMLCh* const schemaLocationStr, bool ignoreLoadSchema)
3537 {
3538 BaseRefVectorOf<XMLCh>* schemaLocation = XMLString::tokenizeString(schemaLocationStr, fMemoryManager);
3539 Janitor<BaseRefVectorOf<XMLCh> > janLoc(schemaLocation);
3540
3541 XMLSize_t size = schemaLocation->size();
3542 if (size % 2 != 0 ) {
3543 emitError(XMLErrs::BadSchemaLocation);
3544 } else {
3545 // We need a buffer to normalize the attribute value into
3546 XMLBuffer normalBuf(1023, fMemoryManager);
3547 for(XMLSize_t i=0; i<size; i=i+2) {
3548 normalizeAttRawValue(SchemaSymbols::fgXSI_SCHEMALOCATION, schemaLocation->elementAt(i), normalBuf);
3549 resolveSchemaGrammar(schemaLocation->elementAt(i+1), normalBuf.getRawBuffer(), ignoreLoadSchema);
3550 }
3551 }
3552 }
3553
resolveSchemaGrammar(const XMLCh * const loc,const XMLCh * const uri,bool ignoreLoadSchema)3554 void SGXMLScanner::resolveSchemaGrammar(const XMLCh* const loc, const XMLCh* const uri, bool ignoreLoadSchema) {
3555
3556 Grammar* grammar = 0;
3557
3558 {
3559 XMLSchemaDescriptionImpl theSchemaDescription(uri, fMemoryManager);
3560 theSchemaDescription.setLocationHints(loc);
3561 grammar = fGrammarResolver->getGrammar(&theSchemaDescription);
3562 }
3563
3564 // If multi-import is enabled, make sure the existing grammar came
3565 // from the import directive. Otherwise we may end up reloading
3566 // the same schema that came from the external grammar pool. Ideally,
3567 // we would move fSchemaInfoList to XMLGrammarPool so that it survives
3568 // the destruction of the scanner in which case we could rely on the
3569 // same logic we use to weed out duplicate schemas below.
3570 //
3571 if (!grammar || grammar->getGrammarType() == Grammar::DTDGrammarType ||
3572 (getHandleMultipleImports() &&
3573 ((XMLSchemaDescription*)grammar->getGrammarDescription())->
3574 getContextType () == XMLSchemaDescription::CONTEXT_IMPORT))
3575 {
3576 if (fLoadSchema || ignoreLoadSchema)
3577 {
3578 XSDDOMParser parser(0, fMemoryManager, 0);
3579
3580 parser.setValidationScheme(XercesDOMParser::Val_Never);
3581 parser.setDoNamespaces(true);
3582 parser.setUserEntityHandler(fEntityHandler);
3583 parser.setUserErrorReporter(fErrorReporter);
3584
3585 //Normalize sysId
3586 XMLBufBid nnSys(&fBufMgr);
3587 XMLBuffer& normalizedSysId = nnSys.getBuffer();
3588 XMLString::removeChar(loc, 0xFFFF, normalizedSysId);
3589 const XMLCh* normalizedURI = normalizedSysId.getRawBuffer();
3590
3591 // Create a buffer for expanding the system id
3592 XMLBufBid bbSys(&fBufMgr);
3593 XMLBuffer& expSysId = bbSys.getBuffer();
3594
3595 // Allow the entity handler to expand the system id if they choose
3596 // to do so.
3597 InputSource* srcToFill = 0;
3598 if (fEntityHandler)
3599 {
3600 if (!fEntityHandler->expandSystemId(normalizedURI, expSysId))
3601 expSysId.set(normalizedURI);
3602
3603 ReaderMgr::LastExtEntityInfo lastInfo;
3604 fReaderMgr.getLastExtEntityInfo(lastInfo);
3605 XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::SchemaGrammar,
3606 expSysId.getRawBuffer(), uri, XMLUni::fgZeroLenString, lastInfo.systemId,
3607 &fReaderMgr);
3608 srcToFill = fEntityHandler->resolveEntity(&resourceIdentifier);
3609 }
3610 else
3611 {
3612 expSysId.set(normalizedURI);
3613 }
3614
3615 // If they didn't create a source via the entity handler, then we
3616 // have to create one on our own.
3617 if (!srcToFill)
3618 {
3619 if (fDisableDefaultEntityResolution)
3620 return;
3621
3622 ReaderMgr::LastExtEntityInfo lastInfo;
3623 fReaderMgr.getLastExtEntityInfo(lastInfo);
3624
3625 XMLURL urlTmp(fMemoryManager);
3626 if ((!urlTmp.setURL(lastInfo.systemId, expSysId.getRawBuffer(), urlTmp)) ||
3627 (urlTmp.isRelative()))
3628 {
3629 if (!fStandardUriConformant)
3630 {
3631 XMLBufBid ddSys(&fBufMgr);
3632 XMLBuffer& resolvedSysId = ddSys.getBuffer();
3633 XMLUri::normalizeURI(expSysId.getRawBuffer(), resolvedSysId);
3634
3635 srcToFill = new (fMemoryManager) LocalFileInputSource
3636 (
3637 lastInfo.systemId
3638 , resolvedSysId.getRawBuffer()
3639 , fMemoryManager
3640 );
3641 }
3642 else
3643 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
3644 }
3645 else
3646 {
3647 if (fStandardUriConformant && urlTmp.hasInvalidChar())
3648 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
3649
3650 srcToFill = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager);
3651 }
3652 }
3653
3654 // Put a janitor on the input source
3655 Janitor<InputSource> janSrc(srcToFill);
3656
3657 // Check if this exact schema has already been seen.
3658 //
3659 const XMLCh* sysId = srcToFill->getSystemId();
3660 unsigned int uriId = (uri && *uri) ? fURIStringPool->addOrFind(uri) : fEmptyNamespaceId;
3661 SchemaInfo* importSchemaInfo = 0;
3662
3663 if (fUseCachedGrammar)
3664 importSchemaInfo = fCachedSchemaInfoList->get(sysId, uriId);
3665
3666 if (!importSchemaInfo && !fToCacheGrammar)
3667 importSchemaInfo = fSchemaInfoList->get(sysId, uriId);
3668
3669 if (importSchemaInfo)
3670 {
3671 // We haven't added any new grammars so it is safe to just
3672 // return.
3673 //
3674 return;
3675 }
3676
3677 // Should just issue warning if the schema is not found
3678 bool flag = srcToFill->getIssueFatalErrorIfNotFound();
3679 srcToFill->setIssueFatalErrorIfNotFound(false);
3680
3681 parser.parse(*srcToFill);
3682
3683 // Reset the InputSource
3684 srcToFill->setIssueFatalErrorIfNotFound(flag);
3685
3686 if (parser.getSawFatal() && fExitOnFirstFatal)
3687 emitError(XMLErrs::SchemaScanFatalError);
3688
3689 DOMDocument* document = parser.getDocument(); //Our Grammar
3690
3691 if (document != 0) {
3692
3693 DOMElement* root = document->getDocumentElement();// This is what we pass to TraverserSchema
3694 if (root != 0)
3695 {
3696 const XMLCh* newUri = root->getAttribute(SchemaSymbols::fgATT_TARGETNAMESPACE);
3697 bool newGrammar = false;
3698 if (!XMLString::equals(newUri, uri)) {
3699 if (fValidate || fValScheme == Val_Auto) {
3700 fValidator->emitError(XMLValid::WrongTargetNamespace, loc, uri);
3701 }
3702
3703 grammar = fGrammarResolver->getGrammar(newUri);
3704 newGrammar = true;
3705 }
3706
3707 if (!grammar ||
3708 grammar->getGrammarType() == Grammar::DTDGrammarType ||
3709 (getHandleMultipleImports() &&
3710 ((XMLSchemaDescription*) grammar->getGrammarDescription())->
3711 getContextType () == XMLSchemaDescription::CONTEXT_IMPORT))
3712 {
3713 // If we switched namespace URI, recheck the schema info.
3714 //
3715 if (newGrammar)
3716 {
3717 unsigned int newUriId = (newUri && *newUri) ? fURIStringPool->addOrFind(newUri) : fEmptyNamespaceId;
3718
3719 if (fUseCachedGrammar)
3720 importSchemaInfo = fCachedSchemaInfoList->get(sysId, newUriId);
3721
3722 if (!importSchemaInfo && !fToCacheGrammar)
3723 importSchemaInfo = fSchemaInfoList->get(sysId, newUriId);
3724
3725 if (importSchemaInfo)
3726 return;
3727 }
3728
3729 // Since we have seen a grammar, set our validation flag
3730 // at this point if the validation scheme is auto
3731 if (fValScheme == Val_Auto && !fValidate) {
3732 fValidate = true;
3733 fElemStack.setValidationFlag(fValidate);
3734 }
3735
3736 bool grammarFound = grammar &&
3737 grammar->getGrammarType() == Grammar::SchemaGrammarType;
3738
3739 SchemaGrammar* schemaGrammar;
3740
3741 if (grammarFound) {
3742 schemaGrammar = (SchemaGrammar*) grammar;
3743 }
3744 else {
3745 schemaGrammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager);
3746 }
3747
3748 XMLSchemaDescription* gramDesc = (XMLSchemaDescription*) schemaGrammar->getGrammarDescription();
3749
3750 gramDesc->setContextType(XMLSchemaDescription::CONTEXT_PREPARSE);
3751 gramDesc->setLocationHints(sysId);
3752
3753 TraverseSchema traverseSchema
3754 (
3755 root
3756 , fURIStringPool
3757 , schemaGrammar
3758 , fGrammarResolver
3759 , fUseCachedGrammar ? fCachedSchemaInfoList : fSchemaInfoList
3760 , fToCacheGrammar ? fCachedSchemaInfoList : fSchemaInfoList
3761 , this
3762 , sysId
3763 , fEntityHandler
3764 , fErrorReporter
3765 , fMemoryManager
3766 , grammarFound
3767 );
3768
3769 // Reset the now invalid schema roots in the collected
3770 // schema info entries.
3771 //
3772 {
3773 RefHash2KeysTableOfEnumerator<SchemaInfo> i (
3774 fToCacheGrammar ? fCachedSchemaInfoList : fSchemaInfoList);
3775
3776 while (i.hasMoreElements ())
3777 i.nextElement().resetRoot ();
3778 }
3779
3780 if (fGrammarType == Grammar::DTDGrammarType) {
3781 fGrammar = schemaGrammar;
3782 fGrammarType = Grammar::SchemaGrammarType;
3783 fValidator->setGrammar(fGrammar);
3784 }
3785
3786 if (fValidate) {
3787 // validate the Schema scan so far
3788 fValidator->preContentValidation(false);
3789 }
3790 }
3791 }
3792 }
3793 }
3794 }
3795 else
3796 {
3797 // Since we have seen a grammar, set our validation flag
3798 // at this point if the validation scheme is auto
3799 if (fValScheme == Val_Auto && !fValidate) {
3800 fValidate = true;
3801 fElemStack.setValidationFlag(fValidate);
3802 }
3803
3804 // we have seen a schema, so set up the fValidator as fSchemaValidator
3805 if (fGrammarType == Grammar::DTDGrammarType) {
3806 fGrammar = grammar;
3807 fGrammarType = Grammar::SchemaGrammarType;
3808 fValidator->setGrammar(fGrammar);
3809 }
3810 }
3811 // update fModel; rely on the grammar resolver to do this
3812 // efficiently
3813 if(getPSVIHandler())
3814 fModel = fGrammarResolver->getXSModel();
3815 }
3816
resolveSystemId(const XMLCh * const sysId,const XMLCh * const pubId)3817 InputSource* SGXMLScanner::resolveSystemId(const XMLCh* const sysId
3818 ,const XMLCh* const pubId)
3819 {
3820 //Normalize sysId
3821 XMLBufBid nnSys(&fBufMgr);
3822 XMLBuffer& normalizedSysId = nnSys.getBuffer();
3823 XMLString::removeChar(sysId, 0xFFFF, normalizedSysId);
3824 const XMLCh* normalizedURI = normalizedSysId.getRawBuffer();
3825
3826 // Create a buffer for expanding the system id
3827 XMLBufBid bbSys(&fBufMgr);
3828 XMLBuffer& expSysId = bbSys.getBuffer();
3829
3830 // Allow the entity handler to expand the system id if they choose
3831 // to do so.
3832 InputSource* srcToFill = 0;
3833 if (fEntityHandler)
3834 {
3835 if (!fEntityHandler->expandSystemId(normalizedURI, expSysId))
3836 expSysId.set(normalizedURI);
3837
3838 ReaderMgr::LastExtEntityInfo lastInfo;
3839 fReaderMgr.getLastExtEntityInfo(lastInfo);
3840 XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::ExternalEntity,
3841 expSysId.getRawBuffer(), 0, pubId, lastInfo.systemId,
3842 &fReaderMgr);
3843 srcToFill = fEntityHandler->resolveEntity(&resourceIdentifier);
3844 }
3845 else
3846 {
3847 expSysId.set(normalizedURI);
3848 }
3849
3850 // If they didn't create a source via the entity handler, then we
3851 // have to create one on our own.
3852 if (!srcToFill)
3853 {
3854 if (fDisableDefaultEntityResolution)
3855 return 0;
3856
3857 ReaderMgr::LastExtEntityInfo lastInfo;
3858 fReaderMgr.getLastExtEntityInfo(lastInfo);
3859
3860 XMLURL urlTmp(fMemoryManager);
3861 if ((!urlTmp.setURL(lastInfo.systemId, expSysId.getRawBuffer(), urlTmp)) ||
3862 (urlTmp.isRelative()))
3863 {
3864 if (!fStandardUriConformant)
3865 {
3866 XMLBufBid ddSys(&fBufMgr);
3867 XMLBuffer& resolvedSysId = ddSys.getBuffer();
3868 XMLUri::normalizeURI(expSysId.getRawBuffer(), resolvedSysId);
3869
3870 srcToFill = new (fMemoryManager) LocalFileInputSource
3871 (
3872 lastInfo.systemId
3873 , resolvedSysId.getRawBuffer()
3874 , fMemoryManager
3875 );
3876 }
3877 else
3878 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
3879 }
3880 else
3881 {
3882 if (fStandardUriConformant && urlTmp.hasInvalidChar())
3883 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
3884 srcToFill = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager);
3885 }
3886 }
3887
3888 return srcToFill;
3889 }
3890
3891
3892 // ---------------------------------------------------------------------------
3893 // SGXMLScanner: Private grammar preparsing methods
3894 // ---------------------------------------------------------------------------
loadXMLSchemaGrammar(const InputSource & src,const bool toCache)3895 Grammar* SGXMLScanner::loadXMLSchemaGrammar(const InputSource& src,
3896 const bool toCache)
3897 {
3898 // Reset the validators
3899 fSchemaValidator->reset();
3900 fSchemaValidator->setErrorReporter(fErrorReporter);
3901 fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal);
3902 fSchemaValidator->setGrammarResolver(fGrammarResolver);
3903
3904 if (fValidatorFromUser)
3905 fValidator->reset();
3906
3907 XSDDOMParser parser(0, fMemoryManager, 0);
3908
3909 parser.setValidationScheme(XercesDOMParser::Val_Never);
3910 parser.setDoNamespaces(true);
3911 parser.setUserEntityHandler(fEntityHandler);
3912 parser.setUserErrorReporter(fErrorReporter);
3913
3914 // Should just issue warning if the schema is not found
3915 bool flag = src.getIssueFatalErrorIfNotFound();
3916 ((InputSource&) src).setIssueFatalErrorIfNotFound(false);
3917
3918 parser.parse(src);
3919
3920 // Reset the InputSource
3921 ((InputSource&) src).setIssueFatalErrorIfNotFound(flag);
3922
3923 if (parser.getSawFatal() && fExitOnFirstFatal)
3924 emitError(XMLErrs::SchemaScanFatalError);
3925
3926 DOMDocument* document = parser.getDocument(); //Our Grammar
3927
3928 if (document != 0) {
3929
3930 DOMElement* root = document->getDocumentElement();// This is what we pass to TraverserSchema
3931 if (root != 0)
3932 {
3933 const XMLCh* nsUri = root->getAttribute(SchemaSymbols::fgATT_TARGETNAMESPACE);
3934 Grammar* grammar = fGrammarResolver->getGrammar(nsUri);
3935
3936 // Check if this exact schema has already been seen.
3937 //
3938 const XMLCh* sysId = src.getSystemId();
3939 SchemaInfo* importSchemaInfo = 0;
3940
3941 if (grammar)
3942 {
3943 if (nsUri && *nsUri)
3944 importSchemaInfo = fCachedSchemaInfoList->get(sysId, fURIStringPool->addOrFind(nsUri));
3945 else
3946 importSchemaInfo = fCachedSchemaInfoList->get(sysId, fEmptyNamespaceId);
3947 }
3948
3949 if (!importSchemaInfo)
3950 {
3951 bool grammarFound = grammar &&
3952 grammar->getGrammarType() == Grammar::SchemaGrammarType &&
3953 getHandleMultipleImports();
3954
3955 SchemaGrammar* schemaGrammar;
3956
3957 if (grammarFound)
3958 schemaGrammar = (SchemaGrammar*) grammar;
3959 else
3960 schemaGrammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager);
3961
3962 XMLSchemaDescription* gramDesc = (XMLSchemaDescription*) schemaGrammar->getGrammarDescription();
3963 gramDesc->setContextType(XMLSchemaDescription::CONTEXT_PREPARSE);
3964 gramDesc->setLocationHints(sysId);
3965
3966 TraverseSchema traverseSchema
3967 (
3968 root
3969 , fURIStringPool
3970 , schemaGrammar
3971 , fGrammarResolver
3972 , fCachedSchemaInfoList
3973 , toCache ? fCachedSchemaInfoList : fSchemaInfoList
3974 , this
3975 , sysId
3976 , fEntityHandler
3977 , fErrorReporter
3978 , fMemoryManager
3979 , grammarFound
3980 );
3981
3982 grammar = schemaGrammar;
3983
3984 // Reset the now invalid schema roots in the collected
3985 // schema info entries.
3986 //
3987 {
3988 RefHash2KeysTableOfEnumerator<SchemaInfo> i (
3989 toCache ? fCachedSchemaInfoList : fSchemaInfoList);
3990
3991 while (i.hasMoreElements ())
3992 i.nextElement().resetRoot ();
3993 }
3994 }
3995
3996 if (fValidate) {
3997 // validate the Schema scan so far
3998 fValidator->setGrammar(grammar);
3999 fValidator->preContentValidation(false);
4000 }
4001
4002 if (toCache) {
4003 fGrammarResolver->cacheGrammars();
4004 }
4005
4006 if(getPSVIHandler())
4007 fModel = fGrammarResolver->getXSModel();
4008
4009 return grammar;
4010 }
4011 }
4012
4013 return 0;
4014 }
4015
4016
4017
4018 // ---------------------------------------------------------------------------
4019 // SGXMLScanner: Private parsing methods
4020 // ---------------------------------------------------------------------------
4021
4022 // This method is called to do a raw scan of an attribute value. It does not
4023 // do normalization (since we don't know their types yet.) It just scans the
4024 // value and does entity expansion.
4025 //
4026 // End of entity's must be dealt with here. During DTD scan, they can come
4027 // from external entities. During content, they can come from any entity.
4028 // We just eat the end of entity and continue with our scan until we come
4029 // to the closing quote. If an unterminated value causes us to go through
4030 // subsequent entities, that will cause errors back in the calling code,
4031 // but there's little we can do about it here.
basicAttrValueScan(const XMLCh * const attrName,XMLBuffer & toFill)4032 bool SGXMLScanner::basicAttrValueScan(const XMLCh* const attrName, XMLBuffer& toFill)
4033 {
4034 // Reset the target buffer
4035 toFill.reset();
4036
4037 // Get the next char which must be a single or double quote
4038 XMLCh quoteCh;
4039 if (!fReaderMgr.skipIfQuote(quoteCh))
4040 return false;
4041
4042 // We have to get the current reader because we have to ignore closing
4043 // quotes until we hit the same reader again.
4044 const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum();
4045
4046 // Loop until we get the attribute value. Note that we use a double
4047 // loop here to avoid the setup/teardown overhead of the exception
4048 // handler on every round.
4049 while (true)
4050 {
4051 try
4052 {
4053 while(true)
4054 {
4055 XMLCh nextCh = fReaderMgr.getNextChar();
4056
4057 if (nextCh != quoteCh)
4058 {
4059 if (nextCh != chAmpersand)
4060 {
4061 if ((nextCh < 0xD800) || (nextCh > 0xDFFF))
4062 {
4063 // Its got to at least be a valid XML character
4064 if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
4065 {
4066 if (nextCh == 0)
4067 ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
4068
4069 XMLCh tmpBuf[9];
4070 XMLString::binToText
4071 (
4072 nextCh
4073 , tmpBuf
4074 , 8
4075 , 16
4076 , fMemoryManager
4077 );
4078 emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
4079 }
4080 } else // its a surrogate
4081 {
4082 // Deal with surrogate pairs
4083
4084 // we expect a a leading surrogate.
4085 if (nextCh <= 0xDBFF)
4086 {
4087 toFill.append(nextCh);
4088
4089 // process the trailing surrogate
4090 nextCh = fReaderMgr.getNextChar();
4091
4092 // it should be a trailing surrogate.
4093 if ((nextCh < 0xDC00) || (nextCh > 0xDFFF))
4094 {
4095 emitError(XMLErrs::Expected2ndSurrogateChar);
4096 }
4097 } else
4098 {
4099 // Its a trailing surrogate, but we are not expecting it
4100 emitError(XMLErrs::Unexpected2ndSurrogateChar);
4101 }
4102 }
4103 } else // its a chAmpersand
4104 {
4105 // Check for an entity ref . We ignore the empty flag in
4106 // this one.
4107
4108 bool escaped;
4109 XMLCh firstCh;
4110 XMLCh secondCh
4111 ;
4112 // If it was not returned directly, then jump back up
4113 if (scanEntityRef(true, firstCh, secondCh, escaped) == EntityExp_Returned)
4114 {
4115 // If it was escaped, then put in a 0xFFFF value. This will
4116 // be used later during validation and normalization of the
4117 // value to know that the following character was via an
4118 // escape char.
4119 if (escaped)
4120 toFill.append(0xFFFF);
4121
4122 toFill.append(firstCh);
4123 if (secondCh)
4124 toFill.append(secondCh);
4125 }
4126 continue;
4127 }
4128 } else // its a quoteCh
4129 {
4130 // Check for our ending quote. It has to be in the same entity
4131 // as where we started. Quotes in nested entities are ignored.
4132
4133 if (curReader == fReaderMgr.getCurrentReaderNum())
4134 {
4135 return true;
4136 }
4137
4138 // Watch for spillover into a previous entity
4139 if (curReader > fReaderMgr.getCurrentReaderNum())
4140 {
4141 emitError(XMLErrs::PartialMarkupInEntity);
4142 return false;
4143 }
4144 }
4145
4146 // add it to the buffer
4147 toFill.append(nextCh);
4148
4149 }
4150 }
4151 catch(const EndOfEntityException&)
4152 {
4153 // Just eat it and continue.
4154 }
4155 }
4156 return true;
4157 }
4158
4159
//  This method scans a CDATA section. It collects the characters into one
//  of the temp buffers and calls the document handler, if any, with the
//  characters. It assumes that the <![CDATA string has been scanned before
//  this call.
scanCDSection()4164 void SGXMLScanner::scanCDSection()
4165 {
4166 static const XMLCh CDataClose[] =
4167 {
4168 chCloseSquare, chCloseAngle, chNull
4169 };
4170
4171 // The next character should be the opening square bracket. If not
4172 // issue an error, but then try to recover by skipping any whitespace
4173 // and checking again.
4174 if (!fReaderMgr.skippedChar(chOpenSquare))
4175 {
4176 emitError(XMLErrs::ExpectedOpenSquareBracket);
4177 fReaderMgr.skipPastSpaces();
4178
4179 // If we still don't find it, then give up, else keep going
4180 if (!fReaderMgr.skippedChar(chOpenSquare))
4181 return;
4182 }
4183
4184 // Get a buffer for this
4185 XMLBufBid bbCData(&fBufMgr);
4186
4187 // We just scan forward until we hit the end of CDATA section sequence.
4188 // CDATA is effectively a big escape mechanism so we don't treat markup
4189 // characters specially here.
4190 bool emittedError = false;
4191 bool gotLeadingSurrogate = false;
4192
4193 // Get the character data opts for the current element
4194 XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData;
4195 // And see if the current element is a 'Children' style content model
4196 ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
4197 if(currType)
4198 {
4199 SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType();
4200 if(modelType == SchemaElementDecl::Children ||
4201 modelType == SchemaElementDecl::ElementOnlyEmpty)
4202 charOpts = XMLElementDecl::SpacesOk;
4203 else if(modelType == SchemaElementDecl::Empty)
4204 charOpts = XMLElementDecl::NoCharData;
4205 }
4206
4207 // should not be necessary when PSVI on element decl removed
4208 const ElemStack::StackElem* topElem = fElemStack.topElement();
4209
4210 while (true)
4211 {
4212 const XMLCh nextCh = fReaderMgr.getNextChar();
4213
4214 // Watch for unexpected end of file
4215 if (!nextCh)
4216 {
4217 emitError(XMLErrs::UnterminatedCDATASection);
4218 ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
4219 }
4220
4221 if (fValidate && fStandalone && (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)))
4222 {
4223 // This document is standalone; this ignorable CDATA whitespace is forbidden.
4224 // XML 1.0, Section 2.9
4225 // And see if the current element is a 'Children' style content model
4226 if (topElem->fThisElement->isExternal()) {
4227
4228 if (charOpts == XMLElementDecl::SpacesOk) // Element Content
4229 {
4230 // Error - standalone should have a value of "no" as whitespace detected in an
4231 // element type with element content whose element declaration was external
4232 fValidator->emitError(XMLValid::NoWSForStandalone);
4233 if (getPSVIHandler())
4234 {
4235 // REVISIT:
4236 // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
4237 }
4238 }
4239 }
4240 }
4241
4242 // If this is a close square bracket it could be our closing
4243 // sequence.
4244 if (nextCh == chCloseSquare && fReaderMgr.skippedString(CDataClose))
4245 {
4246 // make sure we were not expecting a trailing surrogate.
4247 if (gotLeadingSurrogate) {
4248 emitError(XMLErrs::Expected2ndSurrogateChar);
4249 }
4250
4251 XMLSize_t xsLen = bbCData.getLen();
4252 const XMLCh* xsNormalized = bbCData.getRawBuffer();
4253 if (fValidate) {
4254
4255 DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
4256 if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
4257 {
4258 // normalize the character according to schema whitespace facet
4259 ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf);
4260 xsNormalized = fWSNormalizeBuf.getRawBuffer();
4261 xsLen = fWSNormalizeBuf.getLen();
4262 }
4263
4264 // tell the schema validation about the character data for checkContent later
4265 ((SchemaValidator*)fValidator)->setDatatypeBuffer(xsNormalized);
4266
4267 if (charOpts != XMLElementDecl::AllCharData)
4268 {
4269 // They definitely cannot handle any type of char data
4270 fValidator->emitError(XMLValid::NoCharDataInCM);
4271 if (getPSVIHandler())
4272 {
4273 // REVISIT:
4274 // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
4275 }
4276 }
4277 }
4278
4279 // call all active identity constraints
4280 if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) {
4281 fContent.append(xsNormalized, xsLen);
4282 }
4283
4284 // If we have a doc handler, call it
4285 if (fDocHandler)
4286 {
4287 if (fNormalizeData) {
4288 fDocHandler->docCharacters(xsNormalized, xsLen, true);
4289 }
4290 else {
4291 fDocHandler->docCharacters(
4292 bbCData.getRawBuffer(), bbCData.getLen(), true
4293 );
4294 }
4295 }
4296
4297 // And we are done
4298 break;
4299 }
4300
4301 // Make sure its a valid character. But if we've emitted an error
4302 // already, don't bother with the overhead since we've already told
4303 // them about it.
4304 if (!emittedError)
4305 {
4306 // Deal with surrogate pairs
4307 if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
4308 {
4309 // Its a leading surrogate. If we already got one, then
4310 // issue an error, else set leading flag to make sure that
4311 // we look for a trailing next time.
4312 if (gotLeadingSurrogate)
4313 emitError(XMLErrs::Expected2ndSurrogateChar);
4314 else
4315 gotLeadingSurrogate = true;
4316 }
4317 else
4318 {
4319 // If its a trailing surrogate, make sure that we are
4320 // prepared for that. Else, its just a regular char so make
4321 // sure that we were not expected a trailing surrogate.
4322 if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
4323 {
4324 // Its trailing, so make sure we were expecting it
4325 if (!gotLeadingSurrogate)
4326 emitError(XMLErrs::Unexpected2ndSurrogateChar);
4327 }
4328 else
4329 {
4330 // Its just a char, so make sure we were not expecting a
4331 // trailing surrogate.
4332 if (gotLeadingSurrogate)
4333 emitError(XMLErrs::Expected2ndSurrogateChar);
4334
4335 // Its got to at least be a valid XML character
4336 else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
4337 {
4338 XMLCh tmpBuf[9];
4339 XMLString::binToText
4340 (
4341 nextCh
4342 , tmpBuf
4343 , 8
4344 , 16
4345 , fMemoryManager
4346 );
4347 emitError(XMLErrs::InvalidCharacter, tmpBuf);
4348 emittedError = true;
4349 }
4350 }
4351 gotLeadingSurrogate = false;
4352 }
4353 }
4354
4355 // Add it to the buffer
4356 bbCData.append(nextCh);
4357 }
4358 }
4359
4360
scanCharData(XMLBuffer & toUse)4361 void SGXMLScanner::scanCharData(XMLBuffer& toUse)
4362 {
4363 // We have to watch for the stupid ]]> sequence, which is illegal in
4364 // character data. So this is a little state machine that handles that.
4365 enum States
4366 {
4367 State_Waiting
4368 , State_GotOne
4369 , State_GotTwo
4370 };
4371
4372 // Reset the buffer before we start
4373 toUse.reset();
4374
4375 // Turn on the 'throw at end' flag of the reader manager
4376 ThrowEOEJanitor jan(&fReaderMgr, true);
4377
4378 // In order to be more efficient we have to use kind of a deeply nested
4379 // set of blocks here. The outer block puts on a try and catches end of
4380 // entity exceptions. The inner loop is the per-character loop. If we
4381 // put the try inside the inner loop, it would work but would require
4382 // the exception handling code setup/teardown code to be invoked for
4383 // each character.
4384 XMLCh nextCh;
4385 XMLCh secondCh = 0;
4386 States curState = State_Waiting;
4387 bool escaped = false;
4388 bool gotLeadingSurrogate = false;
4389 bool notDone = true;
4390 while (notDone)
4391 {
4392 try
4393 {
4394 while (true)
4395 {
4396 // Eat through as many plain content characters as possible without
4397 // needing special handling. Moving most content characters here,
4398 // in this one call, rather than running the overall loop once
4399 // per content character, is a speed optimization.
4400 if (curState == State_Waiting && !gotLeadingSurrogate)
4401 {
4402 fReaderMgr.movePlainContentChars(toUse);
4403 }
4404
4405 // Try to get another char from the source
4406 // The code from here on down covers all contengencies,
4407 if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
4408 {
4409 // If we were waiting for a trailing surrogate, its an error
4410 if (gotLeadingSurrogate)
4411 emitError(XMLErrs::Expected2ndSurrogateChar);
4412
4413 notDone = false;
4414 break;
4415 }
4416
4417 // Watch for a reference. Note that the escapement mechanism
4418 // is ignored in this content.
4419 escaped = false;
4420 if (nextCh == chAmpersand)
4421 {
4422 sendCharData(toUse);
4423
4424 // Turn off the throwing at the end of entity during this
4425 ThrowEOEJanitor jan(&fReaderMgr, false);
4426
4427 if (scanEntityRef(false, nextCh, secondCh, escaped) != EntityExp_Returned)
4428 {
4429 gotLeadingSurrogate = false;
4430 continue;
4431 }
4432 }
4433 else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
4434 {
4435 // Deal with surrogate pairs
4436 // Its a leading surrogate. If we already got one, then
4437 // issue an error, else set leading flag to make sure that
4438 // we look for a trailing next time.
4439 if (gotLeadingSurrogate)
4440 emitError(XMLErrs::Expected2ndSurrogateChar);
4441 else
4442 gotLeadingSurrogate = true;
4443 }
4444 else
4445 {
4446 // If its a trailing surrogate, make sure that we are
4447 // prepared for that. Else, its just a regular char so make
4448 // sure that we were not expected a trailing surrogate.
4449 if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
4450 {
4451 // Its trailing, so make sure we were expecting it
4452 if (!gotLeadingSurrogate)
4453 emitError(XMLErrs::Unexpected2ndSurrogateChar);
4454 }
4455 else
4456 {
4457 // Its just a char, so make sure we were not expecting a
4458 // trailing surrogate.
4459 if (gotLeadingSurrogate)
4460 emitError(XMLErrs::Expected2ndSurrogateChar);
4461
4462 // Make sure the returned char is a valid XML char
4463 if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
4464 {
4465 XMLCh tmpBuf[9];
4466 XMLString::binToText
4467 (
4468 nextCh
4469 , tmpBuf
4470 , 8
4471 , 16
4472 , fMemoryManager
4473 );
4474 emitError(XMLErrs::InvalidCharacter, tmpBuf);
4475 }
4476 }
4477 gotLeadingSurrogate = false;
4478 }
4479
4480 // Keep the state machine up to date
4481 if (!escaped)
4482 {
4483 if (nextCh == chCloseSquare)
4484 {
4485 if (curState == State_Waiting)
4486 curState = State_GotOne;
4487 else if (curState == State_GotOne)
4488 curState = State_GotTwo;
4489 }
4490 else if (nextCh == chCloseAngle)
4491 {
4492 if (curState == State_GotTwo)
4493 emitError(XMLErrs::BadSequenceInCharData);
4494 curState = State_Waiting;
4495 }
4496 else
4497 {
4498 curState = State_Waiting;
4499 }
4500 }
4501 else
4502 {
4503 curState = State_Waiting;
4504 }
4505
4506 // Add this char to the buffer
4507 toUse.append(nextCh);
4508
4509 if (secondCh)
4510 {
4511 toUse.append(secondCh);
4512 secondCh=0;
4513 }
4514 }
4515 }
4516 catch(const EndOfEntityException& toCatch)
4517 {
4518 // Some entity ended, so we have to send any accumulated
4519 // chars and send an end of entity event.
4520 sendCharData(toUse);
4521 gotLeadingSurrogate = false;
4522
4523 if (fDocHandler)
4524 fDocHandler->endEntityReference(toCatch.getEntity());
4525 }
4526 }
4527
4528 // Check the validity constraints as per XML 1.0 Section 2.9
4529 if (fValidate && fStandalone)
4530 {
4531 // See if the text contains whitespace
4532 // Get the raw data we need for the callback
4533 const XMLCh* rawBuf = toUse.getRawBuffer();
4534 const XMLSize_t len = toUse.getLen();
4535 const bool isSpaces = fReaderMgr.getCurrentReader()->containsWhiteSpace(rawBuf, len);
4536
4537 if (isSpaces)
4538 {
4539 // And see if the current element is a 'Children' style content model
4540 const ElemStack::StackElem* topElem = fElemStack.topElement();
4541
4542 if (topElem->fThisElement->isExternal()) {
4543
4544 // Get the character data opts for the current element
4545 XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData;
4546 // And see if the current element is a 'Children' style content model
4547 ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
4548 if(currType)
4549 {
4550 SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType();
4551 if(modelType == SchemaElementDecl::Children ||
4552 modelType == SchemaElementDecl::ElementOnlyEmpty)
4553 charOpts = XMLElementDecl::SpacesOk;
4554 }
4555
4556 if (charOpts == XMLElementDecl::SpacesOk) // => Element Content
4557 {
4558 // Error - standalone should have a value of "no" as whitespace detected in an
4559 // element type with element content whose element declaration was external
4560 //
4561 fValidator->emitError(XMLValid::NoWSForStandalone);
4562 if (getPSVIHandler())
4563 {
4564 // REVISIT:
4565 // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
4566 }
4567 }
4568 }
4569 }
4570 }
4571 // Send any char data that we accumulated into the buffer
4572 sendCharData(toUse);
4573 }
4574
4575
4576 // This method will scan a general/character entity ref. It will either
4577 // expand a char ref and return it directly, or push a reader for a general
4578 // entity.
4579 //
4580 // The return value indicates whether the char parameters hold the value
4581 // or whether the value was pushed as a reader, or that it failed.
4582 //
4583 // The escaped flag tells the caller whether the returned parameter resulted
4584 // from a character reference, which escapes the character in some cases. It
4585 // only makes any difference if the return value indicates the value was
4586 // returned directly.
4587 SGXMLScanner::EntityExpRes
scanEntityRef(const bool,XMLCh & firstCh,XMLCh & secondCh,bool & escaped)4588 SGXMLScanner::scanEntityRef( const bool
4589 , XMLCh& firstCh
4590 , XMLCh& secondCh
4591 , bool& escaped)
4592 {
4593 // Assume no escape
4594 secondCh = 0;
4595 escaped = false;
4596
4597 // We have to insure that its all in one entity
4598 const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum();
4599
4600 // If the next char is a pound, then its a character reference and we
4601 // need to expand it always.
4602 if (fReaderMgr.skippedChar(chPound))
4603 {
4604 // Its a character reference, so scan it and get back the numeric
4605 // value it represents.
4606 if (!scanCharRef(firstCh, secondCh))
4607 return EntityExp_Failed;
4608
4609 escaped = true;
4610
4611 if (curReader != fReaderMgr.getCurrentReaderNum())
4612 emitError(XMLErrs::PartialMarkupInEntity);
4613
4614 return EntityExp_Returned;
4615 }
4616
4617 // Expand it since its a normal entity ref
4618 XMLBufBid bbName(&fBufMgr);
4619 int colonPosition;
4620 if (!fReaderMgr.getQName(bbName.getBuffer(), &colonPosition))
4621 {
4622 if (bbName.isEmpty())
4623 emitError(XMLErrs::ExpectedEntityRefName);
4624 else
4625 emitError(XMLErrs::InvalidEntityRefName, bbName.getRawBuffer());
4626 return EntityExp_Failed;
4627 }
4628
4629 // Next char must be a semi-colon. But if its not, just emit
4630 // an error and try to continue.
4631 if (!fReaderMgr.skippedChar(chSemiColon))
4632 emitError(XMLErrs::UnterminatedEntityRef, bbName.getRawBuffer());
4633
4634 // Make sure we ended up on the same entity reader as the & char
4635 if (curReader != fReaderMgr.getCurrentReaderNum())
4636 emitError(XMLErrs::PartialMarkupInEntity);
4637
4638 // Look up the name in the general entity pool
4639 // If it does not exist, then obviously an error
4640 if (!fEntityTable->containsKey(bbName.getRawBuffer()))
4641 {
4642 // XML 1.0 Section 4.1
4643 // Well-formedness Constraint for entity not found:
4644 // In a document without any DTD, a document with only an internal DTD subset which contains no parameter entity references,
4645 // or a document with "standalone='yes'", for an entity reference that does not occur within the external subset
4646 // or a parameter entity
4647 if (fStandalone || fHasNoDTD)
4648 emitError(XMLErrs::EntityNotFound, bbName.getRawBuffer());
4649
4650 return EntityExp_Failed;
4651 }
4652
4653 // here's where we need to check if there's a SecurityManager,
4654 // how many entity references we've had
4655 if(fSecurityManager != 0 && ++fEntityExpansionCount > fEntityExpansionLimit) {
4656 XMLCh expLimStr[32];
4657 XMLString::sizeToText(fEntityExpansionLimit, expLimStr, 31, 10, fMemoryManager);
4658 emitError
4659 (
4660 XMLErrs::EntityExpansionLimitExceeded
4661 , expLimStr
4662 );
4663 // there seems nothing better to be done than to reset the entity expansion limit
4664 fEntityExpansionCount = 0;
4665 }
4666
4667 firstCh = fEntityTable->get(bbName.getRawBuffer());
4668 escaped = true;
4669 return EntityExp_Returned;
4670 }
4671
4672
switchGrammar(const XMLCh * const newGrammarNameSpace)4673 bool SGXMLScanner::switchGrammar(const XMLCh* const newGrammarNameSpace)
4674 {
4675 Grammar* tempGrammar = fGrammarResolver->getGrammar(newGrammarNameSpace);
4676
4677 if (!tempGrammar) {
4678 tempGrammar = fSchemaGrammar;
4679 }
4680
4681 if (!tempGrammar)
4682 return false;
4683 else {
4684 fGrammar = tempGrammar;
4685 fGrammarType = fGrammar->getGrammarType();
4686 if (fGrammarType == Grammar::DTDGrammarType) {
4687 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager);
4688 }
4689
4690 fValidator->setGrammar(fGrammar);
4691 return true;
4692 }
4693 }
4694
// check if we should skip or lax the validation of the element
// if skip - no validation
// if lax - validate only if the element is found
laxElementValidation(QName * element,ContentLeafNameTypeVector * cv,const XMLContentModel * const cm,const XMLSize_t parentElemDepth)4698 bool SGXMLScanner::laxElementValidation(QName* element, ContentLeafNameTypeVector* cv,
4699 const XMLContentModel* const cm,
4700 const XMLSize_t parentElemDepth)
4701 {
4702 bool skipThisOne = false;
4703 bool laxThisOne = false;
4704 unsigned int elementURI = element->getURI();
4705 unsigned int currState = fElemState[parentElemDepth];
4706 unsigned int currLoop = fElemLoopState[parentElemDepth];
4707
4708 if (currState == XMLContentModel::gInvalidTrans) {
4709 return laxThisOne;
4710 }
4711
4712 SubstitutionGroupComparator comparator(fGrammarResolver, fURIStringPool);
4713
4714 if (cv) {
4715 XMLSize_t i = 0;
4716 XMLSize_t leafCount = cv->getLeafCount();
4717 unsigned int nextState = 0;
4718
4719 for (; i < leafCount; i++) {
4720
4721 QName* fElemMap = cv->getLeafNameAt(i);
4722 unsigned int uri = fElemMap->getURI();
4723 ContentSpecNode::NodeTypes type = cv->getLeafTypeAt(i);
4724
4725 if (type == ContentSpecNode::Leaf) {
4726 if (((uri == elementURI)
4727 && XMLString::equals(fElemMap->getLocalPart(), element->getLocalPart()))
4728 || comparator.isEquivalentTo(element, fElemMap)) {
4729
4730 nextState = cm->getNextState(currState, i);
4731
4732 if (nextState != XMLContentModel::gInvalidTrans)
4733 break;
4734 }
4735 } else if ((type & 0x0f) == ContentSpecNode::Any) {
4736 nextState = cm->getNextState(currState, i);
4737 if (nextState != XMLContentModel::gInvalidTrans)
4738 break;
4739 }
4740 else if ((type & 0x0f) == ContentSpecNode::Any_Other) {
4741 if (uri != elementURI && elementURI != fEmptyNamespaceId) {
4742 nextState = cm->getNextState(currState, i);
4743 if (nextState != XMLContentModel::gInvalidTrans)
4744 break;
4745 }
4746 }
4747 else if ((type & 0x0f) == ContentSpecNode::Any_NS) {
4748 if (uri == elementURI) {
4749 nextState = cm->getNextState(currState, i);
4750 if (nextState != XMLContentModel::gInvalidTrans)
4751 break;
4752 }
4753 }
4754
4755 } // for
4756
4757 if (i == leafCount) { // no match
4758 fElemState[parentElemDepth] = XMLContentModel::gInvalidTrans;
4759 fElemLoopState[parentElemDepth] = 0;
4760 return laxThisOne;
4761 }
4762
4763 ContentSpecNode::NodeTypes type = cv->getLeafTypeAt(i);
4764 if ((type & 0x0f) == ContentSpecNode::Any ||
4765 (type & 0x0f) == ContentSpecNode::Any_Other ||
4766 (type & 0x0f) == ContentSpecNode::Any_NS)
4767 {
4768 if (type == ContentSpecNode::Any_Skip ||
4769 type == ContentSpecNode::Any_NS_Skip ||
4770 type == ContentSpecNode::Any_Other_Skip) {
4771 skipThisOne = true;
4772 }
4773 else if (type == ContentSpecNode::Any_Lax ||
4774 type == ContentSpecNode::Any_NS_Lax ||
4775 type == ContentSpecNode::Any_Other_Lax) {
4776 laxThisOne = true;
4777 }
4778 }
4779 fElemState[parentElemDepth] = nextState;
4780 fElemLoopState[parentElemDepth] = currLoop;
4781 } // if
4782
4783 if (skipThisOne) {
4784 fValidate = false;
4785 fElemStack.setValidationFlag(fValidate);
4786 }
4787
4788 return laxThisOne;
4789 }
4790
4791
// check if there is an AnyAttribute, and if so, see if we should lax or skip
// if skip - no validation
// if lax - validate only if the attribute is found
anyAttributeValidation(SchemaAttDef * attWildCard,unsigned int uriId,bool & skipThisOne,bool & laxThisOne)4795 bool SGXMLScanner::anyAttributeValidation(SchemaAttDef* attWildCard, unsigned int uriId, bool& skipThisOne, bool& laxThisOne)
4796 {
4797 XMLAttDef::AttTypes wildCardType = attWildCard->getType();
4798 bool anyEncountered = false;
4799 skipThisOne = false;
4800 laxThisOne = false;
4801 if (wildCardType == XMLAttDef::Any_Any)
4802 anyEncountered = true;
4803 else if (wildCardType == XMLAttDef::Any_Other) {
4804 if (attWildCard->getAttName()->getURI() != uriId
4805 && uriId != fEmptyNamespaceId)
4806 anyEncountered = true;
4807 }
4808 else if (wildCardType == XMLAttDef::Any_List) {
4809 ValueVectorOf<unsigned int>* nameURIList = attWildCard->getNamespaceList();
4810 XMLSize_t listSize = (nameURIList) ? nameURIList->size() : 0;
4811
4812 if (listSize) {
4813 for (XMLSize_t i=0; i < listSize; i++) {
4814 if (nameURIList->elementAt(i) == uriId)
4815 anyEncountered = true;
4816 }
4817 }
4818 }
4819
4820 if (anyEncountered) {
4821 XMLAttDef::DefAttTypes defType = attWildCard->getDefaultType();
4822 if (defType == XMLAttDef::ProcessContents_Skip) {
4823 // attribute should just be bypassed,
4824 skipThisOne = true;
4825 if (getPSVIHandler())
4826 {
4827 // REVISIT:
4828 // PSVIAttribute->setValidationAttempted(PSVIItem::VALIDATION_NONE);
4829 }
4830 }
4831 else if (defType == XMLAttDef::ProcessContents_Lax) {
4832 laxThisOne = true;
4833 }
4834 }
4835
4836 return anyEncountered;
4837 }
4838
getAttDefList(ComplexTypeInfo * currType,XMLElementDecl * elemDecl)4839 inline XMLAttDefList& getAttDefList(ComplexTypeInfo* currType, XMLElementDecl* elemDecl)
4840 {
4841 if (currType)
4842 return currType->getAttDefList();
4843 else
4844 return elemDecl->getAttDefList();
4845 }
4846
endElementPSVI(SchemaElementDecl * const elemDecl,DatatypeValidator * const memberDV)4847 void SGXMLScanner::endElementPSVI(SchemaElementDecl* const elemDecl,
4848 DatatypeValidator* const memberDV)
4849 {
4850 PSVIElement::ASSESSMENT_TYPE validationAttempted;
4851 PSVIElement::VALIDITY_STATE validity = PSVIElement::VALIDITY_NOTKNOWN;
4852
4853 if (fPSVIElemContext.fElemDepth > fPSVIElemContext.fFullValidationDepth)
4854 validationAttempted = PSVIElement::VALIDATION_FULL;
4855 else if (fPSVIElemContext.fElemDepth > fPSVIElemContext.fNoneValidationDepth)
4856 validationAttempted = PSVIElement::VALIDATION_NONE;
4857 else
4858 {
4859 validationAttempted = PSVIElement::VALIDATION_PARTIAL;
4860 fPSVIElemContext.fFullValidationDepth =
4861 fPSVIElemContext.fNoneValidationDepth = fPSVIElemContext.fElemDepth - 1;
4862 }
4863
4864 if (fValidate && elemDecl->isDeclared())
4865 {
4866 validity = (fPSVIElemContext.fErrorOccurred)
4867 ? PSVIElement::VALIDITY_INVALID : PSVIElement::VALIDITY_VALID;
4868 }
4869
4870 XSTypeDefinition* typeDef = 0;
4871 bool isMixed = false;
4872 if (fPSVIElemContext.fCurrentTypeInfo)
4873 {
4874 typeDef = (XSTypeDefinition*) fModel->getXSObject(fPSVIElemContext.fCurrentTypeInfo);
4875 SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes)fPSVIElemContext.fCurrentTypeInfo->getContentType();
4876 isMixed = (modelType == SchemaElementDecl::Mixed_Simple
4877 || modelType == SchemaElementDecl::Mixed_Complex);
4878 }
4879 else if (fPSVIElemContext.fCurrentDV)
4880 typeDef = (XSTypeDefinition*) fModel->getXSObject(fPSVIElemContext.fCurrentDV);
4881
4882 XMLCh* canonicalValue = 0;
4883 if (fPSVIElemContext.fNormalizedValue && !isMixed &&
4884 validity == PSVIElement::VALIDITY_VALID)
4885 {
4886 if (memberDV)
4887 canonicalValue = (XMLCh*) memberDV->getCanonicalRepresentation(fPSVIElemContext.fNormalizedValue, fMemoryManager);
4888 else if (fPSVIElemContext.fCurrentDV)
4889 canonicalValue = (XMLCh*) fPSVIElemContext.fCurrentDV->getCanonicalRepresentation(fPSVIElemContext.fNormalizedValue, fMemoryManager);
4890 }
4891
4892 fPSVIElement->reset
4893 (
4894 validity
4895 , validationAttempted
4896 , fRootElemName
4897 , fPSVIElemContext.fIsSpecified
4898 , (elemDecl->isDeclared())
4899 ? (XSElementDeclaration*) fModel->getXSObject(elemDecl) : 0
4900 , typeDef
4901 , (memberDV) ? (XSSimpleTypeDefinition*) fModel->getXSObject(memberDV) : 0
4902 , fModel
4903 , elemDecl->getDefaultValue()
4904 , fPSVIElemContext.fNormalizedValue
4905 , canonicalValue
4906 );
4907
4908 fPSVIHandler->handleElementPSVI
4909 (
4910 elemDecl->getBaseName()
4911 , fURIStringPool->getValueForId(elemDecl->getURI())
4912 , fPSVIElement
4913 );
4914
4915 // decrease element depth
4916 fPSVIElemContext.fElemDepth--;
4917
4918 }
4919
resetPSVIElemContext()4920 void SGXMLScanner::resetPSVIElemContext()
4921 {
4922 fPSVIElemContext.fIsSpecified = false;
4923 fPSVIElemContext.fErrorOccurred = false;
4924 fPSVIElemContext.fElemDepth = -1;
4925 fPSVIElemContext.fFullValidationDepth = -1;
4926 fPSVIElemContext.fNoneValidationDepth = -1;
4927 fPSVIElemContext.fCurrentDV = 0;
4928 fPSVIElemContext.fCurrentTypeInfo = 0;
4929 fPSVIElemContext.fNormalizedValue = 0;
4930 }
4931
4932 XERCES_CPP_NAMESPACE_END
4933