1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 /**
19   * $Id$
20   */
21 
22 
23 
24 // ---------------------------------------------------------------------------
25 //  Includes
26 // ---------------------------------------------------------------------------
27 #include <xercesc/validators/schema/XSDDOMParser.hpp>
28 #include <xercesc/validators/schema/SchemaSymbols.hpp>
29 #include <xercesc/internal/XMLScanner.hpp>
30 #include <xercesc/internal/ElemStack.hpp>
31 #include <xercesc/dom/DOMDocument.hpp>
32 #include <xercesc/dom/impl/DOMElementImpl.hpp>
33 #include <xercesc/dom/impl/DOMAttrImpl.hpp>
34 #include <xercesc/dom/impl/DOMTextImpl.hpp>
35 #include <xercesc/framework/XMLValidityCodes.hpp>
36 
37 XERCES_CPP_NAMESPACE_BEGIN
38 
39 // ---------------------------------------------------------------------------
40 //  XSDDOMParser: Constructors and Destructor
41 // ---------------------------------------------------------------------------
XSDDOMParser(XMLValidator * const valToAdopt,MemoryManager * const manager,XMLGrammarPool * const gramPool)42 XSDDOMParser::XSDDOMParser( XMLValidator* const   valToAdopt
43                           , MemoryManager* const  manager
44                           , XMLGrammarPool* const gramPool):
45     XercesDOMParser(valToAdopt, manager, gramPool)
46     , fSawFatal(false)
47     , fAnnotationDepth(-1)
48     , fInnerAnnotationDepth(-1)
49     , fDepth(-1)
50     , fUserErrorReporter(0)
51     , fUserEntityHandler(0)
52     , fURIs(0)
53     , fAnnotationBuf(1023, manager)
54 
55 {
56     fURIs = new (manager) ValueVectorOf<unsigned int>(16, manager);
57     fXSDErrorReporter.setErrorReporter(this);
58     setValidationScheme(XercesDOMParser::Val_Never);
59     setDoNamespaces(true);
60 }
61 
62 
~XSDDOMParser()63 XSDDOMParser::~XSDDOMParser()
64 {
65     delete fURIs;
66 }
67 
68 
69 // ---------------------------------------------------------------------------
70 //  XSDDOMParser: Helper methods
71 // ---------------------------------------------------------------------------
createElementNSNode(const XMLCh * namespaceURI,const XMLCh * qualifiedName)72 DOMElement* XSDDOMParser::createElementNSNode(const XMLCh *namespaceURI,
73                                               const XMLCh *qualifiedName)
74 {
75     ReaderMgr::LastExtEntityInfo lastInfo;
76     ((ReaderMgr*) fScanner->getLocator())->getLastExtEntityInfo(lastInfo);
77 
78     return getDocument()->createElementNS(namespaceURI, qualifiedName,
79                                           lastInfo.lineNumber, lastInfo.colNumber);
80 }
81 
82 
startAnnotation(const XMLElementDecl & elemDecl,const RefVectorOf<XMLAttr> & attrList,const XMLSize_t attrCount)83 void XSDDOMParser::startAnnotation( const XMLElementDecl&       elemDecl
84                                   , const RefVectorOf<XMLAttr>& attrList
85                                   , const XMLSize_t             attrCount)
86 {
87     fAnnotationBuf.append(chOpenAngle);
88 	fAnnotationBuf.append(elemDecl.getFullName());
89     fAnnotationBuf.append(chSpace);
90 
91     // attributes are a bit of a pain.  To get this right, we have to keep track
92     // of the namespaces we've seen declared, then examine the namespace context
93     // for other namespaces so that we can also include them.
94     // optimized for simplicity and the case that not many
95     // namespaces are declared on this annotation...
96     fURIs->removeAllElements();
97     for (XMLSize_t i=0; i < attrCount; i++) {
98 
99         const XMLAttr* oneAttrib = attrList.elementAt(i);
100         const XMLCh* attrValue = oneAttrib->getValue();
101 
102         if (XMLString::equals(oneAttrib->getName(), XMLUni::fgXMLNSString))
103             fURIs->addElement(fScanner->getPrefixId(XMLUni::fgZeroLenString));
104         else  if (!XMLString::compareNString(oneAttrib->getQName(), XMLUni::fgXMLNSColonString, 6))
105             fURIs->addElement(fScanner->getPrefixId(oneAttrib->getName()));
106 
107         fAnnotationBuf.append(oneAttrib->getQName());
108         fAnnotationBuf.append(chEqual);
109         fAnnotationBuf.append(chDoubleQuote);
110         fAnnotationBuf.append(attrValue);
111         fAnnotationBuf.append(chDoubleQuote);
112         fAnnotationBuf.append(chSpace);
113     }
114 
115     // now we have to look through currently in-scope namespaces to see what
116     // wasn't declared here
117     ValueVectorOf<PrefMapElem*>* namespaceContext = fScanner->getNamespaceContext();
118     for (XMLSize_t j=0; j < namespaceContext->size(); j++)
119     {
120         unsigned int prefId = namespaceContext->elementAt(j)->fPrefId;
121 
122         if (!fURIs->containsElement(prefId)) {
123 
124             const XMLCh* prefix = fScanner->getPrefixForId(prefId);
125 
126             if (XMLString::equals(prefix, XMLUni::fgZeroLenString)) {
127                 fAnnotationBuf.append(XMLUni::fgXMLNSString);
128             }
129             else  {
130                 fAnnotationBuf.append(XMLUni::fgXMLNSColonString);
131                 fAnnotationBuf.append(prefix);
132             }
133 
134             fAnnotationBuf.append(chEqual);
135             fAnnotationBuf.append(chDoubleQuote);
136             fAnnotationBuf.append(fScanner->getURIText(namespaceContext->elementAt(j)->fURIId));
137             fAnnotationBuf.append(chDoubleQuote);
138             fAnnotationBuf.append(chSpace);
139 
140             fURIs->addElement(prefId);
141         }
142     }
143 
144     fAnnotationBuf.append(chCloseAngle);
145     fAnnotationBuf.append(chLF);
146 }
147 
startAnnotationElement(const XMLElementDecl & elemDecl,const RefVectorOf<XMLAttr> & attrList,const XMLSize_t attrCount)148 void XSDDOMParser::startAnnotationElement( const XMLElementDecl&       elemDecl
149                                          , const RefVectorOf<XMLAttr>& attrList
150                                          , const XMLSize_t             attrCount)
151 {
152     fAnnotationBuf.append(chOpenAngle);
153     fAnnotationBuf.append(elemDecl.getFullName());
154     //fAnnotationBuf.append(chSpace);
155 
156     for(XMLSize_t i=0; i < attrCount; i++) {
157 
158         const XMLAttr* oneAttr = attrList.elementAt(i);
159         fAnnotationBuf.append(chSpace);
160         fAnnotationBuf.append(oneAttr ->getQName());
161         fAnnotationBuf.append(chEqual);
162         fAnnotationBuf.append(chDoubleQuote);
163         fAnnotationBuf.append(oneAttr->getValue());
164         fAnnotationBuf.append(chDoubleQuote);
165     }
166 
167     fAnnotationBuf.append(chCloseAngle);
168 }
169 
endAnnotationElement(const XMLElementDecl & elemDecl,bool complete)170 void XSDDOMParser::endAnnotationElement( const XMLElementDecl& elemDecl
171                                        , bool complete)
172 {
173     if (complete)
174     {
175         fAnnotationBuf.append(chLF);
176         fAnnotationBuf.append(chOpenAngle);
177         fAnnotationBuf.append(chForwardSlash);
178         fAnnotationBuf.append(elemDecl.getFullName());
179         fAnnotationBuf.append(chCloseAngle);
180 
181         // note that this is always called after endElement on <annotation>'s
182         // child and before endElement on annotation.
183         // hence, we must make this the child of the current
184         // parent's only child.
185         DOMTextImpl *node = (DOMTextImpl *)fDocument->createTextNode(fAnnotationBuf.getRawBuffer());
186         fCurrentNode->appendChild(node);
187         fAnnotationBuf.reset();
188     }
189     else      //capturing character calls
190     {
191         fAnnotationBuf.append(chOpenAngle);
192         fAnnotationBuf.append(chForwardSlash);
193         fAnnotationBuf.append(elemDecl.getFullName());
194         fAnnotationBuf.append(chCloseAngle);
195     }
196 }
197 
198 
199 // ---------------------------------------------------------------------------
200 //  XSDDOMParser: Setter methods
201 // ---------------------------------------------------------------------------
setUserErrorReporter(XMLErrorReporter * const errorReporter)202 void XSDDOMParser::setUserErrorReporter(XMLErrorReporter* const errorReporter)
203 {
204     fUserErrorReporter = errorReporter;
205     fScanner->setErrorReporter(this);
206 }
207 
setUserEntityHandler(XMLEntityHandler * const entityHandler)208 void XSDDOMParser::setUserEntityHandler(XMLEntityHandler* const entityHandler)
209 {
210     fUserEntityHandler = entityHandler;
211     fScanner->setEntityHandler(this);
212 }
213 
214 
215 // ---------------------------------------------------------------------------
216 //  XSDDOMParser: Implementation of the XMLDocumentHandler interface
217 // ---------------------------------------------------------------------------
startElement(const XMLElementDecl & elemDecl,const unsigned int urlId,const XMLCh * const elemPrefix,const RefVectorOf<XMLAttr> & attrList,const XMLSize_t attrCount,const bool isEmpty,const bool isRoot)218 void XSDDOMParser::startElement( const XMLElementDecl&       elemDecl
219                                , const unsigned int          urlId
220                                , const XMLCh* const          elemPrefix
221                                , const RefVectorOf<XMLAttr>& attrList
222                                , const XMLSize_t             attrCount
223                                , const bool                  isEmpty
224                                , const bool                  isRoot)
225 {
226     fDepth++;
227 
228     // while it is true that non-whitespace character data
229     // may only occur in appInfo or documentation
230     // elements, it's certainly legal for comments and PI's to
231     // occur as children of annotation; we need
232     // to account for these here.
233     if (fAnnotationDepth == -1)
234     {
235         if (XMLString::equals(elemDecl.getBaseName(), SchemaSymbols::fgELT_ANNOTATION) &&
236             XMLString::equals(getURIText(urlId), SchemaSymbols::fgURI_SCHEMAFORSCHEMA))
237         {
238 
239             fAnnotationDepth = fDepth;
240             startAnnotation(elemDecl, attrList, attrCount);
241         }
242     }
243     else if (fDepth == fAnnotationDepth+1)
244     {
245         fInnerAnnotationDepth = fDepth;
246         startAnnotationElement(elemDecl, attrList, attrCount);
247     }
248     else
249     {
250         startAnnotationElement(elemDecl, attrList, attrCount);
251         if(isEmpty)
252             endElement(elemDecl, urlId, isRoot, elemPrefix);
253         // avoid falling through; don't call startElement in this case
254         return;
255     }
256 
257     DOMElement *elem;
258     if (urlId != fScanner->getEmptyNamespaceId())  //TagName has a prefix
259     {
260         if (elemPrefix && *elemPrefix)
261         {
262             XMLBufBid elemQName(&fBufMgr);
263             elemQName.set(elemPrefix);
264             elemQName.append(chColon);
265             elemQName.append(elemDecl.getBaseName());
266             elem = createElementNSNode(
267                 fScanner->getURIText(urlId), elemQName.getRawBuffer());
268         }
269         else {
270             elem = createElementNSNode(
271                 fScanner->getURIText(urlId), elemDecl.getBaseName());
272         }
273     }
274     else {
275         elem = createElementNSNode(0, elemDecl.getBaseName());
276     }
277 
278     DOMElementImpl *elemImpl = (DOMElementImpl *) elem;
279     for (XMLSize_t index = 0; index < attrCount; ++index)
280     {
281         const XMLAttr* oneAttrib = attrList.elementAt(index);
282         unsigned int attrURIId = oneAttrib->getURIId();
283         const XMLCh* namespaceURI = 0;
284 
285         //for xmlns=...
286         if (XMLString::equals(oneAttrib->getName(), XMLUni::fgXMLNSString))
287             attrURIId = fScanner->getXMLNSNamespaceId();
288 
289         //TagName has a prefix
290         if (attrURIId != fScanner->getEmptyNamespaceId())
291             namespaceURI = fScanner->getURIText(attrURIId); //get namespaceURI
292 
293         //  revisit.  Optimize to init the named node map to the
294         //            right size up front.
295         DOMAttrImpl *attr = (DOMAttrImpl *)
296             fDocument->createAttributeNS(namespaceURI, oneAttrib->getQName());
297         attr->setValue(oneAttrib -> getValue());
298         DOMNode* remAttr = elemImpl->setAttributeNodeNS(attr);
299         if (remAttr)
300             remAttr->release();
301 
302         // Attributes of type ID.  If this is one, add it to the hashtable of IDs
303         //   that is constructed for use by GetElementByID().
304         if (oneAttrib->getType()==XMLAttDef::ID)
305         {
306             if (fDocument->fNodeIDMap == 0)
307                 fDocument->fNodeIDMap = new (fDocument) DOMNodeIDMap(500, fDocument);
308             fDocument->fNodeIDMap->add(attr);
309             attr->fNode.isIdAttr(true);
310         }
311 
312         attr->setSpecified(oneAttrib->getSpecified());
313     }
314 
315     // set up the default attributes
316     if (elemDecl.hasAttDefs())
317 	{
318         XMLAttDefList* defAttrs = &elemDecl.getAttDefList();
319         XMLAttDef* attr = 0;
320         DOMAttrImpl * insertAttr = 0;
321 
322         for (XMLSize_t i=0; i<defAttrs->getAttDefCount(); i++)
323         {
324             attr = &defAttrs->getAttDef(i);
325 
326             const XMLAttDef::DefAttTypes defType = attr->getDefaultType();
327             if ((defType == XMLAttDef::Default)
328             ||  (defType == XMLAttDef::Fixed))
329             {
330                 // DOM Level 2 wants all namespace declaration attributes
331                 // to be bound to "http://www.w3.org/2000/xmlns/"
332                 // So as long as the XML parser doesn't do it, it needs to
333                 // done here.
334                 const XMLCh* qualifiedName = attr->getFullName();
335                 XMLBufBid bbPrefixQName(&fBufMgr);
336                 XMLBuffer& prefixBuf = bbPrefixQName.getBuffer();
337                 int colonPos = -1;
338                 unsigned int uriId = fScanner->resolveQName(qualifiedName, prefixBuf, ElemStack::Mode_Attribute, colonPos);
339 
340                 const XMLCh* namespaceURI = 0;
341                 if (XMLString::equals(qualifiedName, XMLUni::fgXMLNSString))
342                     uriId = fScanner->getXMLNSNamespaceId();
343 
344                 //TagName has a prefix
345                 if (uriId != fScanner->getEmptyNamespaceId())
346                     namespaceURI = fScanner->getURIText(uriId);
347 
348                 insertAttr = (DOMAttrImpl *) fDocument->createAttributeNS(
349                     namespaceURI, qualifiedName);
350 
351                 DOMAttr* remAttr = elemImpl->setDefaultAttributeNodeNS(insertAttr);
352                 if (remAttr)
353                     remAttr->release();
354 
355                 if (attr->getValue() != 0)
356                 {
357                     insertAttr->setValue(attr->getValue());
358                     insertAttr->setSpecified(false);
359                 }
360             }
361 
362             insertAttr = 0;
363             attr->reset();
364         }
365     }
366 
367     fCurrentParent->appendChild(elem);
368     fCurrentParent = elem;
369     fCurrentNode = elem;
370     fWithinElement = true;
371 
372     // If an empty element, do end right now (no endElement() will be called)
373     if (isEmpty)
374         endElement(elemDecl, urlId, isRoot, elemPrefix);
375 }
376 
377 
378 
endElement(const XMLElementDecl & elemDecl,const unsigned int,const bool,const XMLCh * const)379 void XSDDOMParser::endElement( const XMLElementDecl& elemDecl
380                              , const unsigned int
381                              , const bool
382                              , const XMLCh* const)
383 {
384     if(fAnnotationDepth > -1)
385     {
386         if (fInnerAnnotationDepth == fDepth)
387         {
388             fInnerAnnotationDepth = -1;
389             endAnnotationElement(elemDecl, false);
390 	    }
391         else if (fAnnotationDepth == fDepth)
392         {
393             fAnnotationDepth = -1;
394             endAnnotationElement(elemDecl, true);
395         }
396         else
397         {   // inside a child of annotation
398             endAnnotationElement(elemDecl, false);
399             fDepth--;
400             return;
401         }
402     }
403 
404     fDepth--;
405     fCurrentNode   = fCurrentParent;
406     fCurrentParent = fCurrentNode->getParentNode ();
407 
408     // If we've hit the end of content, clear the flag.
409     //
410     if (fCurrentParent == fDocument)
411         fWithinElement = false;
412 }
413 
docCharacters(const XMLCh * const chars,const XMLSize_t length,const bool cdataSection)414 void XSDDOMParser::docCharacters(  const   XMLCh* const    chars
415                               , const XMLSize_t       length
416                               , const bool            cdataSection)
417 {
418     // Ignore chars outside of content
419     if (!fWithinElement)
420         return;
421 
422     if (fInnerAnnotationDepth == -1)
423     {
424         if (!((ReaderMgr*) fScanner->getReaderMgr())->getCurrentReader()->isAllSpaces(chars, length))
425         {
426             ReaderMgr::LastExtEntityInfo lastInfo;
427             fScanner->getReaderMgr()->getLastExtEntityInfo(lastInfo);
428             fXSLocator.setValues(lastInfo.systemId, lastInfo.publicId, lastInfo.lineNumber, lastInfo.colNumber);
429             fXSDErrorReporter.emitError(XMLValid::NonWSContent, XMLUni::fgValidityDomain, &fXSLocator);
430         }
431     }
432     // when it's within either of the 2 annotation subelements, characters are
433     // allowed and we need to store them.
434     else if (cdataSection == true)
435     {
436         fAnnotationBuf.append(XMLUni::fgCDataStart);
437         fAnnotationBuf.append(chars, length);
438         fAnnotationBuf.append(XMLUni::fgCDataEnd);
439     }
440     else
441     {
442         for(unsigned int i = 0; i < length; i++ )
443         {
444             if(chars[i] == chAmpersand)
445             {
446                 fAnnotationBuf.append(chAmpersand);
447                 fAnnotationBuf.append(XMLUni::fgAmp);
448                 fAnnotationBuf.append(chSemiColon);
449             }
450             else if (chars[i] == chOpenAngle)
451             {
452                 fAnnotationBuf.append(chAmpersand);
453                 fAnnotationBuf.append(XMLUni::fgLT);
454                 fAnnotationBuf.append(chSemiColon);
455             }
456             else {
457                 fAnnotationBuf.append(chars[i]);
458             }
459         }
460     }
461 }
462 
docComment(const XMLCh * const comment)463 void XSDDOMParser::docComment(const XMLCh* const comment)
464 {
465     if (fAnnotationDepth > -1)
466     {
467         fAnnotationBuf.append(XMLUni::fgCommentString);
468         fAnnotationBuf.append(comment);
469         fAnnotationBuf.append(chDash);
470         fAnnotationBuf.append(chDash);
471         fAnnotationBuf.append(chCloseAngle);
472     }
473 }
474 
startEntityReference(const XMLEntityDecl &)475 void XSDDOMParser::startEntityReference(const XMLEntityDecl&)
476 {
477 }
478 
endEntityReference(const XMLEntityDecl &)479 void XSDDOMParser::endEntityReference(const XMLEntityDecl&)
480 {
481 }
482 
ignorableWhitespace(const XMLCh * const chars,const XMLSize_t length,const bool)483 void XSDDOMParser::ignorableWhitespace( const XMLCh* const chars
484                                       , const XMLSize_t    length
485                                       , const bool)
486 {
487     // Ignore chars before the root element
488     if (!fWithinElement || !fIncludeIgnorableWhitespace)
489         return;
490 
491     if (fAnnotationDepth > -1)
492         fAnnotationBuf.append(chars, length);
493 }
494 
495 // ---------------------------------------------------------------------------
496 //  XSDDOMParser: Implementation of the XMLErrorReporter interface
497 // ---------------------------------------------------------------------------
error(const unsigned int code,const XMLCh * const msgDomain,const XMLErrorReporter::ErrTypes errType,const XMLCh * const errorText,const XMLCh * const systemId,const XMLCh * const publicId,const XMLFileLoc lineNum,const XMLFileLoc colNum)498 void XSDDOMParser::error(const   unsigned int                code
499                          , const XMLCh* const                msgDomain
500                          , const XMLErrorReporter::ErrTypes  errType
501                          , const XMLCh* const                errorText
502                          , const XMLCh* const                systemId
503                          , const XMLCh* const                publicId
504                          , const XMLFileLoc                  lineNum
505                          , const XMLFileLoc                  colNum)
506 {
507     if (errType >= XMLErrorReporter::ErrType_Fatal)
508         fSawFatal = true;
509 
510     if (fUserErrorReporter)
511         fUserErrorReporter->error(code, msgDomain, errType, errorText,
512                                   systemId, publicId, lineNum, colNum);
513 }
514 
515 InputSource*
resolveEntity(XMLResourceIdentifier * resourceIdentifier)516 XSDDOMParser::resolveEntity(XMLResourceIdentifier* resourceIdentifier)
517 {
518     if (fUserEntityHandler)
519         return fUserEntityHandler->resolveEntity(resourceIdentifier);
520 
521     return 0;
522 }
523 
524 XERCES_CPP_NAMESPACE_END
525