1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 /*
19  * $Id$
20  */
21 
22 
23 // ---------------------------------------------------------------------------
24 //  Includes
25 // ---------------------------------------------------------------------------
26 #include <string.h>
27 #include <xercesc/util/RuntimeException.hpp>
28 #include <xercesc/framework/XMLElementDecl.hpp>
29 #include <xercesc/validators/common/ContentSpecNode.hpp>
30 #include <xercesc/validators/common/MixedContentModel.hpp>
31 #include <xercesc/validators/common/CMStateSet.hpp>
32 #include <xercesc/validators/common/Grammar.hpp>
33 #include <xercesc/validators/schema/SubstitutionGroupComparator.hpp>
34 
35 XERCES_CPP_NAMESPACE_BEGIN
36 
37 // ---------------------------------------------------------------------------
38 //  MixedContentModel: Constructors and Destructor
39 // ---------------------------------------------------------------------------
MixedContentModel(const bool dtd,ContentSpecNode * const parentContentSpec,const bool ordered,MemoryManager * const manager)40 MixedContentModel::MixedContentModel(const bool             dtd
41                                    , ContentSpecNode* const parentContentSpec
42                                    , const bool             ordered
43                                    , MemoryManager* const   manager) :
44    fCount(0)
45  , fChildren(0)
46  , fChildTypes(0)
47  , fOrdered(ordered)
48  , fDTD(dtd)
49  , fMemoryManager(manager)
50 {
51     //
52     //  Create a vector of unsigned ints that will be filled in with the
53     //  ids of the child nodes. It will be expanded as needed but we give
54     //  it an initial capacity of 64 which should be more than enough for
55     //  99% of the scenarios.
56     //
57     ValueVectorOf<QName*> children(64, fMemoryManager);
58     ValueVectorOf<ContentSpecNode::NodeTypes> childTypes(64, fMemoryManager);
59 
60     //
61     //  Get the parent element's content spec. This is the head of the tree
62     //  of nodes that describes the content model. We will iterate this
63     //  tree.
64     //
65     ContentSpecNode* curNode = parentContentSpec;
66     if (!curNode)
67         ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::CM_NoParentCSN, fMemoryManager);
68 
69     // And now call the private recursive method that iterates the tree
70     buildChildList(curNode, children, childTypes);
71 
72     //
73     //  And now we know how many elements we need in our member list. So
74     //  fill them in.
75     //
76     fCount = children.size();
77     fChildren = (QName**) fMemoryManager->allocate(fCount * sizeof(QName*)); //new QName*[fCount];
78     fChildTypes = (ContentSpecNode::NodeTypes*) fMemoryManager->allocate
79     (
80         fCount * sizeof(ContentSpecNode::NodeTypes)
81     ); //new ContentSpecNode::NodeTypes[fCount];
82     for (XMLSize_t index = 0; index < fCount; index++) {
83         fChildren[index] = new (fMemoryManager) QName(*children.elementAt(index));
84         fChildTypes[index] = childTypes.elementAt(index);
85     }
86 }
87 
~MixedContentModel()88 MixedContentModel::~MixedContentModel()
89 {
90     for (XMLSize_t index = 0; index < fCount; index++) {
91         delete fChildren[index];
92     }
93     fMemoryManager->deallocate(fChildren); //delete [] fChildren;
94     fMemoryManager->deallocate(fChildTypes); //delete [] fChildTypes;
95 }
96 
97 
98 // ---------------------------------------------------------------------------
99 //  MixedContentModel: Getter methods
100 // ---------------------------------------------------------------------------
hasDups() const101 bool MixedContentModel::hasDups() const
102 {
103     // Can't have dups if only one child
104     if (fCount == 1)
105         return false;
106 
107     for (XMLSize_t index = 0; index < fCount; index++)
108     {
109         const QName* curVal = fChildren[index];
110         for (XMLSize_t iIndex = 0; iIndex < fCount; iIndex++)
111         {
112             if (iIndex == index)
113                 continue;
114 
115             if (fDTD) {
116                 if (XMLString::equals(curVal->getRawName(), fChildren[iIndex]->getRawName())) {
117                     return true;
118                 }
119             }
120             else {
121                 if ((curVal->getURI() == fChildren[iIndex]->getURI()) &&
122                     (XMLString::equals(curVal->getLocalPart(), fChildren[iIndex]->getLocalPart()))) {
123                     return true;
124                 }
125             }
126         }
127     }
128     return false;
129 }
130 
131 
132 // ---------------------------------------------------------------------------
133 //  MixedContentModel: Implementation of the ContentModel virtual interface
134 // ---------------------------------------------------------------------------
135 //
136 //Under the XML Schema mixed model,
137 //the order and number of child elements appearing in an instance
138 //must agree with
139 //the order and number of child elements specified in the model.
140 //
141 bool
validateContent(QName ** const children,XMLSize_t childCount,unsigned int,XMLSize_t * indexFailingChild,MemoryManager * const) const142 MixedContentModel::validateContent( QName** const         children
143                                   , XMLSize_t             childCount
144                                   , unsigned int
145                                   , XMLSize_t*            indexFailingChild
146                                   , MemoryManager*    const) const
147 {
148     // must match order
149     if (fOrdered) {
150         unsigned int inIndex = 0;
151         for (unsigned int outIndex = 0; outIndex < childCount; outIndex++) {
152 
153             // Get the current child out of the source index
154             const QName* curChild = children[outIndex];
155 
156             // If its PCDATA, then we just accept that
157             if (curChild->getURI() == XMLElementDecl::fgPCDataElemId)
158                 continue;
159 
160             ContentSpecNode::NodeTypes type = fChildTypes[inIndex];
161             const QName* inChild = fChildren[inIndex];
162 
163             if (type == ContentSpecNode::Leaf) {
164                 if (fDTD) {
165                     if (!XMLString::equals(inChild->getRawName(), curChild->getRawName())) {
166                         *indexFailingChild=outIndex;
167                         return false;
168                     }
169                 }
170                 else {
171                     if ((inChild->getURI() != curChild->getURI()) ||
172                         (!XMLString::equals(inChild->getLocalPart(), curChild->getLocalPart()))) {
173                         *indexFailingChild=outIndex;
174                         return false;
175                     }
176                 }
177             }
178             else if (type == ContentSpecNode::Any) {
179             }
180             else if (type == ContentSpecNode::Any_NS) {
181                 if (inChild->getURI() != curChild->getURI())
182                 {
183                     *indexFailingChild=outIndex;
184                     return false;
185                 }
186             }
187             else if (type == ContentSpecNode::Any_Other)
188             {
189                 // Here we assume that empty string has id 1.
190                 //
191                 unsigned int uriId = curChild->getURI();
192                 if (uriId == 1 || uriId == inChild->getURI())
193                 {
194                     *indexFailingChild=outIndex;
195                     return false;
196                 }
197             }
198 
199             // advance index
200             inIndex++;
201         }
202     }
203 
204     // can appear in any order
205     else {
206         for (unsigned int outIndex = 0; outIndex < childCount; outIndex++) {
207             // Get the current child out of the source index
208             const QName* curChild = children[outIndex];
209 
210             // If its PCDATA, then we just accept that
211             if (curChild->getURI() == XMLElementDecl::fgPCDataElemId)
212                 continue;
213 
214             // And try to find it in our list
215             unsigned int inIndex = 0;
216             for (; inIndex < fCount; inIndex++)
217             {
218                 ContentSpecNode::NodeTypes type = fChildTypes[inIndex];
219                 const QName* inChild = fChildren[inIndex];
220 
221                 if (type == ContentSpecNode::Leaf) {
222                     if (fDTD) {
223                         if (XMLString::equals(inChild->getRawName(), curChild->getRawName())) {
224                             break;
225                         }
226                     }
227                     else {
228                         if ((inChild->getURI() == curChild->getURI()) &&
229                             (XMLString::equals(inChild->getLocalPart(), curChild->getLocalPart()))) {
230                             break;
231                         }
232                     }
233                 }
234                 else if (type == ContentSpecNode::Any) {
235                     break;
236                 }
237                 else if (type == ContentSpecNode::Any_NS) {
238                     if (inChild->getURI() == curChild->getURI())
239                         break;
240                 }
241                 else if (type == ContentSpecNode::Any_Other)
242                 {
243                     // Here we assume that empty string has id 1.
244                     //
245                     unsigned int uriId = curChild->getURI();
246                     if (uriId != 1 && uriId != inChild->getURI())
247                         break;
248                 }
249 
250                 // REVISIT: What about checking for multiple ANY matches?
251                 //          The content model ambiguity *could* be checked
252                 //          by the caller before constructing the mixed
253                 //          content model.
254             }
255             // We did not find this one, so the validation failed
256             if (inIndex == fCount)
257             {
258                 *indexFailingChild=outIndex;
259                 return false;
260             }
261         }
262     }
263 
264     // Everything seems to be in order, so return success
265     return true;
266 }
267 
268 
validateContentSpecial(QName ** const children,XMLSize_t childCount,unsigned int,GrammarResolver * const pGrammarResolver,XMLStringPool * const pStringPool,XMLSize_t * indexFailingChild,MemoryManager * const) const269 bool MixedContentModel::validateContentSpecial(QName** const          children
270                                             , XMLSize_t               childCount
271                                             , unsigned int
272                                             , GrammarResolver*  const pGrammarResolver
273                                             , XMLStringPool*    const pStringPool
274                                             , XMLSize_t*              indexFailingChild
275                                             , MemoryManager*    const) const
276 {
277 
278     SubstitutionGroupComparator comparator(pGrammarResolver, pStringPool);
279 
280     // must match order
281     if (fOrdered) {
282         unsigned int inIndex = 0;
283         for (unsigned int outIndex = 0; outIndex < childCount; outIndex++) {
284 
285             // Get the current child out of the source index
286             QName* curChild = children[outIndex];
287 
288             // If its PCDATA, then we just accept that
289             if (curChild->getURI() == XMLElementDecl::fgPCDataElemId)
290                 continue;
291 
292             ContentSpecNode::NodeTypes type = fChildTypes[inIndex];
293             QName* inChild = fChildren[inIndex];
294 
295             if (type == ContentSpecNode::Leaf) {
296                 if ( !comparator.isEquivalentTo(curChild, inChild))
297                 {
298                     *indexFailingChild=outIndex;
299                     return false;
300                 }
301             }
302             else if (type == ContentSpecNode::Any) {
303             }
304             else if (type == ContentSpecNode::Any_NS) {
305                 if (inChild->getURI() != curChild->getURI())
306                 {
307                     *indexFailingChild=outIndex;
308                     return false;
309                 }
310             }
311             else if (type == ContentSpecNode::Any_Other)
312             {
313                 // Here we assume that empty string has id 1.
314                 //
315                 unsigned int uriId = curChild->getURI();
316                 if (uriId == 1 || uriId == inChild->getURI())
317                 {
318                     *indexFailingChild=outIndex;
319                     return false;
320                 }
321             }
322 
323             // advance index
324             inIndex++;
325         }
326     }
327 
328     // can appear in any order
329     else {
330         for (unsigned int outIndex = 0; outIndex < childCount; outIndex++) {
331             // Get the current child out of the source index
332             QName* curChild = children[outIndex];
333 
334             // If its PCDATA, then we just accept that
335             if (curChild->getURI() == XMLElementDecl::fgPCDataElemId)
336                 continue;
337 
338             // And try to find it in our list
339             unsigned int inIndex = 0;
340             for (; inIndex < fCount; inIndex++)
341             {
342                 ContentSpecNode::NodeTypes type = fChildTypes[inIndex];
343                 QName* inChild = fChildren[inIndex];
344 
345                 if (type == ContentSpecNode::Leaf) {
346                     if ( comparator.isEquivalentTo(curChild, inChild))
347                         break;
348                 }
349                 else if (type == ContentSpecNode::Any) {
350                     break;
351                 }
352                 else if (type == ContentSpecNode::Any_NS) {
353                     if (inChild->getURI() == curChild->getURI())
354                         break;
355                 }
356                 else if (type == ContentSpecNode::Any_Other)
357                 {
358                   // Here we assume that empty string has id 1.
359                   //
360                   unsigned int uriId = curChild->getURI();
361                   if (uriId != 1 && uriId != inChild->getURI())
362                     break;
363                 }
364 
365                 // REVISIT: What about checking for multiple ANY matches?
366                 //          The content model ambiguity *could* be checked
367                 //          by the caller before constructing the mixed
368                 //          content model.
369             }
370             // We did not find this one, so the validation failed
371             if (inIndex == fCount)
372             {
373                 *indexFailingChild=outIndex;
374                 return false;
375             }
376         }
377     }
378 
379     // Everything seems to be in order, so return success
380     return true;
381 }
382 
383 // ---------------------------------------------------------------------------
384 //  MixedContentModel: Private helper methods
385 // ---------------------------------------------------------------------------
386 void
buildChildList(ContentSpecNode * const curNode,ValueVectorOf<QName * > & toFill,ValueVectorOf<ContentSpecNode::NodeTypes> & toType)387 MixedContentModel::buildChildList(  ContentSpecNode* const       curNode
388                                   , ValueVectorOf<QName*>&       toFill
389                                   , ValueVectorOf<ContentSpecNode::NodeTypes>& toType)
390 {
391     // Get the type of spec node our current node is
392     const ContentSpecNode::NodeTypes curType = curNode->getType();
393 
394     // If its a leaf, then store its id in the target list
395     if ((curType == ContentSpecNode::Leaf)      ||
396         (curType == ContentSpecNode::Any)       ||
397         (curType == ContentSpecNode::Any_Other) ||
398         (curType == ContentSpecNode::Any_NS)   )
399     {
400         toFill.addElement(curNode->getElement());
401         toType.addElement(curType);
402         return;
403     }
404 
405     // Get both the child node pointers
406     ContentSpecNode* leftNode = curNode->getFirst();
407     ContentSpecNode* rightNode = curNode->getSecond();
408 
409     // And recurse according to the type of node
410     if (((curType & 0x0f) == ContentSpecNode::Choice)
411     ||  ((curType & 0x0f) == ContentSpecNode::Sequence))
412     {
413         // Recurse on the left and right nodes
414         buildChildList(leftNode, toFill, toType);
415 
416         // The last node of a choice or sequence has a null right
417         if (rightNode)
418             buildChildList(rightNode, toFill, toType);
419     }
420     else if ((curType == ContentSpecNode::OneOrMore)
421          ||  (curType == ContentSpecNode::ZeroOrOne)
422          ||  (curType == ContentSpecNode::ZeroOrMore))
423     {
424         // Just do the left node on this one
425         buildChildList(leftNode, toFill, toType);
426     }
427 }
428 
429 XERCES_CPP_NAMESPACE_END
430