1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /*
19 * $Id: MixedContentModel.cpp 676911 2008-07-15 13:27:32Z amassari $
20 */
21
22
23 // ---------------------------------------------------------------------------
24 // Includes
25 // ---------------------------------------------------------------------------
26 #include <string.h>
27 #include <xercesc/util/RuntimeException.hpp>
28 #include <xercesc/framework/XMLElementDecl.hpp>
29 #include <xercesc/validators/common/ContentSpecNode.hpp>
30 #include <xercesc/validators/common/MixedContentModel.hpp>
31 #include <xercesc/validators/common/CMStateSet.hpp>
32 #include <xercesc/validators/common/Grammar.hpp>
33 #include <xercesc/validators/schema/SubstitutionGroupComparator.hpp>
34
35 XERCES_CPP_NAMESPACE_BEGIN
36
37 // ---------------------------------------------------------------------------
38 // MixedContentModel: Constructors and Destructor
39 // ---------------------------------------------------------------------------
MixedContentModel(const bool dtd,ContentSpecNode * const parentContentSpec,const bool ordered,MemoryManager * const manager)40 MixedContentModel::MixedContentModel(const bool dtd
41 , ContentSpecNode* const parentContentSpec
42 , const bool ordered
43 , MemoryManager* const manager) :
44 fCount(0)
45 , fChildren(0)
46 , fChildTypes(0)
47 , fOrdered(ordered)
48 , fDTD(dtd)
49 , fMemoryManager(manager)
50 {
51 //
52 // Create a vector of unsigned ints that will be filled in with the
53 // ids of the child nodes. It will be expanded as needed but we give
54 // it an initial capacity of 64 which should be more than enough for
55 // 99% of the scenarios.
56 //
57 ValueVectorOf<QName*> children(64, fMemoryManager);
58 ValueVectorOf<ContentSpecNode::NodeTypes> childTypes(64, fMemoryManager);
59
60 //
61 // Get the parent element's content spec. This is the head of the tree
62 // of nodes that describes the content model. We will iterate this
63 // tree.
64 //
65 ContentSpecNode* curNode = parentContentSpec;
66 if (!curNode)
67 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::CM_NoParentCSN, fMemoryManager);
68
69 // And now call the private recursive method that iterates the tree
70 buildChildList(curNode, children, childTypes);
71
72 //
73 // And now we know how many elements we need in our member list. So
74 // fill them in.
75 //
76 fCount = children.size();
77 fChildren = (QName**) fMemoryManager->allocate(fCount * sizeof(QName*)); //new QName*[fCount];
78 fChildTypes = (ContentSpecNode::NodeTypes*) fMemoryManager->allocate
79 (
80 fCount * sizeof(ContentSpecNode::NodeTypes)
81 ); //new ContentSpecNode::NodeTypes[fCount];
82 for (XMLSize_t index = 0; index < fCount; index++) {
83 fChildren[index] = new (fMemoryManager) QName(*children.elementAt(index));
84 fChildTypes[index] = childTypes.elementAt(index);
85 }
86 }
87
~MixedContentModel()88 MixedContentModel::~MixedContentModel()
89 {
90 for (XMLSize_t index = 0; index < fCount; index++) {
91 delete fChildren[index];
92 }
93 fMemoryManager->deallocate(fChildren); //delete [] fChildren;
94 fMemoryManager->deallocate(fChildTypes); //delete [] fChildTypes;
95 }
96
97
98 // ---------------------------------------------------------------------------
99 // MixedContentModel: Getter methods
100 // ---------------------------------------------------------------------------
hasDups() const101 bool MixedContentModel::hasDups() const
102 {
103 // Can't have dups if only one child
104 if (fCount == 1)
105 return false;
106
107 for (XMLSize_t index = 0; index < fCount; index++)
108 {
109 const QName* curVal = fChildren[index];
110 for (XMLSize_t iIndex = 0; iIndex < fCount; iIndex++)
111 {
112 if (iIndex == index)
113 continue;
114
115 if (fDTD) {
116 if (XMLString::equals(curVal->getRawName(), fChildren[iIndex]->getRawName())) {
117 return true;
118 }
119 }
120 else {
121 if ((curVal->getURI() == fChildren[iIndex]->getURI()) &&
122 (XMLString::equals(curVal->getLocalPart(), fChildren[iIndex]->getLocalPart()))) {
123 return true;
124 }
125 }
126 }
127 }
128 return false;
129 }
130
131
132 // ---------------------------------------------------------------------------
133 // MixedContentModel: Implementation of the ContentModel virtual interface
134 // ---------------------------------------------------------------------------
135 //
136 //Under the XML Schema mixed model,
137 //the order and number of child elements appearing in an instance
138 //must agree with
139 //the order and number of child elements specified in the model.
140 //
141 bool
validateContent(QName ** const children,XMLSize_t childCount,unsigned int,XMLSize_t * indexFailingChild,MemoryManager * const) const142 MixedContentModel::validateContent( QName** const children
143 , XMLSize_t childCount
144 , unsigned int
145 , XMLSize_t* indexFailingChild
146 , MemoryManager* const) const
147 {
148 // must match order
149 if (fOrdered) {
150 unsigned int inIndex = 0;
151 for (unsigned int outIndex = 0; outIndex < childCount; outIndex++) {
152
153 // Get the current child out of the source index
154 const QName* curChild = children[outIndex];
155
156 // If its PCDATA, then we just accept that
157 if (curChild->getURI() == XMLElementDecl::fgPCDataElemId)
158 continue;
159
160 ContentSpecNode::NodeTypes type = fChildTypes[inIndex];
161 const QName* inChild = fChildren[inIndex];
162
163 if (type == ContentSpecNode::Leaf) {
164 if (fDTD) {
165 if (!XMLString::equals(inChild->getRawName(), curChild->getRawName())) {
166 *indexFailingChild=outIndex;
167 return false;
168 }
169 }
170 else {
171 if ((inChild->getURI() != curChild->getURI()) ||
172 (!XMLString::equals(inChild->getLocalPart(), curChild->getLocalPart()))) {
173 *indexFailingChild=outIndex;
174 return false;
175 }
176 }
177 }
178 else if (type == ContentSpecNode::Any) {
179 }
180 else if (type == ContentSpecNode::Any_NS) {
181 if (inChild->getURI() != curChild->getURI())
182 {
183 *indexFailingChild=outIndex;
184 return false;
185 }
186 }
187 else if (type == ContentSpecNode::Any_Other)
188 {
189 // Here we assume that empty string has id 1.
190 //
191 unsigned int uriId = curChild->getURI();
192 if (uriId == 1 || uriId == inChild->getURI())
193 {
194 *indexFailingChild=outIndex;
195 return false;
196 }
197 }
198
199 // advance index
200 inIndex++;
201 }
202 }
203
204 // can appear in any order
205 else {
206 for (unsigned int outIndex = 0; outIndex < childCount; outIndex++) {
207 // Get the current child out of the source index
208 const QName* curChild = children[outIndex];
209
210 // If its PCDATA, then we just accept that
211 if (curChild->getURI() == XMLElementDecl::fgPCDataElemId)
212 continue;
213
214 // And try to find it in our list
215 unsigned int inIndex = 0;
216 for (; inIndex < fCount; inIndex++)
217 {
218 ContentSpecNode::NodeTypes type = fChildTypes[inIndex];
219 const QName* inChild = fChildren[inIndex];
220
221 if (type == ContentSpecNode::Leaf) {
222 if (fDTD) {
223 if (XMLString::equals(inChild->getRawName(), curChild->getRawName())) {
224 break;
225 }
226 }
227 else {
228 if ((inChild->getURI() == curChild->getURI()) &&
229 (XMLString::equals(inChild->getLocalPart(), curChild->getLocalPart()))) {
230 break;
231 }
232 }
233 }
234 else if (type == ContentSpecNode::Any) {
235 break;
236 }
237 else if (type == ContentSpecNode::Any_NS) {
238 if (inChild->getURI() == curChild->getURI())
239 break;
240 }
241 else if (type == ContentSpecNode::Any_Other)
242 {
243 // Here we assume that empty string has id 1.
244 //
245 unsigned int uriId = curChild->getURI();
246 if (uriId != 1 && uriId != inChild->getURI())
247 break;
248 }
249
250 // REVISIT: What about checking for multiple ANY matches?
251 // The content model ambiguity *could* be checked
252 // by the caller before constructing the mixed
253 // content model.
254 }
255 // We did not find this one, so the validation failed
256 if (inIndex == fCount)
257 {
258 *indexFailingChild=outIndex;
259 return false;
260 }
261 }
262 }
263
264 // Everything seems to be in order, so return success
265 return true;
266 }
267
268
validateContentSpecial(QName ** const children,XMLSize_t childCount,unsigned int,GrammarResolver * const pGrammarResolver,XMLStringPool * const pStringPool,XMLSize_t * indexFailingChild,MemoryManager * const) const269 bool MixedContentModel::validateContentSpecial(QName** const children
270 , XMLSize_t childCount
271 , unsigned int
272 , GrammarResolver* const pGrammarResolver
273 , XMLStringPool* const pStringPool
274 , XMLSize_t* indexFailingChild
275 , MemoryManager* const) const
276 {
277
278 SubstitutionGroupComparator comparator(pGrammarResolver, pStringPool);
279
280 // must match order
281 if (fOrdered) {
282 unsigned int inIndex = 0;
283 for (unsigned int outIndex = 0; outIndex < childCount; outIndex++) {
284
285 // Get the current child out of the source index
286 QName* curChild = children[outIndex];
287
288 // If its PCDATA, then we just accept that
289 if (curChild->getURI() == XMLElementDecl::fgPCDataElemId)
290 continue;
291
292 ContentSpecNode::NodeTypes type = fChildTypes[inIndex];
293 QName* inChild = fChildren[inIndex];
294
295 if (type == ContentSpecNode::Leaf) {
296 if ( !comparator.isEquivalentTo(curChild, inChild))
297 {
298 *indexFailingChild=outIndex;
299 return false;
300 }
301 }
302 else if (type == ContentSpecNode::Any) {
303 }
304 else if (type == ContentSpecNode::Any_NS) {
305 if (inChild->getURI() != curChild->getURI())
306 {
307 *indexFailingChild=outIndex;
308 return false;
309 }
310 }
311 else if (type == ContentSpecNode::Any_Other)
312 {
313 // Here we assume that empty string has id 1.
314 //
315 unsigned int uriId = curChild->getURI();
316 if (uriId == 1 || uriId == inChild->getURI())
317 {
318 *indexFailingChild=outIndex;
319 return false;
320 }
321 }
322
323 // advance index
324 inIndex++;
325 }
326 }
327
328 // can appear in any order
329 else {
330 for (unsigned int outIndex = 0; outIndex < childCount; outIndex++) {
331 // Get the current child out of the source index
332 QName* curChild = children[outIndex];
333
334 // If its PCDATA, then we just accept that
335 if (curChild->getURI() == XMLElementDecl::fgPCDataElemId)
336 continue;
337
338 // And try to find it in our list
339 unsigned int inIndex = 0;
340 for (; inIndex < fCount; inIndex++)
341 {
342 ContentSpecNode::NodeTypes type = fChildTypes[inIndex];
343 QName* inChild = fChildren[inIndex];
344
345 if (type == ContentSpecNode::Leaf) {
346 if ( comparator.isEquivalentTo(curChild, inChild))
347 break;
348 }
349 else if (type == ContentSpecNode::Any) {
350 break;
351 }
352 else if (type == ContentSpecNode::Any_NS) {
353 if (inChild->getURI() == curChild->getURI())
354 break;
355 }
356 else if (type == ContentSpecNode::Any_Other)
357 {
358 // Here we assume that empty string has id 1.
359 //
360 unsigned int uriId = curChild->getURI();
361 if (uriId != 1 && uriId != inChild->getURI())
362 break;
363 }
364
365 // REVISIT: What about checking for multiple ANY matches?
366 // The content model ambiguity *could* be checked
367 // by the caller before constructing the mixed
368 // content model.
369 }
370 // We did not find this one, so the validation failed
371 if (inIndex == fCount)
372 {
373 *indexFailingChild=outIndex;
374 return false;
375 }
376 }
377 }
378
379 // Everything seems to be in order, so return success
380 return true;
381 }
382
383 // ---------------------------------------------------------------------------
384 // MixedContentModel: Private helper methods
385 // ---------------------------------------------------------------------------
386 void
buildChildList(ContentSpecNode * const curNode,ValueVectorOf<QName * > & toFill,ValueVectorOf<ContentSpecNode::NodeTypes> & toType)387 MixedContentModel::buildChildList( ContentSpecNode* const curNode
388 , ValueVectorOf<QName*>& toFill
389 , ValueVectorOf<ContentSpecNode::NodeTypes>& toType)
390 {
391 // Get the type of spec node our current node is
392 const ContentSpecNode::NodeTypes curType = curNode->getType();
393
394 // If its a leaf, then store its id in the target list
395 if ((curType == ContentSpecNode::Leaf) ||
396 (curType == ContentSpecNode::Any) ||
397 (curType == ContentSpecNode::Any_Other) ||
398 (curType == ContentSpecNode::Any_NS) )
399 {
400 toFill.addElement(curNode->getElement());
401 toType.addElement(curType);
402 return;
403 }
404
405 // Get both the child node pointers
406 ContentSpecNode* leftNode = curNode->getFirst();
407 ContentSpecNode* rightNode = curNode->getSecond();
408
409 // And recurse according to the type of node
410 if (((curType & 0x0f) == ContentSpecNode::Choice)
411 || ((curType & 0x0f) == ContentSpecNode::Sequence))
412 {
413 // Recurse on the left and right nodes
414 buildChildList(leftNode, toFill, toType);
415
416 // The last node of a choice or sequence has a null right
417 if (rightNode)
418 buildChildList(rightNode, toFill, toType);
419 }
420 else if ((curType == ContentSpecNode::OneOrMore)
421 || (curType == ContentSpecNode::ZeroOrOne)
422 || (curType == ContentSpecNode::ZeroOrMore))
423 {
424 // Just do the left node on this one
425 buildChildList(leftNode, toFill, toType);
426 }
427 }
428
429 XERCES_CPP_NAMESPACE_END
430