/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * $Id: DTDValidator.cpp 729944 2008-12-29 17:03:32Z amassari $
 */
21
22
// ---------------------------------------------------------------------------
//  Includes
// ---------------------------------------------------------------------------
26 #include <xercesc/util/Janitor.hpp>
27 #include <xercesc/util/XMLUniDefs.hpp>
28 #include <xercesc/util/XMLUni.hpp>
29 #include <xercesc/internal/ReaderMgr.hpp>
30 #include <xercesc/internal/XMLScanner.hpp>
31 #include <xercesc/validators/DTD/DTDValidator.hpp>
32
33 XERCES_CPP_NAMESPACE_BEGIN
34
// ---------------------------------------------------------------------------
//  DTDValidator: Constructors and Destructor
// ---------------------------------------------------------------------------
DTDValidator(XMLErrorReporter * const errReporter)38 DTDValidator::DTDValidator(XMLErrorReporter* const errReporter) :
39
40 XMLValidator(errReporter)
41 , fDTDGrammar(0)
42 {
43 reset();
44 }
45
~DTDValidator()46 DTDValidator::~DTDValidator()
47 {
48 }
49
50
// ---------------------------------------------------------------------------
//  DTDValidator: Implementation of the XMLValidator interface
// ---------------------------------------------------------------------------
checkContent(XMLElementDecl * const elemDecl,QName ** const children,XMLSize_t childCount,XMLSize_t * indexFailingChild)54 bool DTDValidator::checkContent(XMLElementDecl* const elemDecl
55 , QName** const children
56 , XMLSize_t childCount
57 , XMLSize_t* indexFailingChild)
58 {
59 //
60 // Look up the element id in our element decl pool. This will get us
61 // the element decl in our own way of looking at them.
62 //
63 if (!elemDecl)
64 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Val_InvalidElemId, getScanner()->getMemoryManager());
65
66 //
67 // Get the content spec type of this element. This will tell us what
68 // to do to validate it.
69 //
70 const DTDElementDecl::ModelTypes modelType = ((DTDElementDecl*) elemDecl)->getModelType();
71
72 if (modelType == DTDElementDecl::Empty)
73 {
74 //
75 // We can do this one here. It cannot have any children. If it does
76 // we return 0 as the index of the first bad child.
77 //
78 if (childCount)
79 {
80 *indexFailingChild=0;
81 return false;
82 }
83 }
84 else if (modelType == DTDElementDecl::Any)
85 {
86 // We pass no judgement on this one, anything goes
87 }
88 else if ((modelType == DTDElementDecl::Mixed_Simple)
89 || (modelType == DTDElementDecl::Children))
90 {
91 // Get the element's content model or fault it in
92 const XMLContentModel* elemCM = elemDecl->getContentModel();
93
94 // Ask it to validate and return its return
95 return elemCM->validateContent(children, childCount, getScanner()->getEmptyNamespaceId(), indexFailingChild, getScanner()->getMemoryManager());
96 }
97 else
98 {
99 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::CM_UnknownCMType, getScanner()->getMemoryManager());
100 }
101
102 // Went ok, so return success
103 return true;
104 }
105
106
faultInAttr(XMLAttr & toFill,const XMLAttDef & attDef) const107 void DTDValidator::faultInAttr(XMLAttr& toFill, const XMLAttDef& attDef) const
108 {
109 toFill.set(0, attDef.getFullName(), attDef.getValue(), attDef.getType());
110 }
111
reset()112 void DTDValidator::reset()
113 {
114 }
115
116
requiresNamespaces() const117 bool DTDValidator::requiresNamespaces() const
118 {
119 // Namespaces are not supported for DTDs
120 return false;
121 }
122
123
124 void
validateAttrValue(const XMLAttDef * attDef,const XMLCh * const attrValue,bool preValidation,const XMLElementDecl *)125 DTDValidator::validateAttrValue(const XMLAttDef* attDef
126 , const XMLCh* const attrValue
127 , bool preValidation
128 , const XMLElementDecl*)
129 {
130 //
131 // Get quick refs to lost of of the stuff in the passed objects in
132 // order to simplify the code below, which will reference them very
133 // often.
134 //
135 const XMLAttDef::AttTypes type = attDef->getType();
136 const XMLAttDef::DefAttTypes defType = attDef->getDefaultType();
137 const XMLCh* const valueText = attDef->getValue();
138 const XMLCh* const fullName = attDef->getFullName();
139 const XMLCh* const enumList = attDef->getEnumeration();
140
141 //
142 // If the default type is fixed, then make sure the passed value maps
143 // to the fixed value.
144 // If during preContentValidation, the value we are validating is the fixed value itself
145 // so no need to compare.
146 // Only need to do this for regular attribute value validation
147 //
148 if (defType == XMLAttDef::Fixed && !preValidation)
149 {
150 if (!XMLString::equals(attrValue, valueText))
151 emitError(XMLValid::NotSameAsFixedValue, fullName, attrValue, valueText);
152 }
153
154 //
155 // If its a CDATA attribute, then we are done with any DTD level
156 // validation else do the rest.
157 //
158 if (type == XMLAttDef::CData)
159 return;
160
161
162
163 // An empty string cannot be valid for any of the other types
164 if (!attrValue[0])
165 {
166 emitError(XMLValid::InvalidEmptyAttValue, fullName);
167 return;
168 }
169
170 // See whether we are doing multiple values or not
171 const bool multipleValues =
172 (
173 (type == XMLAttDef::IDRefs)
174 || (type == XMLAttDef::Entities)
175 || (type == XMLAttDef::NmTokens)
176 || (type == XMLAttDef::Notation)
177 || (type == XMLAttDef::Enumeration)
178 );
179
180 // And whether we must check for a first name char
181 const bool firstNameChar =
182 (
183 (type == XMLAttDef::ID)
184 || (type == XMLAttDef::IDRef)
185 || (type == XMLAttDef::IDRefs)
186 || (type == XMLAttDef::Entity)
187 || (type == XMLAttDef::Entities)
188 || (type == XMLAttDef::Notation)
189 );
190
191 // Whether it requires ref checking stuff
192 const bool isARefType
193 (
194 (type == XMLAttDef::ID)
195 || (type == XMLAttDef::IDRef)
196 || (type == XMLAttDef::IDRefs)
197 );
198
199 // Some trigger flags to avoid issuing redundant errors and whatnot
200 bool alreadyCapped = false;
201
202 //
203 // Make a copy of the text that we can mangle and get a pointer we can
204 // move through the value
205 //
206
207 // Use a stack-based buffer, when possible...
208 XMLCh tempBuffer[100];
209
210 XMLCh* pszTmpVal = 0;
211
212 ArrayJanitor<XMLCh> janTmpVal(0);
213
214 if (XMLString::stringLen(attrValue) < sizeof(tempBuffer) / sizeof(tempBuffer[0]))
215 {
216 XMLString::copyString(tempBuffer, attrValue);
217 pszTmpVal = tempBuffer;
218 }
219 else
220 {
221 janTmpVal.reset(XMLString::replicate(attrValue, getScanner()->getMemoryManager()), getScanner()->getMemoryManager());
222 pszTmpVal = janTmpVal.get();
223 }
224
225 XMLCh* valPtr = pszTmpVal;
226
227 bool doNamespace = getScanner()->getDoNamespaces();
228
229 while (true)
230 {
231 //
232 // Make sure the first character is a valid first name char, i.e.
233 // if its a Name value. For NmToken values we don't treat the first
234 // char any differently.
235 //
236 if (firstNameChar)
237 {
238 // If its not, emit and error but try to keep going
239 if (!getReaderMgr()->getCurrentReader()->isFirstNameChar(*valPtr))
240 emitError(XMLValid::AttrValNotName, valPtr, fullName);
241 valPtr++;
242 }
243
244 // Make sure all the remaining chars are valid name chars
245 while (*valPtr)
246 {
247 //
248 // If we hit a whitespace, its either a break between two
249 // or more values, or an error if we have a single value.
250 //
251 //
252 // XML1.0-3rd
253 //
254 // [6] Names ::= Name (#x20 Name)*
255 // [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
256 //
257 // only and only ONE #x20 is allowed to be the delimiter
258 //
259 if (*valPtr==chSpace)
260 {
261 if (!multipleValues)
262 {
263 emitError(XMLValid::NoMultipleValues, fullName);
264 return;
265 }
266
267 break;
268 }
269
270 // Now this attribute can be of type
271 // ID, IDREF, IDREFS, ENTITY, ENTITIES, NOTATION, NMTOKEN, NMTOKENS, ENUMERATION
272 // All these must be valid XMLName
273 // If namespace is enabled, colon is not allowed in the first 6
274
275 if (doNamespace && *valPtr == chColon && firstNameChar)
276 emitError(XMLValid::ColonNotValidWithNS);
277
278 if (!getReaderMgr()->getCurrentReader()->isNameChar(*valPtr))
279 {
280 emitError(XMLValid::AttrValNotName, valPtr, fullName);
281 return;
282 }
283 valPtr++;
284 }
285
286 //
287 // Cap it off at the current non-name char. If already capped,
288 // then remember this.
289 //
290 if (!(*valPtr))
291 alreadyCapped = true;
292 *valPtr = 0;
293
294 //
295 // If this type of attribute requires that we track reference
296 // stuff, then handle that.
297 //
298 if (isARefType)
299 {
300 if ((type == XMLAttDef::ID)
301 || (type == XMLAttDef::IDRef)
302 || (type == XMLAttDef::IDRefs))
303 {
304 XMLRefInfo* find = getScanner()->getIDRefList()->get(pszTmpVal);
305 if (find)
306 {
307 if (find->getDeclared() && (type == XMLAttDef::ID))
308 emitError(XMLValid::ReusedIDValue, pszTmpVal);
309 }
310 else
311 {
312 find = new (getScanner()->getMemoryManager()) XMLRefInfo
313 (
314 pszTmpVal
315 , false
316 , false
317 , getScanner()->getMemoryManager()
318 );
319 getScanner()->getIDRefList()->put((void*)find->getRefName(), find);
320 }
321
322 //
323 // Mark it declared or used, which might be redundant in some cases
324 // but not worth checking
325 //
326 if (type == XMLAttDef::ID)
327 find->setDeclared(true);
328 else {
329 if (!preValidation) {
330 find->setUsed(true);
331 }
332 }
333 }
334 }
335 else if (!preValidation && ((type == XMLAttDef::Entity) || (type == XMLAttDef::Entities)))
336 {
337 //
338 // If its refering to a entity, then look up the name in the
339 // general entity pool. If not there, then its an error. If its
340 // not an external unparsed entity, then its an error.
341 //
342 // In case of pre-validation, the above errors should be ignored.
343 //
344 const XMLEntityDecl* decl = fDTDGrammar->getEntityDecl(pszTmpVal);
345 if (decl)
346 {
347 if (!decl->isUnparsed())
348 emitError(XMLValid::BadEntityRefAttr, pszTmpVal, fullName);
349 }
350 else
351 {
352 emitError
353 (
354 XMLValid::UnknownEntityRefAttr
355 , fullName
356 , pszTmpVal
357 );
358 }
359 }
360 else if ((type == XMLAttDef::Notation) || (type == XMLAttDef::Enumeration))
361 {
362 //
363 // Make sure that this value maps to one of the enumeration or
364 // notation values in the enumList parameter. We don't have to
365 // look it up in the notation pool (if a notation) because we
366 // will look up the enumerated values themselves. If they are in
367 // the notation pool (after the DTD is parsed), then obviously
368 // this value will be legal since it matches one of them.
369 //
370 if (!XMLString::isInList(pszTmpVal, enumList))
371 emitError(XMLValid::DoesNotMatchEnumList, pszTmpVal, fullName);
372 }
373
374 // If not doing multiple values, then we are done
375 if (!multipleValues)
376 break;
377
378 //
379 // If we are at the end, then break out now, else move up to the
380 // next char and update the base pointer.
381 //
382 if (alreadyCapped)
383 break;
384
385 valPtr++;
386 pszTmpVal = valPtr;
387 }
388
389 }
390
preContentValidation(bool reuseGrammar,bool validateDefAttr)391 void DTDValidator::preContentValidation(bool
392 #if defined(XERCES_DEBUG)
393 reuseGrammar
394 #endif
395 ,bool validateDefAttr)
396 {
397 //
398 // Lets enumerate all of the elements in the element decl pool
399 // and put out an error for any that did not get declared.
400 // We also check all of the attributes as well.
401 //
402 NameIdPoolEnumerator<DTDElementDecl> elemEnum = fDTDGrammar->getElemEnumerator();
403 fDTDGrammar->setValidated(true);
404 while (elemEnum.hasMoreElements())
405 {
406 const DTDElementDecl& curElem = elemEnum.nextElement();
407 const DTDElementDecl::CreateReasons reason = curElem.getCreateReason();
408
409 //
410 // See if this element decl was ever marked as declared. If
411 // not, then put out an error. In some cases its just
412 // a warning, such as being referenced in a content model.
413 //
414 if (reason != XMLElementDecl::Declared)
415 {
416 if (reason == XMLElementDecl::AttList)
417 {
418 getScanner()->emitError
419 (
420 XMLErrs::UndeclaredElemInAttList
421 , curElem.getFullName()
422 );
423 }
424 else if (reason == XMLElementDecl::AsRootElem)
425 {
426 // It's ok that the root element is not declared in the DTD
427 /*
428 emitError
429 (
430 XMLValid::UndeclaredElemInDocType
431 , curElem.getFullName()
432 );*/
433 }
434 else if (reason == XMLElementDecl::InContentModel)
435 {
436 getScanner()->emitError
437 (
438 XMLErrs::UndeclaredElemInCM
439 , curElem.getFullName()
440 );
441 }
442 else
443 {
444 #if defined(XERCES_DEBUG)
445 if(reuseGrammar && reason == XMLElementDecl::JustFaultIn){
446 }
447 else
448 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::DTD_UnknownCreateReason, getScanner()->getMemoryManager());
449 #endif
450 }
451 }
452
453 //
454 // Check all of the attributes of the current element.
455 // We check for:
456 //
457 // 1) Multiple ID attributes
458 // 2) That all of the default values of attributes are
459 // valid for their type.
460 // 3) That for any notation types, that their lists
461 // of possible values refer to declared notations.
462 //
463 // 4) XML1.0(3rd edition)
464 //
465 // Validity constraint: One Notation Per Element Type
466 // An element type MUST NOT have more than one NOTATION attribute specified.
467 //
468 // Validity constraint: No Notation on Empty Element
469 // For compatibility, an attribute of type NOTATION MUST NOT be declared on an element declared EMPTY.
470 //
471 // Validity constraint: No Duplicate Tokens
472 // The notation names in a single NotationType attribute declaration, as well as
473 // the NmTokens in a single Enumeration attribute declaration, MUST all be distinct.
474 //
475
476 XMLAttDefList& attDefList = curElem.getAttDefList();
477 bool seenId = false;
478 bool seenNOTATION = false;
479 bool elemEmpty = (curElem.getModelType() == DTDElementDecl::Empty);
480
481 for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++)
482 {
483 const XMLAttDef& curAttDef = attDefList.getAttDef(i);
484
485 if (curAttDef.getType() == XMLAttDef::ID)
486 {
487 if (seenId)
488 {
489 emitError
490 (
491 XMLValid::MultipleIdAttrs
492 , curElem.getFullName()
493 );
494 break;
495 }
496
497 seenId = true;
498 }
499 else if (curAttDef.getType() == XMLAttDef::Notation)
500 {
501 if (seenNOTATION)
502 {
503 emitError
504 (
505 XMLValid::ElemOneNotationAttr
506 , curElem.getFullName()
507 );
508
509 break;
510 }
511
512 seenNOTATION = true;
513
514 // no notation attribute on empty element
515 if (elemEmpty)
516 {
517 emitError
518 (
519 XMLValid::EmptyElemNotationAttr
520 , curElem.getFullName()
521 , curAttDef.getFullName()
522 );
523
524 break;
525 }
526
527 //go through enumeration list to check
528 // distinct
529 // notation declaration
530 if (curAttDef.getEnumeration())
531 {
532 checkTokenList(curAttDef, true);
533 }
534 }
535 else if (curAttDef.getType() == XMLAttDef::Enumeration )
536 {
537 //go through enumeration list to check
538 // distinct only
539 if (curAttDef.getEnumeration())
540 {
541 checkTokenList(curAttDef, false);
542 }
543 }
544
545 // If it has a default/fixed value, then validate it
546 if (validateDefAttr && curAttDef.getValue())
547 {
548 validateAttrValue
549 (
550 &curAttDef
551 , curAttDef.getValue()
552 , true
553 , &curElem
554 );
555 }
556 }
557 }
558
559 //
560 // And enumerate all of the general entities. If any of them
561 // reference a notation, then make sure the notation exists.
562 //
563 NameIdPoolEnumerator<DTDEntityDecl> entEnum = fDTDGrammar->getEntityEnumerator();
564 while (entEnum.hasMoreElements())
565 {
566 const DTDEntityDecl& curEntity = entEnum.nextElement();
567
568 if (!curEntity.getNotationName())
569 continue;
570
571 // It has a notation name, so look it up
572 if (!fDTDGrammar->getNotationDecl(curEntity.getNotationName()))
573 {
574 emitError
575 (
576 XMLValid::NotationNotDeclared
577 , curEntity.getNotationName()
578 );
579 }
580 }
581 }
582
postParseValidation()583 void DTDValidator::postParseValidation()
584 {
585 //
586 // At this time, there is nothing to do here. The scanner itself handles
587 // ID/IDREF validation, since that is the same no matter what kind of
588 // validator.
589 //
590 }
591
592 //
593 // We need to verify that all of its possible values
594 // (in the enum list)
595 // is distinct and
596 // refer to valid notations if toValidateNotation is set on
597 //
checkTokenList(const XMLAttDef & curAttDef,bool toValidateNotation)598 void DTDValidator::checkTokenList(const XMLAttDef& curAttDef
599 , bool toValidateNotation)
600 {
601
602 XMLCh* list = XMLString::replicate(curAttDef.getEnumeration(), getScanner()->getMemoryManager());
603 ArrayJanitor<XMLCh> janList(list, getScanner()->getMemoryManager());
604
605 //
606 // Search forward for a space or a null. If a null,
607 // we are done. If a space, cap it and look it up.
608 //
609 bool breakFlag = false;
610 XMLCh* listPtr = list;
611 XMLCh* lastPtr = listPtr;
612 while (true)
613 {
614 while (*listPtr && (*listPtr != chSpace))
615 listPtr++;
616
617 //
618 // If at the end, indicate we need to break after
619 // this one. Else, cap it off here.
620 //
621 if (!*listPtr)
622 breakFlag = true;
623 else
624 *listPtr++ = chNull;
625
626 //distinction check
627 //there should be no same token found in the remaining list
628 if (XMLString::isInList(lastPtr, listPtr))
629 {
630 emitError
631 (
632 XMLValid::AttrDupToken
633 , curAttDef.getFullName()
634 , lastPtr
635 );
636 }
637
638 if (toValidateNotation && !fDTDGrammar->getNotationDecl(lastPtr))
639 {
640 emitError
641 (
642 XMLValid::UnknownNotRefAttr
643 , curAttDef.getFullName()
644 , lastPtr
645 );
646 }
647
648 // Break out if we hit the end last time
649 if (breakFlag)
650 break;
651
652 // Else move upwards and try again
653 lastPtr = listPtr;
654 }
655 }
656
657 XERCES_CPP_NAMESPACE_END
658