1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 /*
19  * $Id: DTDValidator.cpp 729944 2008-12-29 17:03:32Z amassari $
20  */
21 
22 
23 // ---------------------------------------------------------------------------
24 //  Includes
25 // ---------------------------------------------------------------------------
26 #include <xercesc/util/Janitor.hpp>
27 #include <xercesc/util/XMLUniDefs.hpp>
28 #include <xercesc/util/XMLUni.hpp>
29 #include <xercesc/internal/ReaderMgr.hpp>
30 #include <xercesc/internal/XMLScanner.hpp>
31 #include <xercesc/validators/DTD/DTDValidator.hpp>
32 
33 XERCES_CPP_NAMESPACE_BEGIN
34 
35 // ---------------------------------------------------------------------------
36 //  DTDValidator: Constructors and Destructor
37 // ---------------------------------------------------------------------------
DTDValidator(XMLErrorReporter * const errReporter)38 DTDValidator::DTDValidator(XMLErrorReporter* const errReporter) :
39 
40     XMLValidator(errReporter)
41     , fDTDGrammar(0)
42 {
43     reset();
44 }
45 
~DTDValidator()46 DTDValidator::~DTDValidator()
47 {
48 }
49 
50 
51 // ---------------------------------------------------------------------------
52 //  DTDValidator: Implementation of the XMLValidator interface
53 // ---------------------------------------------------------------------------
checkContent(XMLElementDecl * const elemDecl,QName ** const children,XMLSize_t childCount,XMLSize_t * indexFailingChild)54 bool DTDValidator::checkContent(XMLElementDecl* const elemDecl
55                               , QName** const         children
56                               , XMLSize_t             childCount
57                               , XMLSize_t*         indexFailingChild)
58 {
59     //
60     //  Look up the element id in our element decl pool. This will get us
61     //  the element decl in our own way of looking at them.
62     //
63     if (!elemDecl)
64         ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Val_InvalidElemId, getScanner()->getMemoryManager());
65 
66     //
67     //  Get the content spec type of this element. This will tell us what
68     //  to do to validate it.
69     //
70     const DTDElementDecl::ModelTypes modelType = ((DTDElementDecl*) elemDecl)->getModelType();
71 
72     if (modelType == DTDElementDecl::Empty)
73     {
74         //
75         //  We can do this one here. It cannot have any children. If it does
76         //  we return 0 as the index of the first bad child.
77         //
78         if (childCount)
79         {
80             *indexFailingChild=0;
81             return false;
82         }
83     }
84      else if (modelType == DTDElementDecl::Any)
85     {
86         // We pass no judgement on this one, anything goes
87     }
88      else if ((modelType == DTDElementDecl::Mixed_Simple)
89           ||  (modelType == DTDElementDecl::Children))
90     {
91         // Get the element's content model or fault it in
92         const XMLContentModel* elemCM = elemDecl->getContentModel();
93 
94         // Ask it to validate and return its return
95         return elemCM->validateContent(children, childCount, getScanner()->getEmptyNamespaceId(), indexFailingChild, getScanner()->getMemoryManager());
96     }
97      else
98     {
99         ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::CM_UnknownCMType, getScanner()->getMemoryManager());
100     }
101 
102     // Went ok, so return success
103     return true;
104 }
105 
106 
faultInAttr(XMLAttr & toFill,const XMLAttDef & attDef) const107 void DTDValidator::faultInAttr(XMLAttr& toFill, const XMLAttDef& attDef) const
108 {
109     toFill.set(0, attDef.getFullName(), attDef.getValue(), attDef.getType());
110 }
111 
reset()112 void DTDValidator::reset()
113 {
114 }
115 
116 
requiresNamespaces() const117 bool DTDValidator::requiresNamespaces() const
118 {
119     // Namespaces are not supported for DTDs
120     return false;
121 }
122 
123 
124 void
validateAttrValue(const XMLAttDef * attDef,const XMLCh * const attrValue,bool preValidation,const XMLElementDecl *)125 DTDValidator::validateAttrValue(const   XMLAttDef*      attDef
126                                 , const XMLCh* const    attrValue
127                                 , bool                  preValidation
128                                 , const XMLElementDecl*)
129 {
130     //
131     //  Get quick refs to lost of of the stuff in the passed objects in
132     //  order to simplify the code below, which will reference them very
133     //  often.
134     //
135     const XMLAttDef::AttTypes       type = attDef->getType();
136     const XMLAttDef::DefAttTypes    defType = attDef->getDefaultType();
137     const XMLCh* const              valueText = attDef->getValue();
138     const XMLCh* const              fullName = attDef->getFullName();
139     const XMLCh* const              enumList = attDef->getEnumeration();
140 
141     //
142     //  If the default type is fixed, then make sure the passed value maps
143     //  to the fixed value.
144     //  If during preContentValidation, the value we are validating is the fixed value itself
145     //  so no need to compare.
146     //  Only need to do this for regular attribute value validation
147     //
148     if (defType == XMLAttDef::Fixed && !preValidation)
149     {
150         if (!XMLString::equals(attrValue, valueText))
151             emitError(XMLValid::NotSameAsFixedValue, fullName, attrValue, valueText);
152     }
153 
154     //
155     //  If its a CDATA attribute, then we are done with any DTD level
156     //  validation else do the rest.
157     //
158     if (type == XMLAttDef::CData)
159         return;
160 
161 
162 
163     // An empty string cannot be valid for any of the other types
164     if (!attrValue[0])
165     {
166         emitError(XMLValid::InvalidEmptyAttValue, fullName);
167         return;
168     }
169 
170     // See whether we are doing multiple values or not
171     const bool multipleValues =
172     (
173         (type == XMLAttDef::IDRefs)
174         || (type == XMLAttDef::Entities)
175         || (type == XMLAttDef::NmTokens)
176         || (type == XMLAttDef::Notation)
177         || (type == XMLAttDef::Enumeration)
178     );
179 
180     // And whether we must check for a first name char
181     const bool firstNameChar =
182     (
183         (type == XMLAttDef::ID)
184         || (type == XMLAttDef::IDRef)
185         || (type == XMLAttDef::IDRefs)
186         || (type == XMLAttDef::Entity)
187         || (type == XMLAttDef::Entities)
188         || (type == XMLAttDef::Notation)
189     );
190 
191     // Whether it requires ref checking stuff
192     const bool isARefType
193     (
194         (type == XMLAttDef::ID)
195         || (type == XMLAttDef::IDRef)
196         || (type == XMLAttDef::IDRefs)
197     );
198 
199     // Some trigger flags to avoid issuing redundant errors and whatnot
200     bool alreadyCapped = false;
201 
202     //
203     //  Make a copy of the text that we can mangle and get a pointer we can
204     //  move through the value
205     //
206 
207     // Use a stack-based buffer, when possible...
208     XMLCh   tempBuffer[100];
209 
210     XMLCh* pszTmpVal = 0;
211 
212     ArrayJanitor<XMLCh> janTmpVal(0);
213 
214     if (XMLString::stringLen(attrValue) < sizeof(tempBuffer) / sizeof(tempBuffer[0]))
215     {
216         XMLString::copyString(tempBuffer, attrValue);
217         pszTmpVal = tempBuffer;
218     }
219     else
220     {
221         janTmpVal.reset(XMLString::replicate(attrValue, getScanner()->getMemoryManager()), getScanner()->getMemoryManager());
222         pszTmpVal = janTmpVal.get();
223     }
224 
225     XMLCh* valPtr = pszTmpVal;
226 
227     bool doNamespace = getScanner()->getDoNamespaces();
228 
229     while (true)
230     {
231         //
232         //  Make sure the first character is a valid first name char, i.e.
233         //  if its a Name value. For NmToken values we don't treat the first
234         //  char any differently.
235         //
236         if (firstNameChar)
237         {
238             // If its not, emit and error but try to keep going
239             if (!getReaderMgr()->getCurrentReader()->isFirstNameChar(*valPtr))
240                 emitError(XMLValid::AttrValNotName, valPtr, fullName);
241             valPtr++;
242         }
243 
244         // Make sure all the remaining chars are valid name chars
245         while (*valPtr)
246         {
247             //
248             //  If we hit a whitespace, its either a break between two
249             //  or more values, or an error if we have a single value.
250             //
251             //
252             //   XML1.0-3rd
253             //
254             //   [6]   Names   ::=   Name (#x20 Name)*
255             //   [8]   Nmtokens   ::=   Nmtoken (#x20 Nmtoken)*
256             //
257             //   only and only ONE #x20 is allowed to be the delimiter
258             //
259             if (*valPtr==chSpace)
260             {
261                 if (!multipleValues)
262                 {
263                     emitError(XMLValid::NoMultipleValues, fullName);
264                     return;
265                 }
266 
267                 break;
268             }
269 
270             // Now this attribute can be of type
271             //     ID, IDREF, IDREFS, ENTITY, ENTITIES, NOTATION, NMTOKEN, NMTOKENS, ENUMERATION
272             //  All these must be valid XMLName
273             // If namespace is enabled, colon is not allowed in the first 6
274 
275             if (doNamespace && *valPtr == chColon && firstNameChar)
276                 emitError(XMLValid::ColonNotValidWithNS);
277 
278             if (!getReaderMgr()->getCurrentReader()->isNameChar(*valPtr))
279             {
280                 emitError(XMLValid::AttrValNotName, valPtr, fullName);
281                 return;
282             }
283             valPtr++;
284         }
285 
286         //
287         //  Cap it off at the current non-name char. If already capped,
288         //  then remember this.
289         //
290         if (!(*valPtr))
291             alreadyCapped = true;
292         *valPtr = 0;
293 
294         //
295         //  If this type of attribute requires that we track reference
296         //  stuff, then handle that.
297         //
298         if (isARefType)
299         {
300             if ((type == XMLAttDef::ID)
301             ||  (type == XMLAttDef::IDRef)
302             ||  (type == XMLAttDef::IDRefs))
303             {
304                 XMLRefInfo* find = getScanner()->getIDRefList()->get(pszTmpVal);
305                 if (find)
306                 {
307                     if (find->getDeclared() && (type == XMLAttDef::ID))
308                         emitError(XMLValid::ReusedIDValue, pszTmpVal);
309                 }
310                  else
311                 {
312                     find = new (getScanner()->getMemoryManager()) XMLRefInfo
313                     (
314                         pszTmpVal
315                         , false
316                         , false
317                         , getScanner()->getMemoryManager()
318                     );
319                     getScanner()->getIDRefList()->put((void*)find->getRefName(), find);
320                 }
321 
322                 //
323                 //  Mark it declared or used, which might be redundant in some cases
324                 //  but not worth checking
325                 //
326                 if (type == XMLAttDef::ID)
327                     find->setDeclared(true);
328                 else {
329                     if (!preValidation) {
330                         find->setUsed(true);
331                     }
332                 }
333             }
334         }
335          else if (!preValidation && ((type == XMLAttDef::Entity) || (type == XMLAttDef::Entities)))
336         {
337             //
338             //  If its refering to a entity, then look up the name in the
339             //  general entity pool. If not there, then its an error. If its
340             //  not an external unparsed entity, then its an error.
341             //
342             //  In case of pre-validation, the above errors should be ignored.
343             //
344             const XMLEntityDecl* decl = fDTDGrammar->getEntityDecl(pszTmpVal);
345             if (decl)
346             {
347                 if (!decl->isUnparsed())
348                     emitError(XMLValid::BadEntityRefAttr, pszTmpVal, fullName);
349             }
350              else
351             {
352                 emitError
353                 (
354                     XMLValid::UnknownEntityRefAttr
355                     , fullName
356                     , pszTmpVal
357                 );
358             }
359         }
360          else if ((type == XMLAttDef::Notation) || (type == XMLAttDef::Enumeration))
361         {
362             //
363             //  Make sure that this value maps to one of the enumeration or
364             //  notation values in the enumList parameter. We don't have to
365             //  look it up in the notation pool (if a notation) because we
366             //  will look up the enumerated values themselves. If they are in
367             //  the notation pool (after the DTD is parsed), then obviously
368             //  this value will be legal since it matches one of them.
369             //
370             if (!XMLString::isInList(pszTmpVal, enumList))
371                 emitError(XMLValid::DoesNotMatchEnumList, pszTmpVal, fullName);
372         }
373 
374         // If not doing multiple values, then we are done
375         if (!multipleValues)
376             break;
377 
378         //
379         //  If we are at the end, then break out now, else move up to the
380         //  next char and update the base pointer.
381         //
382         if (alreadyCapped)
383             break;
384 
385         valPtr++;
386         pszTmpVal = valPtr;
387     }
388 
389 }
390 
preContentValidation(bool reuseGrammar,bool validateDefAttr)391 void DTDValidator::preContentValidation(bool
392 #if defined(XERCES_DEBUG)
393 										reuseGrammar
394 #endif
395                                        ,bool validateDefAttr)
396 {
397     //
398     //  Lets enumerate all of the elements in the element decl pool
399     //  and put out an error for any that did not get declared.
400     //  We also check all of the attributes as well.
401     //
402     NameIdPoolEnumerator<DTDElementDecl> elemEnum = fDTDGrammar->getElemEnumerator();
403     fDTDGrammar->setValidated(true);
404     while (elemEnum.hasMoreElements())
405     {
406         const DTDElementDecl& curElem = elemEnum.nextElement();
407         const DTDElementDecl::CreateReasons reason = curElem.getCreateReason();
408 
409         //
410         //  See if this element decl was ever marked as declared. If
411         //  not, then put out an error. In some cases its just
412         //  a warning, such as being referenced in a content model.
413         //
414         if (reason != XMLElementDecl::Declared)
415         {
416             if (reason == XMLElementDecl::AttList)
417             {
418                 getScanner()->emitError
419                 (
420                     XMLErrs::UndeclaredElemInAttList
421                     , curElem.getFullName()
422                 );
423             }
424              else if (reason == XMLElementDecl::AsRootElem)
425             {
426                 // It's ok that the root element is not declared in the DTD
427                 /*
428                 emitError
429                 (
430                     XMLValid::UndeclaredElemInDocType
431                     , curElem.getFullName()
432                 );*/
433             }
434              else if (reason == XMLElementDecl::InContentModel)
435             {
436                 getScanner()->emitError
437                 (
438                     XMLErrs::UndeclaredElemInCM
439                     , curElem.getFullName()
440                 );
441             }
442             else
443             {
444                 #if defined(XERCES_DEBUG)
445                   if(reuseGrammar && reason == XMLElementDecl::JustFaultIn){
446                   }
447                   else
448                       ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::DTD_UnknownCreateReason, getScanner()->getMemoryManager());
449                 #endif
450             }
451         }
452 
453         //
454         //  Check all of the attributes of the current element.
455         //  We check for:
456         //
457         //  1) Multiple ID attributes
458         //  2) That all of the default values of attributes are
459         //      valid for their type.
460         //  3) That for any notation types, that their lists
461         //      of possible values refer to declared notations.
462         //
463         //  4) XML1.0(3rd edition)
464         //
465         //     Validity constraint: One Notation Per Element Type
466         //     An element type MUST NOT have more than one NOTATION attribute specified.
467         //
468         //     Validity constraint: No Notation on Empty Element
469         //     For compatibility, an attribute of type NOTATION MUST NOT be declared on an element declared EMPTY.
470         //
471         //     Validity constraint: No Duplicate Tokens
472         //     The notation names in a single NotationType attribute declaration, as well as
473         //     the NmTokens in a single Enumeration attribute declaration, MUST all be distinct.
474         //
475 
476         XMLAttDefList& attDefList = curElem.getAttDefList();
477         bool seenId = false;
478         bool seenNOTATION = false;
479         bool elemEmpty = (curElem.getModelType() == DTDElementDecl::Empty);
480 
481         for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++)
482         {
483             const XMLAttDef& curAttDef = attDefList.getAttDef(i);
484 
485             if (curAttDef.getType() == XMLAttDef::ID)
486             {
487                 if (seenId)
488                 {
489                     emitError
490                     (
491                         XMLValid::MultipleIdAttrs
492                         , curElem.getFullName()
493                     );
494                     break;
495                 }
496 
497                 seenId = true;
498             }
499              else if (curAttDef.getType() == XMLAttDef::Notation)
500             {
501                 if (seenNOTATION)
502                 {
503                     emitError
504                     (
505                         XMLValid::ElemOneNotationAttr
506                       , curElem.getFullName()
507                     );
508 
509                     break;
510                 }
511 
512                 seenNOTATION = true;
513 
514                 // no notation attribute on empty element
515                 if (elemEmpty)
516                 {
517                     emitError
518                    (
519                       XMLValid::EmptyElemNotationAttr
520                     , curElem.getFullName()
521                     , curAttDef.getFullName()
522                     );
523 
524                     break;
525                 }
526 
527                 //go through enumeration list to check
528                 // distinct
529                 // notation declaration
530                 if (curAttDef.getEnumeration())
531                 {
532                     checkTokenList(curAttDef, true);
533                 }
534              }
535              else if (curAttDef.getType() == XMLAttDef::Enumeration )
536              {
537                 //go through enumeration list to check
538                 // distinct only
539                 if (curAttDef.getEnumeration())
540                 {
541                     checkTokenList(curAttDef, false);
542                 }
543              }
544 
545             // If it has a default/fixed value, then validate it
546             if (validateDefAttr && curAttDef.getValue())
547             {
548                 validateAttrValue
549                 (
550                     &curAttDef
551                     , curAttDef.getValue()
552                     , true
553                     , &curElem
554                 );
555             }
556         }
557     }
558 
559     //
560     //  And enumerate all of the general entities. If any of them
561     //  reference a notation, then make sure the notation exists.
562     //
563     NameIdPoolEnumerator<DTDEntityDecl> entEnum = fDTDGrammar->getEntityEnumerator();
564     while (entEnum.hasMoreElements())
565     {
566         const DTDEntityDecl& curEntity = entEnum.nextElement();
567 
568         if (!curEntity.getNotationName())
569             continue;
570 
571         // It has a notation name, so look it up
572         if (!fDTDGrammar->getNotationDecl(curEntity.getNotationName()))
573         {
574             emitError
575             (
576                 XMLValid::NotationNotDeclared
577                 , curEntity.getNotationName()
578             );
579         }
580     }
581 }
582 
postParseValidation()583 void DTDValidator::postParseValidation()
584 {
585     //
586     //  At this time, there is nothing to do here. The scanner itself handles
587     //  ID/IDREF validation, since that is the same no matter what kind of
588     //  validator.
589     //
590 }
591 
592 //
593 //  We need to verify that all of its possible values
594 //  (in the enum list)
595 //   is distinct and
596 //   refer to valid notations if toValidateNotation is set on
597 //
checkTokenList(const XMLAttDef & curAttDef,bool toValidateNotation)598 void DTDValidator::checkTokenList(const XMLAttDef&  curAttDef
599                                 ,       bool        toValidateNotation)
600 {
601 
602     XMLCh* list = XMLString::replicate(curAttDef.getEnumeration(), getScanner()->getMemoryManager());
603     ArrayJanitor<XMLCh> janList(list, getScanner()->getMemoryManager());
604 
605     //
606     //  Search forward for a space or a null. If a null,
607     //  we are done. If a space, cap it and look it up.
608     //
609     bool    breakFlag = false;
610     XMLCh*  listPtr = list;
611     XMLCh*  lastPtr = listPtr;
612     while (true)
613     {
614         while (*listPtr && (*listPtr != chSpace))
615             listPtr++;
616 
617         //
618         //  If at the end, indicate we need to break after
619         //  this one. Else, cap it off here.
620         //
621         if (!*listPtr)
622             breakFlag = true;
623         else
624             *listPtr++ = chNull;
625 
626         //distinction check
627         //there should be no same token found in the remaining list
628         if (XMLString::isInList(lastPtr, listPtr))
629         {
630             emitError
631                 (
632                 XMLValid::AttrDupToken
633                 , curAttDef.getFullName()
634                 , lastPtr
635                 );
636         }
637 
638         if (toValidateNotation && !fDTDGrammar->getNotationDecl(lastPtr))
639         {
640             emitError
641                 (
642                 XMLValid::UnknownNotRefAttr
643                 , curAttDef.getFullName()
644                 , lastPtr
645                 );
646         }
647 
648         // Break out if we hit the end last time
649         if (breakFlag)
650             break;
651 
652         // Else move upwards and try again
653         lastPtr = listPtr;
654     }
655 }
656 
657 XERCES_CPP_NAMESPACE_END
658