1{-# LANGUAGE FlexibleContexts #-} 2 3-- ------------------------------------------------------------ 4 5{- | 6 Module : Text.XML.HXT.DTDValidation.TypeDefs 7 Copyright : Copyright (C) 2008 Uwe Schmidt 8 License : MIT 9 10 Maintainer : Uwe Schmidt (uwe@fh-wedel.de) 11 Stability : experimental 12 Portability: portable 13 14 This module provides functions for validating the DTD of XML documents 15 represented as XmlTree. 16 17 Unlike other popular XML validation tools the validation process returns 18 a list of errors instead of aborting after the first error was found. 19 20 21 Unlike validation of the document, the DTD branch is traversed four times: 22 23 - Validation of Notations 24 25 - Validation of Unparsed Entities 26 27 - Validation of Element declarations 28 29 - Validation of Attribute declarations 30 31-} 32 33-- ------------------------------------------------------------ 34 35module Text.XML.HXT.DTDValidation.DTDValidation 36 ( removeDoublicateDefs 37 , validateDTD 38 ) 39where 40 41import Text.XML.HXT.DTDValidation.AttributeValueValidation 42import Text.XML.HXT.DTDValidation.TypeDefs 43 44-- | 45-- Validate a DTD. 46-- 47-- - returns : a functions which takes the DTD subset of the XmlTree, checks 48-- if the DTD is valid and returns a list of errors 49 50validateDTD :: XmlArrow 51validateDTD -- dtdPart 52 = isDTDDoctype 53 `guards` 54 ( listA getChildren 55 >>> 56 ( validateParts $<< (getNotationNames &&& getElemNames) ) 57 ) 58 where 59 validateParts notationNames elemNames 60 = validateNotations 61 <+> 62 validateEntities notationNames 63 <+> 64 validateElements elemNames 65 <+> 66 validateAttributes elemNames notationNames 67 68 getNotationNames :: LA [XmlTree] [String] 69 getNotationNames = listA $ unlistA >>> isDTDNotation >>> getDTDAttrValue a_name 70 71 getElemNames :: LA [XmlTree] [String] 72 getElemNames = listA $ unlistA >>> isDTDElement >>> getDTDAttrValue a_name 73 74-- ------------------------------------------------------------ 75 76checkName :: String -> SLA [String] XmlTree XmlTree -> SLA [String] XmlTree XmlTree 77checkName name msg 78 = ifA ( getState 79 >>> 80 isA (name `elem`) 81 ) 82 msg 83 (nextState (name:) >>> none) 84 85-- ------------------------------------------------------------ 86 87-- | 88-- Validation of Notations, checks if all notation names are unique. 89-- Validity constraint: Unique Notation Name (4.7 \/ p.44 in Spec) 90-- 91-- * 1.parameter dtdPart : the children of the @DOCTYPE@ node 92-- 93-- - returns : a list of errors 94 95validateNotations :: LA XmlTrees XmlTree 96validateNotations 97 = fromSLA [] ( unlistA 98 >>> 99 isDTDNotation 100 >>> 101 (checkForUniqueNotation $< getDTDAttrl) 102 ) 103 where 104 checkForUniqueNotation :: Attributes -> SLA [String] XmlTree XmlTree 105 checkForUniqueNotation al 106 = checkName name $ 107 err ( "Notation "++ show name ++ " was already specified." ) 108 where 109 name = dtd_name al 110 111-- | 112-- Validation of Entities. 113-- 114-- 1. Issues a warning if entities are declared multiple times. 115-- 116-- Optional warning: (4.2 \/ p.35 in Spec) 117-- 118-- 119-- 2. Validates that a notation is declared for an unparsed entity. 120-- 121-- Validity constraint: Notation Declared (4.2.2 \/ p.36 in Spec) 122-- 123-- * 1.parameter dtdPart : the children of the @DOCTYPE@ node 124-- 125-- - 2.parameter notationNames : list of all notation names declared in the DTD 126-- 127-- - returns : a list of errors 128 129validateEntities :: [String] -> LA XmlTrees XmlTree 130validateEntities notationNames 131 = ( fromSLA [] ( unlistA 132 >>> 133 isDTDEntity 134 >>> 135 (checkForUniqueEntity $< getDTDAttrl) 136 ) 137 ) 138 <+> 139 ( unlistA 140 >>> 141 isUnparsedEntity 142 >>> 143 (checkNotationDecl $< getDTDAttrl) 144 ) 145 where 146 147 -- Check if entities are declared multiple times 148 149 checkForUniqueEntity :: Attributes -> SLA [String] XmlTree XmlTree 150 checkForUniqueEntity al 151 = checkName name $ 152 warn ( "Entity "++ show name ++ " was already specified. " ++ 153 "First declaration will be used." ) 154 where 155 name = dtd_name al 156 157 -- Find unparsed entities for which no notation is specified 158 159 checkNotationDecl :: Attributes -> XmlArrow 160 checkNotationDecl al 161 | notationName `elem` notationNames 162 = none 163 | otherwise 164 = err ( "The notation " ++ show notationName ++ " must be declared " ++ 165 "when referenced in the unparsed entity declaration for " ++ 166 show upEntityName ++ "." 167 ) 168 where 169 notationName = lookup1 k_ndata al 170 upEntityName = dtd_name al 171 172-- | 173-- Validation of Element declarations. 174-- 175-- 1. Validates that an element is not declared multiple times. 176-- 177-- Validity constraint: Unique Element Type Declaration (3.2 \/ p.21 in Spec) 178-- 179-- 180-- 2. Validates that an element name only appears once in a mixed-content declaration. 181-- 182-- Validity constraint: No Duplicate Types (3.2 \/ p.21 in Spec) 183-- 184-- 185-- 3. Issues a warning if an element mentioned in a content model is not declared in the 186-- DTD. 187-- 188-- Optional warning: (3.2 \/ p.21 in Spec) 189-- 190-- * 1.parameter dtdPart : the children of the @DOCTYPE@ node 191-- 192-- - 2.parameter elemNames : list of all element names declared in the DTD 193-- 194-- - returns : a list of errors 195 196 197validateElements :: [String] -> LA XmlTrees XmlTree 198validateElements elemNames -- dtdPart 199 = ( fromSLA [] ( unlistA 200 >>> 201 isDTDElement 202 >>> 203 (checkForUniqueElement $< getDTDAttrl) 204 ) 205 ) 206 <+> 207 ( unlistA 208 >>> 209 isMixedContentElement 210 >>> 211 (checkMixedContent $< getDTDAttrl) 212 ) 213 <+> 214 ( unlistA 215 >>> 216 isDTDElement 217 >>> 218 (checkContentModel elemNames $< getDTDAttrl) 219 ) 220 where 221 222 -- Validates that an element is not declared multiple times 223 224 checkForUniqueElement :: Attributes -> SLA [String] XmlTree XmlTree 225 checkForUniqueElement al 226 = checkName name $ 227 err ( "Element type " ++ show name ++ 228 " must not be declared more than once." ) 229 where 230 name = dtd_name al 231 232 -- Validates that an element name only appears once in a mixed-content declaration 233 234 checkMixedContent :: Attributes -> XmlArrow 235 checkMixedContent al 236 = fromSLA [] ( getChildren 237 >>> 238 getChildren 239 >>> 240 isDTDName 241 >>> 242 (check $< getDTDAttrl) 243 ) 244 where 245 elemName = dtd_name al 246 check al' 247 = checkName name $ 248 err ( "The element type " ++ show name ++ 249 " was already specified in the mixed-content model of the element declaration " ++ 250 show elemName ++ "." ) 251 where 252 name = dtd_name al' 253 254 -- Issues a warning if an element mentioned in a content model is not 255 -- declared in the DTD. 256 checkContentModel :: [String] -> Attributes -> XmlArrow 257 checkContentModel names al 258 | cm `elem` [v_children, v_mixed] 259 = getChildren >>> checkContent 260 | otherwise 261 = none 262 where 263 elemName = dtd_name al 264 cm = dtd_type al 265 266 checkContent :: XmlArrow 267 checkContent 268 = choiceA 269 [ isDTDName :-> ( checkName' $< getDTDAttrl ) 270 , isDTDContent :-> ( getChildren >>> checkContent ) 271 , this :-> none 272 ] 273 where 274 checkName' al' 275 | childElemName `elem` names 276 = none 277 | otherwise 278 = warn ( "The element type "++ show childElemName ++ 279 ", used in content model of element "++ show elemName ++ 280 ", is not declared." 281 ) 282 where 283 childElemName = dtd_name al' 284 285-- | 286-- Validation of Attribute declarations. 287-- 288-- (1) Issues a warning if an attribute is declared for an element type not itself 289-- decared. 290-- 291-- Optinal warning: (3.3 \/ p. 24 in Spec) 292-- 293-- 294-- 2. Issues a warning if more than one definition is provided for the same 295-- attribute of a given element type. Fist declaration is binding, later 296-- definitions are ignored. 297-- 298-- Optional warning: (3.3 \/ p.24 in Spec) 299-- 300-- 301-- 3. Issues a warning if the same Nmtoken occures more than once in enumerated 302-- attribute types of a single element type. 303-- 304-- Optional warning: (3.3.1 \/ p.27 in Spec) 305-- 306-- 307-- 4. Validates that an element type has not more than one ID attribute defined. 308-- 309-- Validity constraint: One ID per Element Type (3.3.1 \/ p.26 in Spec) 310-- 311-- 312-- 5. Validates that an element type has not more than one NOTATION attribute defined. 313-- 314-- Validity constraint: One Notation per Element Type (3.3.1 \/ p.27 in Spec) 315-- 316-- 317-- 6. Validates that an ID attributes has the type #IMPLIED or #REQUIRED. 318-- 319-- Validity constraint: ID Attribute Default (3.3.1 \/ p.26 in Spec) 320-- 321-- 322-- 7. Validates that all referenced notations are declared. 323-- 324-- Validity constraint: Notation Attributes (3.3.1 \/ p.27 in Spec) 325-- 326-- 327-- 8. Validates that notations are not declared for EMPTY elements. 328-- 329-- Validity constraint: No Notation on Empty Element (3.3.1 \/p.27 in Spec) 330-- 331-- 332-- 9. Validates that the default value matches the lexical constraints of it's type. 333-- 334-- Validity constraint: Attribute default legal (3.3.2 \/ p.28 in Spec) 335-- 336-- 337-- * 1.parameter dtdPart : the children of the @DOCTYPE@ node 338-- 339-- - 2.parameter elemNames : list of all element names declared in the DTD 340-- 341-- - 3.parameter notationNames : list of all notation names declared in the DTD 342-- 343-- - returns : a list of errors 344 345validateAttributes :: [String] -> [String] -> LA XmlTrees XmlTree 346validateAttributes elemNames notationNames 347 = -- 1. Find attributes for which no elements are declared 348 ( runCheck this (checkDeclaredElements elemNames) ) 349 <+> 350 -- 2. Find attributes which are declared more than once 351 ( runNameCheck this checkForUniqueAttributeDeclaration ) 352 <+> 353 -- 3. Find enumerated attribute types which nmtokens are declared more than once 354 ( runCheck (isEnumAttrType `orElse` isNotationAttrType) checkEnumeratedTypes ) 355 <+> 356 -- 4. Validate that there exists only one ID attribute for an element 357 ( runNameCheck isIdAttrType checkForUniqueId ) 358 <+> 359 -- 5. Validate that there exists only one NOTATION attribute for an element 360 ( runNameCheck isNotationAttrType checkForUniqueNotation ) 361 <+> 362 -- 6. Validate that ID attributes have the type #IMPLIED or #REQUIRED 363 ( runCheck isIdAttrType checkIdKindConstraint ) 364 <+> 365 -- 7. Validate that all referenced notations are declared 366 ( runCheck isNotationAttrType (checkNotationDeclaration notationNames) ) 367 <+> 368 -- 8. Validate that notations are not declared for EMPTY elements 369 ( checkNoNotationForEmptyElements $< listA ( unlistA 370 >>> 371 isEmptyElement 372 >>> 373 getDTDAttrValue a_name 374 ) 375 ) 376 <+> 377 -- 9. Validate that the default value matches the lexical constraints of it's type 378 ( checkDefaultValueTypes $< this ) 379 380 where 381 -- ------------------------------------------------------------ 382 -- control structures 383 384 runCheck select check 385 = unlistA >>> isDTDAttlist 386 >>> 387 select 388 >>> 389 (check $< getDTDAttrl) 390 391 runNameCheck select check 392 = fromSLA [] $ runCheck select check 393 394 -------------------------------------------------------------------------- 395 396 -- 1. Find attributes for which no elements are declared 397 398 checkDeclaredElements :: [String] -> Attributes -> XmlArrow 399 checkDeclaredElements elemNames' al 400 | en `elem` elemNames' 401 = none 402 | otherwise 403 = warn ( "The element type \""++ en ++ "\" used in dclaration "++ 404 "of attribute \""++ an ++"\" is not declared." 405 ) 406 where 407 en = dtd_name al 408 an = dtd_value al 409 410 -------------------------------------------------------------------------- 411 412 -- 2. Find attributes which are declared more than once 413 414 checkForUniqueAttributeDeclaration :: Attributes -> SLA [String] XmlTree XmlTree 415 checkForUniqueAttributeDeclaration al 416 = checkName name $ 417 warn ( "Attribute \""++ aname ++"\" for element type \""++ 418 ename ++"\" is already declared. First "++ 419 "declaration will be used." ) 420 where 421 ename = dtd_name al 422 aname = dtd_value al 423 name = ename ++ "|" ++ aname 424 425 -------------------------------------------------------------------------- 426 427 -- 3. Find enumerated attribute types which nmtokens are declared more than once 428 429 checkEnumeratedTypes :: Attributes -> XmlArrow 430 checkEnumeratedTypes al 431 = fromSLA [] ( getChildren 432 >>> 433 isDTDName 434 >>> 435 (checkForUniqueType $< getDTDAttrl) 436 ) 437 where 438 checkForUniqueType :: Attributes -> SLA [String] XmlTree XmlTree 439 checkForUniqueType al' 440 = checkName nmtoken $ 441 warn ( "Nmtoken \""++ nmtoken ++"\" should not "++ 442 "occur more than once in attribute \""++ dtd_value al ++ 443 "\" for element \""++ dtd_name al ++ "\"." ) 444 where 445 nmtoken = dtd_name al' 446 447 -------------------------------------------------------------------------- 448 449 -- 4. Validate that there exists only one ID attribute for an element 450 451 checkForUniqueId :: Attributes -> SLA [String] XmlTree XmlTree 452 checkForUniqueId al 453 = checkName ename $ 454 err ( "Element \""++ ename ++ "\" already has attribute of type "++ 455 "ID, another attribute \""++ dtd_value al ++ "\" of type ID is "++ 456 "not permitted." ) 457 where 458 ename = dtd_name al 459 460 -------------------------------------------------------------------------- 461 462 -- 5. Validate that there exists only one NOTATION attribute for an element 463 464 checkForUniqueNotation :: Attributes -> SLA [String] XmlTree XmlTree 465 checkForUniqueNotation al 466 = checkName ename $ 467 err ( "Element \""++ ename ++ "\" already has attribute of type "++ 468 "NOTATION, another attribute \""++ dtd_value al ++ "\" of type NOTATION "++ 469 "is not permitted." ) 470 where 471 ename = dtd_name al 472 473 -------------------------------------------------------------------------- 474 475 -- 6. Validate that ID attributes have the type #IMPLIED or #REQUIRED 476 477 checkIdKindConstraint :: Attributes -> XmlArrow 478 checkIdKindConstraint al 479 | attKind `elem` [k_implied, k_required] 480 = none 481 | otherwise 482 = err ( "ID attribute \""++ dtd_value al ++"\" must have a declared default "++ 483 "of \"#IMPLIED\" or \"REQUIRED\"") 484 where 485 attKind = dtd_kind al 486 487 488 -------------------------------------------------------------------------- 489 490 -- 7. Validate that all referenced notations are declared 491 492 checkNotationDeclaration :: [String] -> Attributes -> XmlArrow 493 checkNotationDeclaration notations al 494 = getChildren 495 >>> 496 isDTDName 497 >>> 498 (checkNotations $< getDTDAttrl) 499 where 500 checkNotations :: Attributes -> XmlArrow 501 checkNotations al' 502 | notation `elem` notations 503 = none 504 | otherwise 505 = err ( "The notation \""++ notation ++"\" must be declared when "++ 506 "referenced in the notation type list for attribute \""++ dtd_value al ++ 507 "\" of element \""++ dtd_name al ++"\"." 508 ) 509 where 510 notation = dtd_name al' 511 512 -------------------------------------------------------------------------- 513 514 -- 8. Validate that notations are not declared for EMPTY elements 515 516 checkNoNotationForEmptyElements :: [String] -> LA XmlTrees XmlTree 517 checkNoNotationForEmptyElements emptyElems 518 = unlistA 519 >>> 520 isDTDAttlist 521 >>> 522 isNotationAttrType 523 >>> 524 (checkNoNotationForEmptyElement $< getDTDAttrl) 525 where 526 checkNoNotationForEmptyElement :: Attributes -> XmlArrow 527 checkNoNotationForEmptyElement al 528 | ename `elem` emptyElems 529 = err ( "Attribute \""++ dtd_value al ++"\" of type NOTATION must not be "++ 530 "declared on the element \""++ ename ++"\" declared EMPTY." 531 ) 532 | otherwise 533 = none 534 where 535 ename = dtd_name al 536 537 -------------------------------------------------------------------------- 538 539 -- 9. Validate that default values meet the lexical constraints of the attribute types 540 541 checkDefaultValueTypes :: XmlTrees -> LA XmlTrees XmlTree 542 checkDefaultValueTypes dtdPart' 543 = unlistA >>> isDTDAttlist 544 >>> 545 isDefaultAttrKind 546 >>> 547 (checkAttributeValue dtdPart' $< this) 548 549-- ------------------------------------------------------------ 550 551-- | 552-- Removes doublicate declarations from the DTD, which first declaration is 553-- binding. This is the case for ATTLIST and ENTITY declarations. 554-- 555-- - returns : A function that replaces the children of DOCTYPE nodes by a list 556-- where all multiple declarations are removed. 557 558removeDoublicateDefs :: XmlArrow 559removeDoublicateDefs 560 = replaceChildren 561 ( fromSLA [] ( getChildren 562 >>> 563 choiceA [ isDTDAttlist :-> (removeDoubleAttlist $< getDTDAttrl) 564 , isDTDEntity :-> (removeDoubleEntity $< getDTDAttrl) 565 , this :-> this 566 ] 567 ) 568 ) 569 `when` 570 isDTDDoctype 571 where 572 checkName' n' 573 = ifA ( getState 574 >>> 575 isA (n' `elem`) 576 ) 577 none 578 (this >>> perform (nextState (n':))) 579 580 removeDoubleAttlist :: Attributes -> SLA [String] XmlTree XmlTree 581 removeDoubleAttlist al 582 = checkName' elemAttr 583 where 584 elemAttr = elemName ++ "|" ++ attrName 585 attrName = dtd_value al 586 elemName = dtd_name al 587 588 removeDoubleEntity :: Attributes -> SLA [String] XmlTree XmlTree 589 removeDoubleEntity al 590 = checkName' (dtd_name al) 591 592-- ------------------------------------------------------------ 593