1{-# LANGUAGE FlexibleContexts #-}
2
3-- ------------------------------------------------------------
4
5{- |
6   Module     : Text.XML.HXT.DTDValidation.TypeDefs
7   Copyright  : Copyright (C) 2008 Uwe Schmidt
8   License    : MIT
9
10   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
11   Stability  : experimental
12   Portability: portable
13
14   This module provides functions for validating the DTD of XML documents
15   represented as XmlTree.
16
17   Unlike other popular XML validation tools the validation process returns
18   a list of errors instead of aborting after the first error was found.
19
20
21   Unlike validation of the document, the DTD branch is traversed four times:
22
23    - Validation of Notations
24
25    - Validation of Unparsed Entities
26
27    - Validation of Element declarations
28
29    - Validation of Attribute declarations
30
31-}
32
33-- ------------------------------------------------------------
34
35module Text.XML.HXT.DTDValidation.DTDValidation
36    ( removeDoublicateDefs
37    , validateDTD
38    )
39where
40
41import           Text.XML.HXT.DTDValidation.AttributeValueValidation
42import           Text.XML.HXT.DTDValidation.TypeDefs
43
44-- |
45-- Validate a DTD.
46--
47--    - returns : a functions which takes the DTD subset of the XmlTree, checks
48--                  if the DTD is valid and returns a list of errors
49
50validateDTD :: XmlArrow
51validateDTD -- dtdPart
52    = isDTDDoctype
53      `guards`
54      ( listA getChildren
55        >>>
56        ( validateParts $<< (getNotationNames &&& getElemNames) )
57      )
58    where
59    validateParts notationNames elemNames
60        = validateNotations
61          <+>
62          validateEntities notationNames
63          <+>
64          validateElements elemNames
65          <+>
66          validateAttributes elemNames notationNames
67
68    getNotationNames    :: LA [XmlTree] [String]
69    getNotationNames    = listA $ unlistA >>> isDTDNotation >>> getDTDAttrValue a_name
70
71    getElemNames        :: LA [XmlTree] [String]
72    getElemNames        = listA $ unlistA >>> isDTDElement  >>> getDTDAttrValue a_name
73
74-- ------------------------------------------------------------
75
76checkName       :: String -> SLA [String] XmlTree XmlTree -> SLA [String] XmlTree XmlTree
77checkName name msg
78    = ifA ( getState
79            >>>
80            isA (name `elem`)
81          )
82      msg
83      (nextState (name:) >>> none)
84
85-- ------------------------------------------------------------
86
87-- |
88-- Validation of Notations, checks if all notation names are unique.
89-- Validity constraint: Unique Notation Name (4.7 \/ p.44 in Spec)
90--
91--    * 1.parameter dtdPart :  the children of the @DOCTYPE@ node
92--
93--    - returns : a list of errors
94
95validateNotations :: LA XmlTrees XmlTree
96validateNotations
97    = fromSLA [] ( unlistA
98                   >>>
99                   isDTDNotation
100                   >>>
101                   (checkForUniqueNotation $< getDTDAttrl)
102                 )
103      where
104      checkForUniqueNotation :: Attributes -> SLA [String] XmlTree XmlTree
105      checkForUniqueNotation al
106          = checkName name $
107            err ( "Notation "++ show name ++ " was already specified." )
108          where
109          name = dtd_name al
110
111-- |
112-- Validation of Entities.
113--
114-- 1. Issues a warning if entities are declared multiple times.
115--
116--    Optional warning: (4.2 \/ p.35 in Spec)
117--
118--
119-- 2. Validates that a notation is declared for an unparsed entity.
120--
121--    Validity constraint: Notation Declared (4.2.2 \/ p.36 in Spec)
122--
123--    * 1.parameter dtdPart :  the children of the @DOCTYPE@ node
124--
125--    - 2.parameter notationNames :  list of all notation names declared in the DTD
126--
127--    - returns : a list of errors
128
129validateEntities        :: [String] -> LA XmlTrees XmlTree
130validateEntities notationNames
131    = ( fromSLA [] ( unlistA
132                     >>>
133                     isDTDEntity
134                     >>>
135                     (checkForUniqueEntity $< getDTDAttrl)
136                   )
137      )
138      <+>
139      ( unlistA
140        >>>
141        isUnparsedEntity
142        >>>
143        (checkNotationDecl $< getDTDAttrl)
144      )
145      where
146
147      -- Check if entities are declared multiple times
148
149      checkForUniqueEntity      :: Attributes -> SLA [String] XmlTree XmlTree
150      checkForUniqueEntity al
151          = checkName name $
152            warn ( "Entity "++ show name ++ " was already specified. " ++
153                    "First declaration will be used." )
154          where
155          name = dtd_name al
156
157      -- Find unparsed entities for which no notation is specified
158
159      checkNotationDecl         :: Attributes -> XmlArrow
160      checkNotationDecl al
161          | notationName `elem` notationNames
162              = none
163          | otherwise
164              = err ( "The notation " ++ show notationName ++ " must be declared " ++
165                      "when referenced in the unparsed entity declaration for " ++
166                      show upEntityName ++ "."
167                    )
168          where
169          notationName = lookup1 k_ndata al
170          upEntityName = dtd_name  al
171
172-- |
173-- Validation of Element declarations.
174--
175-- 1. Validates that an element is not declared multiple times.
176--
177--    Validity constraint: Unique Element Type Declaration (3.2 \/ p.21 in Spec)
178--
179--
180-- 2. Validates that an element name only appears once in a mixed-content declaration.
181--
182--    Validity constraint: No Duplicate Types (3.2 \/ p.21 in Spec)
183--
184--
185-- 3. Issues a warning if an element mentioned in a content model is not declared in the
186--    DTD.
187--
188--    Optional warning: (3.2 \/ p.21 in Spec)
189--
190--    * 1.parameter dtdPart :  the children of the @DOCTYPE@ node
191--
192--    - 2.parameter elemNames :  list of all element names declared in the DTD
193--
194--    - returns : a list of errors
195
196
197validateElements        :: [String] -> LA XmlTrees XmlTree
198validateElements elemNames -- dtdPart
199    = ( fromSLA [] ( unlistA
200                     >>>
201                     isDTDElement
202                     >>>
203                     (checkForUniqueElement $< getDTDAttrl)
204                   )
205      )
206      <+>
207      ( unlistA
208        >>>
209        isMixedContentElement
210        >>>
211        (checkMixedContent $< getDTDAttrl)
212      )
213      <+>
214      ( unlistA
215        >>>
216        isDTDElement
217        >>>
218        (checkContentModel elemNames $< getDTDAttrl)
219      )
220      where
221
222      -- Validates that an element is not declared multiple times
223
224      checkForUniqueElement :: Attributes -> SLA [String] XmlTree XmlTree
225      checkForUniqueElement al
226          = checkName name $
227            err ( "Element type " ++ show name ++
228                  " must not be declared more than once." )
229          where
230          name = dtd_name al
231
232      -- Validates that an element name only appears once in a mixed-content declaration
233
234      checkMixedContent :: Attributes -> XmlArrow
235      checkMixedContent al
236          = fromSLA [] ( getChildren
237                         >>>
238                         getChildren
239                         >>>
240                         isDTDName
241                         >>>
242                         (check $< getDTDAttrl)
243                       )
244            where
245            elemName = dtd_name al
246            check al'
247                = checkName name $
248                  err ( "The element type " ++ show name ++
249                         " was already specified in the mixed-content model of the element declaration " ++
250                         show elemName ++ "." )
251                where
252                name = dtd_name al'
253
254      -- Issues a warning if an element mentioned in a content model is not
255      -- declared in the DTD.
256      checkContentModel :: [String] -> Attributes -> XmlArrow
257      checkContentModel names al
258          | cm `elem` [v_children, v_mixed]
259              = getChildren >>> checkContent
260          | otherwise
261              = none
262          where
263          elemName = dtd_name al
264          cm       = dtd_type al
265
266          checkContent :: XmlArrow
267          checkContent
268              = choiceA
269                [ isDTDName    :-> ( checkName' $< getDTDAttrl )
270                , isDTDContent :-> ( getChildren >>> checkContent )
271                , this         :-> none
272                ]
273              where
274              checkName' al'
275                  | childElemName `elem` names
276                      = none
277                  | otherwise
278                      = warn ( "The element type "++ show childElemName ++
279                               ", used in content model of element "++ show elemName ++
280                               ", is not declared."
281                             )
282                  where
283                  childElemName = dtd_name al'
284
285-- |
286-- Validation of Attribute declarations.
287--
288-- (1) Issues a warning if an attribute is declared for an element type not itself
289--    decared.
290--
291--    Optinal warning: (3.3 \/ p. 24 in Spec)
292--
293--
294-- 2. Issues a warning if more than one definition is provided for the same
295--    attribute of a given element type. Fist declaration is binding, later
296--    definitions are ignored.
297--
298--    Optional warning: (3.3 \/ p.24 in Spec)
299--
300--
301-- 3. Issues a warning if the same Nmtoken occures more than once in enumerated
302--    attribute types of a single element type.
303--
304--    Optional warning: (3.3.1 \/ p.27 in Spec)
305--
306--
307-- 4. Validates that an element type has not more than one ID attribute defined.
308--
309--    Validity constraint: One ID per Element Type (3.3.1 \/ p.26 in Spec)
310--
311--
312-- 5. Validates that an element type has not more than one NOTATION attribute defined.
313--
314--    Validity constraint: One Notation per Element Type (3.3.1 \/ p.27 in Spec)
315--
316--
317-- 6. Validates that an ID attributes has the type #IMPLIED or #REQUIRED.
318--
319--    Validity constraint: ID Attribute Default (3.3.1 \/ p.26 in Spec)
320--
321--
322-- 7. Validates that all referenced notations are declared.
323--
324--    Validity constraint: Notation Attributes (3.3.1 \/ p.27 in Spec)
325--
326--
327-- 8. Validates that notations are not declared for EMPTY elements.
328--
329--    Validity constraint: No Notation on Empty Element (3.3.1 \/p.27 in Spec)
330--
331--
332-- 9. Validates that the default value matches the lexical constraints of it's type.
333--
334--    Validity constraint: Attribute default legal (3.3.2 \/ p.28 in Spec)
335--
336--
337--    * 1.parameter dtdPart :  the children of the @DOCTYPE@ node
338--
339--    - 2.parameter elemNames :  list of all element names declared in the DTD
340--
341--    - 3.parameter notationNames :  list of all notation names declared in the DTD
342--
343--    - returns : a list of errors
344
345validateAttributes :: [String] -> [String] -> LA XmlTrees XmlTree
346validateAttributes elemNames notationNames
347    = -- 1. Find attributes for which no elements are declared
348      ( runCheck this (checkDeclaredElements elemNames) )
349      <+>
350      -- 2. Find attributes which are declared more than once
351      ( runNameCheck this checkForUniqueAttributeDeclaration )
352      <+>
353      -- 3. Find enumerated attribute types which nmtokens are declared more than once
354      ( runCheck (isEnumAttrType `orElse` isNotationAttrType) checkEnumeratedTypes )
355      <+>
356      -- 4. Validate that there exists only one ID attribute for an element
357      ( runNameCheck isIdAttrType checkForUniqueId )
358      <+>
359      -- 5. Validate that there exists only one NOTATION attribute for an element
360      ( runNameCheck isNotationAttrType checkForUniqueNotation )
361      <+>
362      -- 6. Validate that ID attributes have the type #IMPLIED or #REQUIRED
363      ( runCheck isIdAttrType checkIdKindConstraint )
364      <+>
365      -- 7. Validate that all referenced notations are declared
366      ( runCheck isNotationAttrType (checkNotationDeclaration notationNames) )
367      <+>
368      -- 8. Validate that notations are not declared for EMPTY elements
369      ( checkNoNotationForEmptyElements $< listA ( unlistA
370                                                   >>>
371                                                   isEmptyElement
372                                                   >>>
373                                                   getDTDAttrValue a_name
374                                                 )
375      )
376      <+>
377      -- 9. Validate that the default value matches the lexical constraints of it's type
378      ( checkDefaultValueTypes $< this )
379
380      where
381      -- ------------------------------------------------------------
382      -- control structures
383
384      runCheck select check
385          = unlistA >>> isDTDAttlist
386            >>>
387            select
388            >>>
389            (check $< getDTDAttrl)
390
391      runNameCheck select check
392          = fromSLA [] $ runCheck select check
393
394      --------------------------------------------------------------------------
395
396      -- 1. Find attributes for which no elements are declared
397
398      checkDeclaredElements :: [String] -> Attributes -> XmlArrow
399      checkDeclaredElements elemNames' al
400          | en `elem` elemNames'
401              = none
402          | otherwise
403              = warn ( "The element type \""++ en ++ "\" used in dclaration "++
404                       "of attribute \""++ an ++"\" is not declared."
405                     )
406          where
407          en = dtd_name al
408          an = dtd_value al
409
410      --------------------------------------------------------------------------
411
412      -- 2. Find attributes which are declared more than once
413
414      checkForUniqueAttributeDeclaration ::  Attributes -> SLA [String] XmlTree XmlTree
415      checkForUniqueAttributeDeclaration al
416          = checkName name $
417            warn ( "Attribute \""++ aname ++"\" for element type \""++
418                   ename ++"\" is already declared. First "++
419                   "declaration will be used." )
420          where
421          ename = dtd_name al
422          aname = dtd_value al
423          name  = ename ++ "|" ++ aname
424
425      --------------------------------------------------------------------------
426
427      -- 3. Find enumerated attribute types which nmtokens are declared more than once
428
429      checkEnumeratedTypes :: Attributes -> XmlArrow
430      checkEnumeratedTypes al
431          = fromSLA [] ( getChildren
432                         >>>
433                         isDTDName
434                         >>>
435                         (checkForUniqueType $< getDTDAttrl)
436                       )
437          where
438          checkForUniqueType :: Attributes -> SLA [String] XmlTree XmlTree
439          checkForUniqueType al'
440              = checkName nmtoken $
441                warn ( "Nmtoken \""++ nmtoken ++"\" should not "++
442                       "occur more than once in attribute \""++ dtd_value al ++
443                       "\" for element \""++ dtd_name al ++ "\"." )
444              where
445              nmtoken = dtd_name al'
446
447      --------------------------------------------------------------------------
448
449      -- 4. Validate that there exists only one ID attribute for an element
450
451      checkForUniqueId :: Attributes -> SLA [String] XmlTree XmlTree
452      checkForUniqueId al
453          = checkName ename $
454            err ( "Element \""++ ename ++ "\" already has attribute of type "++
455                  "ID, another attribute \""++ dtd_value al ++ "\" of type ID is "++
456                  "not permitted." )
457          where
458          ename = dtd_name al
459
460      --------------------------------------------------------------------------
461
462      -- 5. Validate that there exists only one NOTATION attribute for an element
463
464      checkForUniqueNotation :: Attributes -> SLA [String] XmlTree XmlTree
465      checkForUniqueNotation al
466          = checkName ename $
467            err ( "Element \""++ ename ++ "\" already has attribute of type "++
468                  "NOTATION, another attribute \""++ dtd_value al ++ "\" of type NOTATION "++
469                  "is not permitted." )
470          where
471          ename = dtd_name al
472
473      --------------------------------------------------------------------------
474
475      -- 6. Validate that ID attributes have the type #IMPLIED or #REQUIRED
476
477      checkIdKindConstraint :: Attributes -> XmlArrow
478      checkIdKindConstraint al
479          | attKind `elem` [k_implied, k_required]
480              = none
481          | otherwise
482              = err ( "ID attribute \""++ dtd_value al ++"\" must have a declared default "++
483                      "of \"#IMPLIED\" or \"REQUIRED\"")
484          where
485          attKind = dtd_kind al
486
487
488      --------------------------------------------------------------------------
489
490      -- 7. Validate that all referenced notations are declared
491
492      checkNotationDeclaration :: [String] -> Attributes -> XmlArrow
493      checkNotationDeclaration notations al
494          = getChildren
495            >>>
496            isDTDName
497            >>>
498            (checkNotations $< getDTDAttrl)
499          where
500          checkNotations :: Attributes -> XmlArrow
501          checkNotations al'
502              | notation `elem` notations
503                  = none
504              | otherwise
505                  = err ( "The notation \""++ notation ++"\" must be declared when "++
506                          "referenced in the notation type list for attribute \""++ dtd_value al ++
507                          "\" of element \""++ dtd_name al ++"\"."
508                        )
509              where
510              notation = dtd_name al'
511
512      --------------------------------------------------------------------------
513
514      -- 8. Validate that notations are not declared for EMPTY elements
515
516      checkNoNotationForEmptyElements :: [String] -> LA XmlTrees XmlTree
517      checkNoNotationForEmptyElements emptyElems
518          = unlistA
519            >>>
520            isDTDAttlist
521            >>>
522            isNotationAttrType
523            >>>
524            (checkNoNotationForEmptyElement $< getDTDAttrl)
525          where
526          checkNoNotationForEmptyElement :: Attributes -> XmlArrow
527          checkNoNotationForEmptyElement al
528              | ename `elem` emptyElems
529                  = err ( "Attribute \""++ dtd_value al ++"\" of type NOTATION must not be "++
530                          "declared on the element \""++ ename ++"\" declared EMPTY."
531                        )
532              | otherwise
533                  = none
534              where
535              ename = dtd_name al
536
537      --------------------------------------------------------------------------
538
539      -- 9. Validate that default values meet the lexical constraints of the attribute types
540
541      checkDefaultValueTypes :: XmlTrees -> LA XmlTrees XmlTree
542      checkDefaultValueTypes dtdPart'
543          = unlistA >>> isDTDAttlist
544            >>>
545            isDefaultAttrKind
546            >>>
547            (checkAttributeValue dtdPart' $< this)
548
549-- ------------------------------------------------------------
550
551-- |
552-- Removes doublicate declarations from the DTD, which first declaration is
553-- binding. This is the case for ATTLIST and ENTITY declarations.
554--
555--    - returns : A function that replaces the children of DOCTYPE nodes by a list
556--               where all multiple declarations are removed.
557
558removeDoublicateDefs :: XmlArrow
559removeDoublicateDefs
560    = replaceChildren
561      ( fromSLA [] ( getChildren
562                     >>>
563                     choiceA [ isDTDAttlist :-> (removeDoubleAttlist $< getDTDAttrl)
564                             , isDTDEntity  :-> (removeDoubleEntity  $< getDTDAttrl)
565                             , this         :-> this
566                             ]
567                   )
568      )
569      `when`
570      isDTDDoctype
571    where
572    checkName' n'
573        = ifA ( getState
574                >>>
575                isA (n' `elem`)
576              )
577          none
578          (this >>> perform (nextState (n':)))
579
580    removeDoubleAttlist :: Attributes -> SLA [String] XmlTree XmlTree
581    removeDoubleAttlist al
582        = checkName' elemAttr
583        where
584        elemAttr = elemName ++ "|" ++ attrName
585        attrName = dtd_value al
586        elemName = dtd_name al
587
588    removeDoubleEntity  :: Attributes -> SLA [String] XmlTree XmlTree
589    removeDoubleEntity al
590        = checkName' (dtd_name al)
591
592-- ------------------------------------------------------------
593