1{-# LANGUAGE CPP #-} 2#if __GLASGOW_HASKELL__ 3{-# LANGUAGE DeriveDataTypeable #-} 4#if MIN_VERSION_base(4,4,0) 5{-# LANGUAGE DeriveGeneric #-} 6#endif 7#endif 8 9-- | 10-- Module: Data.XML.Types 11-- Copyright: 2010-2011 John Millikin 12-- License: MIT 13-- 14-- Basic types for representing XML. 15-- 16-- The idea is to have a full set of appropriate types, which various XML 17-- libraries can share. Instead of having equivalent-but-incompatible types 18-- for every binding, parser, or client, they all share the same types can 19-- can thus interoperate easily. 20-- 21-- This library contains complete types for most parts of an XML document, 22-- including the prologue, node tree, and doctype. Some basic combinators 23-- are included for common tasks, including traversing the node tree and 24-- filtering children. 25-- 26module Data.XML.Types 27 ( -- * Types 28 29 -- ** Document prologue 30 Document (..) 31 , Prologue (..) 32 , Instruction (..) 33 , Miscellaneous (..) 34 35 -- ** Document body 36 , Node (..) 37 , Element (..) 38 , Content (..) 39 , Name (..) 40 41 -- ** Doctypes 42 , Doctype (..) 43 , ExternalID (..) 44 45 -- ** Incremental processing 46 , Event (..) 47 48 -- * Combinators 49 50 -- ** Filters 51 , isElement 52 , isInstruction 53 , isContent 54 , isComment 55 , isNamed 56 57 -- ** Element traversal 58 , elementChildren 59 , elementContent 60 , elementText 61 62 -- ** Node traversal 63 , nodeChildren 64 , nodeContent 65 , nodeText 66 67 -- ** Attributes 68 , hasAttribute 69 , hasAttributeText 70 , attributeContent 71 , attributeText 72 ) where 73 74import Control.Monad ((>=>)) 75import Data.Function (on) 76import Data.Maybe (isJust) 77import Data.String (IsString, fromString) 78import Data.Text (Text) 79import qualified Data.Text as T 80import Control.DeepSeq (NFData(rnf)) 81 82#if __GLASGOW_HASKELL__ 83import Data.Typeable (Typeable) 84import Data.Data (Data) 85 86#if MIN_VERSION_base(4,4,0) 87import GHC.Generics (Generic) 88#endif 89#endif 90 91data Document = Document 92 { documentPrologue :: Prologue 93 , documentRoot :: Element 94 , documentEpilogue :: [Miscellaneous] 95 } 96 deriving (Eq, Ord, Show 97#if __GLASGOW_HASKELL__ 98 , Data, Typeable 99#if MIN_VERSION_base(4,4,0) 100 , Generic 101#endif 102#endif 103 ) 104 105instance NFData Document where 106 rnf (Document a b c) = rnf a `seq` rnf b `seq` rnf c `seq` () 107 108data Prologue = Prologue 109 { prologueBefore :: [Miscellaneous] 110 , prologueDoctype :: Maybe Doctype 111 , prologueAfter :: [Miscellaneous] 112 } 113 deriving (Eq, Ord, Show 114#if __GLASGOW_HASKELL__ 115 , Data, Typeable 116#if MIN_VERSION_base(4,4,0) 117 , Generic 118#endif 119#endif 120 ) 121 122instance NFData Prologue where 123 rnf (Prologue a b c) = rnf a `seq` rnf b `seq` rnf c `seq` () 124 125data Instruction = Instruction 126 { instructionTarget :: Text 127 , instructionData :: Text 128 } 129 deriving (Eq, Ord, Show 130#if __GLASGOW_HASKELL__ 131 , Data, Typeable 132#if MIN_VERSION_base(4,4,0) 133 , Generic 134#endif 135#endif 136 ) 137 138instance NFData Instruction where 139 rnf (Instruction a b) = rnf a `seq` rnf b `seq` () 140 141data Miscellaneous 142 = MiscInstruction Instruction 143 | MiscComment Text 144 deriving (Eq, Ord, Show 145#if __GLASGOW_HASKELL__ 146 , Data, Typeable 147#if MIN_VERSION_base(4,4,0) 148 , Generic 149#endif 150#endif 151 ) 152 153instance NFData Miscellaneous where 154 rnf (MiscInstruction a) = rnf a `seq` () 155 rnf (MiscComment a) = rnf a `seq` () 156 157data Node 158 = NodeElement Element 159 | NodeInstruction Instruction 160 | NodeContent Content 161 | NodeComment Text 162 deriving (Eq, Ord, Show 163#if __GLASGOW_HASKELL__ 164 , Data, Typeable 165#if MIN_VERSION_base(4,4,0) 166 , Generic 167#endif 168#endif 169 ) 170 171instance NFData Node where 172 rnf (NodeElement a) = rnf a `seq` () 173 rnf (NodeInstruction a) = rnf a `seq` () 174 rnf (NodeContent a) = rnf a `seq` () 175 rnf (NodeComment a) = rnf a `seq` () 176 177instance IsString Node where 178 fromString = NodeContent . fromString 179 180data Element = Element 181 { elementName :: Name 182 , elementAttributes :: [(Name, [Content])] 183 , elementNodes :: [Node] 184 } 185 deriving (Eq, Ord, Show 186#if __GLASGOW_HASKELL__ 187 , Data, Typeable 188#if MIN_VERSION_base(4,4,0) 189 , Generic 190#endif 191#endif 192 ) 193 194instance NFData Element where 195 rnf (Element a b c) = rnf a `seq` rnf b `seq` rnf c `seq` () 196 197data Content 198 = ContentText Text 199 | ContentEntity Text -- ^ For pass-through parsing 200 deriving (Eq, Ord, Show 201#if __GLASGOW_HASKELL__ 202 , Data, Typeable 203#if MIN_VERSION_base(4,4,0) 204 , Generic 205#endif 206#endif 207 ) 208 209instance NFData Content where 210 rnf (ContentText a) = rnf a `seq` () 211 rnf (ContentEntity a) = rnf a `seq` () 212 213instance IsString Content where 214 fromString = ContentText . fromString 215 216-- | A fully qualified name. 217-- 218-- Prefixes are not semantically important; they are included only to 219-- simplify pass-through parsing. When comparing names with 'Eq' or 'Ord' 220-- methods, prefixes are ignored. 221-- 222-- The @IsString@ instance supports Clark notation; see 223-- <http://www.jclark.com/xml/xmlns.htm> and 224-- <http://infohost.nmt.edu/tcc/help/pubs/pylxml/etree-QName.html>. Use 225-- the @OverloadedStrings@ language extension for very simple @Name@ 226-- construction: 227-- 228-- > myname :: Name 229-- > myname = "{http://example.com/ns/my-namespace}my-name" 230-- 231data Name = Name 232 { nameLocalName :: Text 233 , nameNamespace :: Maybe Text 234 , namePrefix :: Maybe Text 235 } 236 deriving (Show 237#if __GLASGOW_HASKELL__ 238 , Data, Typeable 239#if MIN_VERSION_base(4,4,0) 240 , Generic 241#endif 242#endif 243 ) 244 245instance Eq Name where 246 (==) = (==) `on` (\x -> (nameNamespace x, nameLocalName x)) 247 248instance Ord Name where 249 compare = compare `on` (\x -> (nameNamespace x, nameLocalName x)) 250 251instance IsString Name where 252 fromString "" = Name T.empty Nothing Nothing 253 fromString full@('{':rest) = case break (== '}') rest of 254 (_, "") -> error ("Invalid Clark notation: " ++ show full) 255 (ns, local) -> Name (T.pack (drop 1 local)) (Just (T.pack ns)) Nothing 256 fromString local = Name (T.pack local) Nothing Nothing 257 258instance NFData Name where 259 rnf (Name a b c) = rnf a `seq` rnf b `seq` rnf c `seq` () 260 261-- | Note: due to the incredible complexity of DTDs, this type only supports 262-- external subsets. I've tried adding internal subset types, but they 263-- quickly gain more code than the rest of this module put together. 264-- 265-- It is possible that some future version of this library might support 266-- internal subsets, but I am no longer actively working on adding them. 267data Doctype = Doctype 268 { doctypeName :: Text 269 , doctypeID :: Maybe ExternalID 270 } 271 deriving (Eq, Ord, Show 272#if __GLASGOW_HASKELL__ 273 , Data, Typeable 274#if MIN_VERSION_base(4,4,0) 275 , Generic 276#endif 277#endif 278 ) 279 280instance NFData Doctype where 281 rnf (Doctype a b) = rnf a `seq` rnf b `seq` () 282 283data ExternalID 284 = SystemID Text 285 | PublicID Text Text 286 deriving (Eq, Ord, Show 287#if __GLASGOW_HASKELL__ 288 , Data, Typeable 289#if MIN_VERSION_base(4,4,0) 290 , Generic 291#endif 292#endif 293 ) 294 295instance NFData ExternalID where 296 rnf (SystemID a) = rnf a `seq` () 297 rnf (PublicID a b) = rnf a `seq` rnf b `seq` () 298 299-- | Some XML processing tools are incremental, and work in terms of events 300-- rather than node trees. The 'Event' type allows a document to be fully 301-- specified as a sequence of events. 302-- 303-- Event-based XML libraries include: 304-- 305-- * <http://hackage.haskell.org/package/xml-enumerator> 306-- 307-- * <http://hackage.haskell.org/package/libxml-enumerator> 308-- 309-- * <http://hackage.haskell.org/package/expat-enumerator> 310-- 311data Event 312 = EventBeginDocument 313 | EventEndDocument 314 | EventBeginDoctype Text (Maybe ExternalID) 315 | EventEndDoctype 316 | EventInstruction Instruction 317 | EventBeginElement Name [(Name, [Content])] 318 | EventEndElement Name 319 | EventContent Content 320 | EventComment Text 321 | EventCDATA Text 322 deriving (Eq, Ord, Show 323#if __GLASGOW_HASKELL__ 324 , Data, Typeable 325#if MIN_VERSION_base(4,4,0) 326 , Generic 327#endif 328#endif 329 ) 330 331instance NFData Event where 332 rnf (EventBeginDoctype a b) = rnf a `seq` rnf b `seq` () 333 rnf (EventInstruction a) = rnf a `seq` () 334 rnf (EventBeginElement a b) = rnf a `seq` rnf b `seq` () 335 rnf (EventEndElement a) = rnf a `seq` () 336 rnf (EventContent a) = rnf a `seq` () 337 rnf (EventComment a) = rnf a `seq` () 338 rnf (EventCDATA a) = rnf a `seq` () 339 rnf _ = () 340 341isElement :: Node -> [Element] 342isElement (NodeElement e) = [e] 343isElement _ = [] 344 345isInstruction :: Node -> [Instruction] 346isInstruction (NodeInstruction i) = [i] 347isInstruction _ = [] 348 349isContent :: Node -> [Content] 350isContent (NodeContent c) = [c] 351isContent _ = [] 352 353isComment :: Node -> [Text] 354isComment (NodeComment t) = [t] 355isComment _ = [] 356 357isNamed :: Name -> Element -> [Element] 358isNamed n e = [e | elementName e == n] 359 360elementChildren :: Element -> [Element] 361elementChildren = elementNodes >=> isElement 362 363elementContent :: Element -> [Content] 364elementContent = elementNodes >=> isContent 365 366elementText :: Element -> [Text] 367elementText = elementContent >=> contentText 368 369nodeChildren :: Node -> [Node] 370nodeChildren = isElement >=> elementNodes 371 372nodeContent :: Node -> [Content] 373nodeContent = nodeChildren >=> isContent 374 375nodeText :: Node -> [Text] 376nodeText = nodeContent >=> contentText 377 378hasAttribute :: Name -> Element -> [Element] 379hasAttribute name e = [e | isJust (attributeContent name e)] 380 381hasAttributeText :: Name -> (Text -> Bool) -> Element -> [Element] 382hasAttributeText name p e = [e | maybe False p (attributeText name e)] 383 384attributeContent :: Name -> Element -> Maybe [Content] 385attributeContent name e = lookup name (elementAttributes e) 386 387attributeText :: Name -> Element -> Maybe Text 388attributeText name e = fmap contentFlat (attributeContent name e) 389 390contentText :: Content -> [Text] 391contentText (ContentText t) = [t] 392contentText (ContentEntity entity) = [T.pack "&", entity, T.pack ";"] 393 394contentFlat :: [Content] -> Text 395contentFlat cs = T.concat (cs >>= contentText) 396