1{-# LANGUAGE CPP #-}
2#if __GLASGOW_HASKELL__
3{-# LANGUAGE DeriveDataTypeable #-}
4#if MIN_VERSION_base(4,4,0)
5{-# LANGUAGE DeriveGeneric #-}
6#endif
7#endif
8
9-- |
10-- Module: Data.XML.Types
11-- Copyright: 2010-2011 John Millikin
12-- License: MIT
13--
14-- Basic types for representing XML.
15--
16-- The idea is to have a full set of appropriate types, which various XML
17-- libraries can share. Instead of having equivalent-but-incompatible types
18-- for every binding, parser, or client, they all share the same types can
19-- can thus interoperate easily.
20--
21-- This library contains complete types for most parts of an XML document,
22-- including the prologue, node tree, and doctype. Some basic combinators
23-- are included for common tasks, including traversing the node tree and
24-- filtering children.
25--
26module Data.XML.Types
27	( -- * Types
28
29	  -- ** Document prologue
30	  Document (..)
31	, Prologue (..)
32	, Instruction (..)
33	, Miscellaneous (..)
34
35	-- ** Document body
36	, Node (..)
37	, Element (..)
38	, Content (..)
39	, Name (..)
40
41	-- ** Doctypes
42	, Doctype (..)
43	, ExternalID (..)
44
45	-- ** Incremental processing
46	, Event (..)
47
48	-- * Combinators
49
50	-- ** Filters
51	, isElement
52	, isInstruction
53	, isContent
54	, isComment
55	, isNamed
56
57	-- ** Element traversal
58	, elementChildren
59	, elementContent
60	, elementText
61
62	-- ** Node traversal
63	, nodeChildren
64	, nodeContent
65	, nodeText
66
67	-- ** Attributes
68	, hasAttribute
69	, hasAttributeText
70	, attributeContent
71	, attributeText
72	) where
73
74import           Control.Monad ((>=>))
75import           Data.Function (on)
76import           Data.Maybe (isJust)
77import           Data.String (IsString, fromString)
78import           Data.Text (Text)
79import qualified Data.Text as T
80import           Control.DeepSeq (NFData(rnf))
81
82#if __GLASGOW_HASKELL__
83import           Data.Typeable (Typeable)
84import           Data.Data (Data)
85
86#if MIN_VERSION_base(4,4,0)
87import           GHC.Generics (Generic)
88#endif
89#endif
90
91data Document = Document
92	{ documentPrologue :: Prologue
93	, documentRoot :: Element
94	, documentEpilogue :: [Miscellaneous]
95	}
96	deriving (Eq, Ord, Show
97#if __GLASGOW_HASKELL__
98	, Data, Typeable
99#if MIN_VERSION_base(4,4,0)
100	, Generic
101#endif
102#endif
103	)
104
105instance NFData Document where
106	rnf (Document a b c) = rnf a `seq` rnf b `seq` rnf c `seq` ()
107
108data Prologue = Prologue
109	{ prologueBefore :: [Miscellaneous]
110	, prologueDoctype :: Maybe Doctype
111	, prologueAfter :: [Miscellaneous]
112	}
113	deriving (Eq, Ord, Show
114#if __GLASGOW_HASKELL__
115	, Data, Typeable
116#if MIN_VERSION_base(4,4,0)
117	, Generic
118#endif
119#endif
120	)
121
122instance NFData Prologue where
123	rnf (Prologue a b c) = rnf a `seq` rnf b `seq` rnf c `seq` ()
124
125data Instruction = Instruction
126	{ instructionTarget :: Text
127	, instructionData :: Text
128	}
129	deriving (Eq, Ord, Show
130#if __GLASGOW_HASKELL__
131	, Data, Typeable
132#if MIN_VERSION_base(4,4,0)
133	, Generic
134#endif
135#endif
136	)
137
138instance NFData Instruction where
139	rnf (Instruction a b) = rnf a `seq` rnf b `seq` ()
140
141data Miscellaneous
142	= MiscInstruction Instruction
143	| MiscComment Text
144	deriving (Eq, Ord, Show
145#if __GLASGOW_HASKELL__
146	, Data, Typeable
147#if MIN_VERSION_base(4,4,0)
148	, Generic
149#endif
150#endif
151	)
152
153instance NFData Miscellaneous where
154	rnf (MiscInstruction a) = rnf a `seq` ()
155	rnf (MiscComment a)     = rnf a `seq` ()
156
157data Node
158	= NodeElement Element
159	| NodeInstruction Instruction
160	| NodeContent Content
161	| NodeComment Text
162	deriving (Eq, Ord, Show
163#if __GLASGOW_HASKELL__
164	, Data, Typeable
165#if MIN_VERSION_base(4,4,0)
166	, Generic
167#endif
168#endif
169	)
170
171instance NFData Node where
172	rnf (NodeElement a)     = rnf a `seq` ()
173	rnf (NodeInstruction a) = rnf a `seq` ()
174	rnf (NodeContent a)     = rnf a `seq` ()
175	rnf (NodeComment a)     = rnf a `seq` ()
176
177instance IsString Node where
178	fromString = NodeContent . fromString
179
180data Element = Element
181	{ elementName :: Name
182	, elementAttributes :: [(Name, [Content])]
183	, elementNodes :: [Node]
184	}
185	deriving (Eq, Ord, Show
186#if __GLASGOW_HASKELL__
187	, Data, Typeable
188#if MIN_VERSION_base(4,4,0)
189	, Generic
190#endif
191#endif
192	)
193
194instance NFData Element where
195	rnf (Element a b c) = rnf a `seq` rnf b `seq` rnf c `seq` ()
196
197data Content
198	= ContentText Text
199	| ContentEntity Text -- ^ For pass-through parsing
200	deriving (Eq, Ord, Show
201#if __GLASGOW_HASKELL__
202	, Data, Typeable
203#if MIN_VERSION_base(4,4,0)
204	, Generic
205#endif
206#endif
207	)
208
209instance NFData Content where
210	rnf (ContentText a)   = rnf a `seq` ()
211	rnf (ContentEntity a) = rnf a `seq` ()
212
213instance IsString Content where
214	fromString = ContentText . fromString
215
216-- | A fully qualified name.
217--
218-- Prefixes are not semantically important; they are included only to
219-- simplify pass-through parsing. When comparing names with 'Eq' or 'Ord'
220-- methods, prefixes are ignored.
221--
222-- The @IsString@ instance supports Clark notation; see
223-- <http://www.jclark.com/xml/xmlns.htm> and
224-- <http://infohost.nmt.edu/tcc/help/pubs/pylxml/etree-QName.html>. Use
225-- the @OverloadedStrings@ language extension for very simple @Name@
226-- construction:
227--
228-- > myname :: Name
229-- > myname = "{http://example.com/ns/my-namespace}my-name"
230--
231data Name = Name
232	{ nameLocalName :: Text
233	, nameNamespace :: Maybe Text
234	, namePrefix :: Maybe Text
235	}
236	deriving (Show
237#if __GLASGOW_HASKELL__
238	, Data, Typeable
239#if MIN_VERSION_base(4,4,0)
240	, Generic
241#endif
242#endif
243	)
244
245instance Eq Name where
246	(==) = (==) `on` (\x -> (nameNamespace x, nameLocalName x))
247
248instance Ord Name where
249	compare = compare `on` (\x -> (nameNamespace x, nameLocalName x))
250
251instance IsString Name where
252	fromString "" = Name T.empty Nothing Nothing
253	fromString full@('{':rest) = case break (== '}') rest of
254		(_, "") -> error ("Invalid Clark notation: " ++ show full)
255		(ns, local) -> Name (T.pack (drop 1 local)) (Just (T.pack ns)) Nothing
256	fromString local = Name (T.pack local) Nothing Nothing
257
258instance NFData Name where
259	rnf (Name a b c) = rnf a `seq` rnf b `seq` rnf c `seq` ()
260
261-- | Note: due to the incredible complexity of DTDs, this type only supports
262-- external subsets. I've tried adding internal subset types, but they
263-- quickly gain more code than the rest of this module put together.
264--
265-- It is possible that some future version of this library might support
266-- internal subsets, but I am no longer actively working on adding them.
267data Doctype = Doctype
268	{ doctypeName :: Text
269	, doctypeID :: Maybe ExternalID
270	}
271	deriving (Eq, Ord, Show
272#if __GLASGOW_HASKELL__
273	, Data, Typeable
274#if MIN_VERSION_base(4,4,0)
275	, Generic
276#endif
277#endif
278	)
279
280instance NFData Doctype where
281	rnf (Doctype a b) = rnf a `seq` rnf b `seq` ()
282
283data ExternalID
284	= SystemID Text
285	| PublicID Text Text
286	deriving (Eq, Ord, Show
287#if __GLASGOW_HASKELL__
288	, Data, Typeable
289#if MIN_VERSION_base(4,4,0)
290	, Generic
291#endif
292#endif
293	)
294
295instance NFData ExternalID where
296	rnf (SystemID a)   = rnf a `seq` ()
297	rnf (PublicID a b) = rnf a `seq` rnf b `seq` ()
298
299-- | Some XML processing tools are incremental, and work in terms of events
300-- rather than node trees. The 'Event' type allows a document to be fully
301-- specified as a sequence of events.
302--
303-- Event-based XML libraries include:
304--
305-- * <http://hackage.haskell.org/package/xml-enumerator>
306--
307-- * <http://hackage.haskell.org/package/libxml-enumerator>
308--
309-- * <http://hackage.haskell.org/package/expat-enumerator>
310--
311data Event
312	= EventBeginDocument
313	| EventEndDocument
314	| EventBeginDoctype Text (Maybe ExternalID)
315	| EventEndDoctype
316	| EventInstruction Instruction
317	| EventBeginElement Name [(Name, [Content])]
318	| EventEndElement Name
319	| EventContent Content
320	| EventComment Text
321	| EventCDATA Text
322	deriving (Eq, Ord, Show
323#if __GLASGOW_HASKELL__
324	, Data, Typeable
325#if MIN_VERSION_base(4,4,0)
326	, Generic
327#endif
328#endif
329	)
330
331instance NFData Event where
332	rnf (EventBeginDoctype a b) = rnf a `seq` rnf b `seq` ()
333	rnf (EventInstruction a)    = rnf a `seq` ()
334	rnf (EventBeginElement a b) = rnf a `seq` rnf b `seq` ()
335	rnf (EventEndElement a)     = rnf a `seq` ()
336	rnf (EventContent a)        = rnf a `seq` ()
337	rnf (EventComment a)        = rnf a `seq` ()
338	rnf (EventCDATA a)          = rnf a `seq` ()
339	rnf _                       = ()
340
341isElement :: Node -> [Element]
342isElement (NodeElement e) = [e]
343isElement _ = []
344
345isInstruction :: Node -> [Instruction]
346isInstruction (NodeInstruction i) = [i]
347isInstruction _ = []
348
349isContent :: Node -> [Content]
350isContent (NodeContent c) = [c]
351isContent _ = []
352
353isComment :: Node -> [Text]
354isComment (NodeComment t) = [t]
355isComment _ = []
356
357isNamed :: Name -> Element -> [Element]
358isNamed n e = [e | elementName e == n]
359
360elementChildren :: Element -> [Element]
361elementChildren = elementNodes >=> isElement
362
363elementContent :: Element -> [Content]
364elementContent = elementNodes >=> isContent
365
366elementText :: Element -> [Text]
367elementText = elementContent >=> contentText
368
369nodeChildren :: Node -> [Node]
370nodeChildren = isElement >=> elementNodes
371
372nodeContent :: Node -> [Content]
373nodeContent = nodeChildren >=> isContent
374
375nodeText :: Node -> [Text]
376nodeText = nodeContent >=> contentText
377
378hasAttribute :: Name -> Element -> [Element]
379hasAttribute name e = [e | isJust (attributeContent name e)]
380
381hasAttributeText :: Name -> (Text -> Bool) -> Element -> [Element]
382hasAttributeText name p e = [e | maybe False p (attributeText name e)]
383
384attributeContent :: Name -> Element -> Maybe [Content]
385attributeContent name e = lookup name (elementAttributes e)
386
387attributeText :: Name -> Element -> Maybe Text
388attributeText name e = fmap contentFlat (attributeContent name e)
389
390contentText :: Content -> [Text]
391contentText (ContentText t) = [t]
392contentText (ContentEntity entity) = [T.pack "&", entity, T.pack ";"]
393
394contentFlat :: [Content] -> Text
395contentFlat cs = T.concat (cs >>= contentText)
396