1{-# LANGUAGE CPP  #-}
2{-# LANGUAGE FlexibleContexts  #-}
3{-# LANGUAGE OverloadedStrings #-}
4{-# LANGUAGE RankNTypes        #-}
5-- | 'Enumeratee's to render XML 'Event's. Unlike libxml-enumerator and
6-- expat-enumerator, this module does not provide IO and ST variants, since the
7-- underlying rendering operations are pure functions.
8module Text.XML.Stream.Render
9    ( -- * Rendering XML files
10      renderBuilder
11    , renderBuilderFlush
12    , renderBytes
13    , renderText
14    , prettify
15      -- * Renderer settings
16    , RenderSettings
17    , def
18    , rsPretty
19    , rsNamespaces
20    , rsAttrOrder
21    , rsUseCDATA
22    , rsXMLDeclaration
23    , orderAttrs
24      -- * Event rendering
25    , tag
26    , content
27      -- * Attribute rendering
28    , Attributes
29    , attr
30    , optionalAttr
31    ) where
32
33import           Control.Applicative          ((<$>))
34import           Control.Monad.Trans.Resource (MonadThrow)
35import           Data.ByteString              (ByteString)
36import           Data.ByteString.Builder      (Builder)
37import           Conduit
38import           Data.Default.Class           (Default (def))
39import           Data.List                    (foldl')
40import           Data.Map                     (Map)
41import qualified Data.Map                     as Map
42import           Data.Maybe                   (fromMaybe, mapMaybe)
43import           Data.Monoid                  (Monoid, mappend, mempty)
44import qualified Data.Set                     as Set
45import           Data.Text                    (Text)
46import qualified Data.Text                    as T
47import           Data.XML.Types               (Content (..), Event (..),
48                                               Name (..))
49import           Text.XML.Stream.Token
50
51-- | Render a stream of 'Event's into a stream of 'ByteString's. This function
52-- wraps around 'renderBuilder' and 'builderToByteString', so it produces
53-- optimally sized 'ByteString's with minimal buffer copying.
54--
55-- The output is UTF8 encoded.
56renderBytes :: PrimMonad m => RenderSettings -> ConduitT Event ByteString m ()
57renderBytes rs = renderBuilder rs .| builderToByteString
58
59-- | Render a stream of 'Event's into a stream of 'Text's. This function
60-- wraps around 'renderBuilder', 'builderToByteString' and 'renderBytes', so it
61-- produces optimally sized 'Text's with minimal buffer copying.
62renderText :: (PrimMonad m, MonadThrow m) => RenderSettings -> ConduitT Event Text m ()
63renderText rs = renderBytes rs .| decodeUtf8C
64
65data RenderSettings = RenderSettings
66    { rsPretty     :: Bool
67    , rsNamespaces :: [(Text, Text)]
68      -- ^ Defines some top level namespace definitions to be used, in the form
69      -- of (prefix, namespace). This has absolutely no impact on the meaning
70      -- of your documents, but can increase readability by moving commonly
71      -- used namespace declarations to the top level.
72    , rsAttrOrder  :: Name -> Map.Map Name Text -> [(Name, Text)]
73      -- ^ Specify how to turn the unordered attributes used by the "Text.XML"
74      -- module into an ordered list.
75    , rsUseCDATA   :: Content -> Bool
76      -- ^ Determines if for a given text content the renderer should use a
77      -- CDATA node.
78      --
79      -- Default: @False@
80      --
81      -- @since 1.3.3
82    , rsXMLDeclaration :: Bool
83      -- ^ Determines whether the XML declaration will be output.
84      --
85      -- Default: @True@
86      --
87      -- @since 1.5.1
88    }
89
90instance Default RenderSettings where
91    def = RenderSettings
92        { rsPretty = False
93        , rsNamespaces = []
94        , rsAttrOrder = const Map.toList
95        , rsUseCDATA = const False
96        , rsXMLDeclaration = True
97        }
98
99-- | Convenience function to create an ordering function suitable for
100-- use as the value of 'rsAttrOrder'. The ordering function is created
101-- from an explicit ordering of the attributes, specified as a list of
102-- tuples, as follows: In each tuple, the first component is the
103-- 'Name' of an element, and the second component is a list of
104-- attributes names. When the given element is rendered, the
105-- attributes listed, when present, appear first in the given order,
106-- followed by any other attributes in arbitrary order. If an element
107-- does not appear, all of its attributes are rendered in arbitrary
108-- order.
109orderAttrs :: [(Name, [Name])] ->
110              Name -> Map Name Text -> [(Name, Text)]
111orderAttrs orderSpec = order
112  where
113    order elt attrMap =
114      let initialAttrs = fromMaybe [] $ lookup elt orderSpec
115          mkPair attr' = (,) attr' <$> Map.lookup attr' attrMap
116          otherAttrMap =
117            Map.filterWithKey (const . not . (`elem` initialAttrs)) attrMap
118      in mapMaybe mkPair initialAttrs ++ Map.toAscList otherAttrMap
119
120-- | Render a stream of 'Event's into a stream of 'Builder's. Builders are from
121-- the blaze-builder package, and allow the create of optimally sized
122-- 'ByteString's with minimal buffer copying.
123renderBuilder :: Monad m => RenderSettings -> ConduitT Event Builder m ()
124renderBuilder settings = mapC Chunk .| renderBuilder' yield' settings
125  where
126    yield' Flush = return ()
127    yield' (Chunk bs) = yield bs
128
129-- | Same as 'renderBuilder' but allows you to flush XML stream to ensure that all
130-- events at needed point are rendered.
131--
132-- @since 1.3.5
133renderBuilderFlush :: Monad m => RenderSettings -> ConduitT (Flush Event) (Flush Builder) m ()
134renderBuilderFlush = renderBuilder' yield
135
136renderBuilder'
137  :: Monad m
138  => (Flush Builder -> ConduitT (Flush Event) o m ())
139  -> RenderSettings
140  -> ConduitT (Flush Event) o m ()
141renderBuilder' yield' settings =
142    if rsPretty settings
143    then prettify .| renderEvent'
144    else renderEvent'
145  where
146    renderEvent' = renderEvent yield' settings
147
148renderEvent
149  :: Monad m
150  => (Flush Builder -> ConduitT (Flush Event) o m ())
151  -> RenderSettings
152  -> ConduitT (Flush Event) o m ()
153renderEvent yield' RenderSettings { rsPretty = isPretty, rsNamespaces = namespaces0, rsUseCDATA = useCDATA, rsXMLDeclaration = useXMLDecl } =
154    loop []
155  where
156    loop nslevels = await >>= maybe (return ()) (go nslevels)
157
158    go nslevels Flush = yield' Flush >> loop nslevels
159    go nslevels (Chunk e) =
160        case e of
161            EventBeginElement n1 as -> do
162                mnext <- peekC
163                isClosed <-
164                    case mnext of
165                        Just (Chunk (EventEndElement n2)) | n1 == n2 -> do
166                            dropC 1
167                            return True
168                        _ -> return False
169                let (token, nslevels') = mkBeginToken isPretty isClosed namespaces0 nslevels n1 as
170                yield' $ Chunk token
171                loop nslevels'
172            _ -> do
173                let (token, nslevels') = eventToToken nslevels useCDATA useXMLDecl e
174                yield' $ Chunk token
175                loop nslevels'
176
177eventToToken :: Stack -> (Content -> Bool) -> Bool -> Event -> (Builder, [NSLevel])
178eventToToken s _ True EventBeginDocument =
179    (tokenToBuilder $ TokenXMLDeclaration
180            [ ("version", [ContentText "1.0"])
181            , ("encoding", [ContentText "UTF-8"])
182            ]
183     , s)
184eventToToken s _ False EventBeginDocument = (mempty, s)
185eventToToken s _ _ EventEndDocument = (mempty, s)
186eventToToken s _ _ (EventInstruction i) = (tokenToBuilder $ TokenInstruction i, s)
187eventToToken s _ _ (EventBeginDoctype n meid) = (tokenToBuilder $ TokenDoctype n meid [], s)
188eventToToken s _ _ EventEndDoctype = (mempty, s)
189eventToToken s _ _ (EventCDATA t) = (tokenToBuilder $ TokenCDATA t, s)
190eventToToken s _ _ (EventEndElement name) =
191    (tokenToBuilder $ TokenEndElement $ nameToTName sl name, s')
192  where
193    (sl:s') = s
194eventToToken s useCDATA _ (EventContent c)
195    | useCDATA c =
196        case c of
197          ContentText txt -> (tokenToBuilder $ TokenCDATA txt, s)
198          ContentEntity txt -> (tokenToBuilder $ TokenCDATA txt, s)
199    | otherwise  = (tokenToBuilder $ TokenContent c, s)
200eventToToken s _ _ (EventComment t) = (tokenToBuilder $ TokenComment t, s)
201eventToToken _ _ _ EventBeginElement{} = error "eventToToken on EventBeginElement" -- mkBeginToken False s name attrs
202
203type Stack = [NSLevel]
204
205nameToTName :: NSLevel -> Name -> TName
206nameToTName _ (Name name _ (Just pref))
207    | pref == "xml" = TName (Just "xml") name
208nameToTName _ (Name name Nothing _) = TName Nothing name -- invariant that this is true
209nameToTName (NSLevel def' sl) (Name name (Just ns) _)
210    | def' == Just ns = TName Nothing name
211    | otherwise =
212        case Map.lookup ns sl of
213            Nothing -> error "nameToTName"
214            Just pref -> TName (Just pref) name
215
216mkBeginToken :: Bool -- ^ pretty print attributes?
217             -> Bool -- ^ self closing?
218             -> [(Text, Text)] -- ^ namespaces to apply to top-level
219             -> Stack
220             -> Name
221             -> [(Name, [Content])]
222             -> (Builder, Stack)
223mkBeginToken isPretty isClosed namespaces0 s name attrs =
224    (tokenToBuilder $ TokenBeginElement tname tattrs3 isClosed indent,
225     if isClosed then s else sl3 : s)
226  where
227    indent = if isPretty then 2 + 4 * length s else 0
228    prevsl = case s of
229                [] -> NSLevel Nothing Map.empty
230                sl':_ -> sl'
231    (sl1, tname, tattrs1) = newElemStack prevsl name
232    (sl2, tattrs2) = foldr newAttrStack (sl1, tattrs1) $ nubAttrs attrs
233    (sl3, tattrs3) =
234        case s of
235            [] -> (sl2 { prefixes = Map.union (prefixes sl2) $ Map.fromList namespaceSL }, namespaceAttrs ++ tattrs2)
236            _ -> (sl2, tattrs2)
237
238    (namespaceSL, namespaceAttrs) = unzip $ mapMaybe unused namespaces0
239    unused (k, v) =
240        case lookup k' tattrs2 of
241            Just{} -> Nothing
242            Nothing -> Just ((v, k), (k', v'))
243      where
244        k' = TName (Just "xmlns") k
245        v' = [ContentText v]
246
247newElemStack :: NSLevel -> Name -> (NSLevel, TName, [TAttribute])
248newElemStack nsl@(NSLevel def' _) (Name local ns _)
249    | def' == ns = (nsl, TName Nothing local, [])
250newElemStack (NSLevel _ nsmap) (Name local Nothing _) =
251    (NSLevel Nothing nsmap, TName Nothing local, [(TName Nothing "xmlns", [])])
252newElemStack (NSLevel _ nsmap) (Name local (Just ns) Nothing) =
253    (NSLevel (Just ns) nsmap, TName Nothing local, [(TName Nothing "xmlns", [ContentText ns])])
254newElemStack (NSLevel def' nsmap) (Name local (Just ns) (Just pref)) =
255    case Map.lookup ns nsmap of
256        Just pref'
257            | pref == pref' ->
258                ( NSLevel def' nsmap
259                , TName (Just pref) local
260                , []
261                )
262        _ -> ( NSLevel def' nsmap'
263             , TName (Just pref) local
264             , [(TName (Just "xmlns") pref, [ContentText ns])]
265             )
266  where
267    nsmap' = Map.insert ns pref nsmap
268
269newAttrStack :: (Name, [Content]) -> (NSLevel, [TAttribute]) -> (NSLevel, [TAttribute])
270newAttrStack (name, value) (NSLevel def' nsmap, attrs) =
271    (NSLevel def' nsmap', addNS $ (tname, value) : attrs)
272  where
273    (nsmap', tname, addNS) =
274        case name of
275            Name local Nothing _ -> (nsmap, TName Nothing local, id)
276            Name local (Just ns) mpref ->
277                let ppref = fromMaybe "ns" mpref
278                    (pref, addNS') = getPrefix ppref nsmap ns
279                 in (Map.insert ns pref nsmap, TName (Just pref) local, addNS')
280
281getPrefix :: Text -> Map Text Text -> Text -> (Text, [TAttribute] -> [TAttribute])
282getPrefix _ _ "http://www.w3.org/XML/1998/namespace" = ("xml", id)
283getPrefix ppref nsmap ns =
284    case Map.lookup ns nsmap of
285        Just pref -> (pref, id)
286        Nothing ->
287            let pref = findUnused ppref $ Map.elems nsmap
288             in (pref, (:) (TName (Just "xmlns") pref, [ContentText ns]))
289  where
290    findUnused x xs
291        | x `elem` xs = findUnused (x `T.snoc` '_') xs
292        | otherwise = x
293
294-- | Convert a stream of 'Event's into a prettified one, adding extra
295-- whitespace. Note that this can change the meaning of your XML.
296prettify :: Monad m => ConduitT (Flush Event) (Flush Event) m ()
297prettify = prettify' 0
298
299prettify' :: Monad m => Int -> ConduitT (Flush Event) (Flush Event) m ()
300prettify' level =
301    await >>= maybe (return ()) goC
302  where
303    yield' = yield . Chunk
304
305    goC Flush = yield Flush >> prettify' level
306    goC (Chunk e) = go e
307
308    go e@EventBeginDocument = do
309        yield' e
310        yield' $ EventContent $ ContentText "\n"
311        prettify' level
312    go e@EventBeginElement{} = do
313        yield' before
314        yield' e
315        mnext <- peekC
316        case mnext of
317            Just (Chunk next@EventEndElement{}) -> do
318                dropC 1
319                yield' next
320                yield' after
321                prettify' level
322            _ -> do
323                yield' after
324                prettify' $ level + 1
325    go e@EventEndElement{} = do
326        let level' = max 0 $ level - 1
327        yield' $ before' level'
328        yield' e
329        yield' after
330        prettify' level'
331    go (EventContent c) = do
332        cs <- takeContents (c:)
333        let cs' = mapMaybe normalize cs
334        case cs' of
335            [] -> return ()
336            _ -> do
337                yield' before
338                mapM_ (yield' . EventContent) cs'
339                yield' after
340        prettify' level
341    go (EventCDATA t) = go $ EventContent $ ContentText t
342    go e@EventInstruction{} = do
343        yield' before
344        yield' e
345        yield' after
346        prettify' level
347    go (EventComment t) = do
348        yield' before
349        yield' $ EventComment $ T.concat
350            [ " "
351            , T.unwords $ T.words t
352            , " "
353            ]
354        yield' after
355        prettify' level
356
357    go e@EventEndDocument = yield' e >> prettify' level
358    go e@EventBeginDoctype{} = yield' e >> prettify' level
359    go e@EventEndDoctype{} = yield' e >> yield' after >> prettify' level
360
361    takeContents front = do
362        me <- peekC
363        case me of
364            Just (Chunk (EventContent c)) -> do
365                dropC 1
366                takeContents $ front . (c:)
367            Just (Chunk (EventCDATA t)) -> do
368                dropC 1
369                takeContents $ front . (ContentText t:)
370            _ -> return $ front []
371
372    normalize (ContentText t)
373        | T.null t' = Nothing
374        | otherwise = Just $ ContentText t'
375      where
376        t' = T.unwords $ T.words t
377    normalize c = Just c
378
379    before = EventContent $ ContentText $ T.replicate level "    "
380    before' l = EventContent $ ContentText $ T.replicate l "    "
381    after = EventContent $ ContentText "\n"
382
383nubAttrs :: [(Name, v)] -> [(Name, v)]
384nubAttrs orig =
385    front []
386  where
387    (front, _) = foldl' go (id, Set.empty) orig
388    go (dlist, used) (k, v)
389        | k `Set.member` used = (dlist, used)
390        | otherwise = (dlist . ((k, v):), Set.insert k used)
391
392
393-- | Generate a complete XML 'Element'.
394tag :: (Monad m) => Name -> Attributes -> ConduitT i Event m ()  -- ^ 'Element''s subnodes.
395                                       -> ConduitT i Event m ()
396tag name (Attributes a) content' = do
397  yield $ EventBeginElement name a
398  content'
399  yield $ EventEndElement name
400
401-- | Generate a textual 'EventContent'.
402content :: (Monad m) => Text -> ConduitT i Event m ()
403content = yield . EventContent . ContentText
404
405-- | A list of attributes.
406data Attributes = Attributes [(Name, [Content])]
407
408instance Monoid Attributes where
409  mempty = Attributes mempty
410#if !MIN_VERSION_base(4,11,0)
411  (Attributes a) `mappend` (Attributes b) = Attributes (a `mappend` b)
412#else
413instance Semigroup Attributes where
414  (Attributes a) <> (Attributes b) = Attributes (a <> b)
415#endif
416
417-- | Generate a single attribute.
418attr :: Name        -- ^ Attribute's name
419     -> Text        -- ^ Attribute's value
420     -> Attributes
421attr name value = Attributes [(name, [ContentText value])]
422
423-- | Helper function that generates a valid attribute if input isn't 'Nothing', or 'mempty' otherwise.
424optionalAttr :: Name -> Maybe Text -> Attributes
425optionalAttr name = maybe mempty (attr name)
426