1{-| 2 /NOTE/: This module is preliminary and may change at a future date. 3 4 This module is intended to help converting a list of tags into a 5 tree of tags. 6-} 7 8module Text.HTML.TagSoup.Tree 9 ( 10 TagTree(..), tagTree, parseTree, parseTreeOptions, ParseOptions(..), 11 flattenTree, renderTree, renderTreeOptions, RenderOptions(..), transformTree, universeTree 12 ) where 13 14import Text.HTML.TagSoup (parseTags, parseTagsOptions, renderTags, renderTagsOptions, ParseOptions(..), RenderOptions(..)) 15import Text.HTML.TagSoup.Type 16import Control.Arrow 17import GHC.Exts (build) 18 19 20-- | A tree of 'Tag' values. 21data TagTree str 22 = -- | A 'TagOpen'/'TagClose' pair with the 'Tag' values in between. 23 TagBranch str [Attribute str] [TagTree str] 24 | -- | Any leaf node 25 TagLeaf (Tag str) 26 deriving (Eq,Ord,Show) 27 28instance Functor TagTree where 29 fmap f (TagBranch x y z) = TagBranch (f x) (map (f***f) y) (map (fmap f) z) 30 fmap f (TagLeaf x) = TagLeaf (fmap f x) 31 32 33-- | Convert a list of tags into a tree. This version is not lazy at 34-- all, that is saved for version 2. 35tagTree :: Eq str => [Tag str] -> [TagTree str] 36tagTree = g 37 where 38 g :: Eq str => [Tag str] -> [TagTree str] 39 g [] = [] 40 g xs = a ++ map TagLeaf (take 1 b) ++ g (drop 1 b) 41 where (a,b) = f xs 42 43 -- the second tuple is either null or starts with a close 44 f :: Eq str => [Tag str] -> ([TagTree str],[Tag str]) 45 f (TagOpen name atts:rest) = 46 case f rest of 47 (inner,[]) -> (TagLeaf (TagOpen name atts):inner, []) 48 (inner,TagClose x:xs) 49 | x == name -> let (a,b) = f xs in (TagBranch name atts inner:a, b) 50 | otherwise -> (TagLeaf (TagOpen name atts):inner, TagClose x:xs) 51 _ -> error "TagSoup.Tree.tagTree: safe as - forall x . isTagClose (snd (f x))" 52 53 f (TagClose x:xs) = ([], TagClose x:xs) 54 f (x:xs) = (TagLeaf x:a,b) 55 where (a,b) = f xs 56 f [] = ([], []) 57 58-- | Build a 'TagTree' from a string. 59parseTree :: StringLike str => str -> [TagTree str] 60parseTree = tagTree . parseTags 61 62-- | Build a 'TagTree' from a string, specifying the 'ParseOptions'. 63parseTreeOptions :: StringLike str => ParseOptions str -> str -> [TagTree str] 64parseTreeOptions opts str = tagTree $ parseTagsOptions opts str 65 66-- | Flatten a 'TagTree' back to a list of 'Tag'. 67flattenTree :: [TagTree str] -> [Tag str] 68flattenTree xs = build $ flattenTreeFB xs 69 70flattenTreeFB :: [TagTree str] -> (Tag str -> lst -> lst) -> lst -> lst 71flattenTreeFB xs cons nil = flattenTreeOnto xs nil 72 where 73 flattenTreeOnto [] tags = tags 74 flattenTreeOnto (TagBranch name atts inner:trs) tags = 75 TagOpen name atts `cons` flattenTreeOnto inner (TagClose name `cons` flattenTreeOnto trs tags) 76 flattenTreeOnto (TagLeaf x:trs) tags = x `cons` flattenTreeOnto trs tags 77 78-- | Render a 'TagTree'. 79renderTree :: StringLike str => [TagTree str] -> str 80renderTree = renderTags . flattenTree 81 82-- | Render a 'TagTree' with some 'RenderOptions'. 83renderTreeOptions :: StringLike str => RenderOptions str -> [TagTree str] -> str 84renderTreeOptions opts trees = renderTagsOptions opts $ flattenTree trees 85 86-- | This operation is based on the Uniplate @universe@ function. Given a 87-- list of trees, it returns those trees, and all the children trees at 88-- any level. For example: 89-- 90-- > universeTree 91-- > [TagBranch "a" [("href","url")] [TagBranch "b" [] [TagLeaf (TagText "text")]]] 92-- > == [TagBranch "a" [("href","url")] [TagBranch "b" [] [TagLeaf (TagText "text")]]] 93-- > ,TagBranch "b" [] [TagLeaf (TagText "text")]] 94-- 95-- This operation is particularly useful for queries. To collect all @\"a\"@ 96-- tags in a tree, simply do: 97-- 98-- > [x | x@(TagBranch "a" _ _) <- universeTree tree] 99universeTree :: [TagTree str] -> [TagTree str] 100universeTree = concatMap f 101 where 102 f t@(TagBranch _ _ inner) = t : universeTree inner 103 f x = [x] 104 105 106-- | This operation is based on the Uniplate @transform@ function. Given a 107-- list of trees, it applies the function to every tree in a bottom-up 108-- manner. This operation is useful for manipulating a tree - for example 109-- to make all tag names upper case: 110-- 111-- > upperCase = transformTree f 112-- > where f (TagBranch name atts inner) = [TagBranch (map toUpper name) atts inner] 113-- > f x = [x] 114transformTree :: (TagTree str -> [TagTree str]) -> [TagTree str] -> [TagTree str] 115transformTree act = concatMap f 116 where 117 f (TagBranch a b inner) = act $ TagBranch a b (transformTree act inner) 118 f x = act x 119