1{-# LANGUAGE DeriveDataTypeable #-} 2-- | The central type in TagSoup 3 4module Text.HTML.TagSoup.Type( 5 -- * Data structures and parsing 6 StringLike, Tag(..), Attribute, Row, Column, 7 8 -- * Position manipulation 9 Position(..), tagPosition, nullPosition, positionChar, positionString, 10 11 -- * Tag identification 12 isTagOpen, isTagClose, isTagText, isTagWarning, isTagPosition, 13 isTagOpenName, isTagCloseName, isTagComment, 14 15 -- * Extraction 16 fromTagText, fromAttrib, 17 maybeTagText, maybeTagWarning, 18 innerText, 19 ) where 20 21 22import Data.List (foldl') 23import Data.Maybe (fromMaybe, mapMaybe) 24import Text.StringLike 25import Data.Data(Data, Typeable) 26 27-- | An HTML attribute @id=\"name\"@ generates @(\"id\",\"name\")@ 28type Attribute str = (str,str) 29 30-- | The row/line of a position, starting at 1 31type Row = Int 32 33-- | The column of a position, starting at 1 34type Column = Int 35 36 37--- All positions are stored as a row and a column, with (1,1) being the 38--- top-left position 39data Position = Position !Row !Column deriving (Show,Eq,Ord) 40 41nullPosition :: Position 42nullPosition = Position 1 1 43 44positionString :: Position -> String -> Position 45positionString = foldl' positionChar 46 47positionChar :: Position -> Char -> Position 48positionChar (Position r c) x = case x of 49 '\n' -> Position (r+1) 1 50 '\t' -> Position r (c + 8 - mod (c-1) 8) 51 _ -> Position r (c+1) 52 53tagPosition :: Position -> Tag str 54tagPosition (Position r c) = TagPosition r c 55 56 57-- | A single HTML element. A whole document is represented by a list of @Tag@. 58-- There is no requirement for 'TagOpen' and 'TagClose' to match. 59data Tag str = 60 TagOpen str [Attribute str] -- ^ An open tag with 'Attribute's in their original order 61 | TagClose str -- ^ A closing tag 62 | TagText str -- ^ A text node, guaranteed not to be the empty string 63 | TagComment str -- ^ A comment 64 | TagWarning str -- ^ Meta: A syntax error in the input file 65 | TagPosition !Row !Column -- ^ Meta: The position of a parsed element 66 deriving (Show, Eq, Ord, Data, Typeable) 67 68instance Functor Tag where 69 fmap f (TagOpen x y) = TagOpen (f x) [(f a, f b) | (a,b) <- y] 70 fmap f (TagClose x) = TagClose (f x) 71 fmap f (TagText x) = TagText (f x) 72 fmap f (TagComment x) = TagComment (f x) 73 fmap f (TagWarning x) = TagWarning (f x) 74 fmap f (TagPosition x y) = TagPosition x y 75 76 77-- | Test if a 'Tag' is a 'TagOpen' 78isTagOpen :: Tag str -> Bool 79isTagOpen (TagOpen {}) = True; isTagOpen _ = False 80 81-- | Test if a 'Tag' is a 'TagClose' 82isTagClose :: Tag str -> Bool 83isTagClose (TagClose {}) = True; isTagClose _ = False 84 85-- | Test if a 'Tag' is a 'TagText' 86isTagText :: Tag str -> Bool 87isTagText (TagText {}) = True; isTagText _ = False 88 89-- | Extract the string from within 'TagText', otherwise 'Nothing' 90maybeTagText :: Tag str -> Maybe str 91maybeTagText (TagText x) = Just x 92maybeTagText _ = Nothing 93 94-- | Extract the string from within 'TagText', crashes if not a 'TagText' 95fromTagText :: Show str => Tag str -> str 96fromTagText (TagText x) = x 97fromTagText x = error $ "(" ++ show x ++ ") is not a TagText" 98 99-- | Extract all text content from tags (similar to Verbatim found in HaXml) 100innerText :: StringLike str => [Tag str] -> str 101innerText = strConcat . mapMaybe maybeTagText 102 103-- | Test if a 'Tag' is a 'TagWarning' 104isTagWarning :: Tag str -> Bool 105isTagWarning (TagWarning {}) = True; isTagWarning _ = False 106 107-- | Extract the string from within 'TagWarning', otherwise 'Nothing' 108maybeTagWarning :: Tag str -> Maybe str 109maybeTagWarning (TagWarning x) = Just x 110maybeTagWarning _ = Nothing 111 112-- | Test if a 'Tag' is a 'TagPosition' 113isTagPosition :: Tag str -> Bool 114isTagPosition TagPosition{} = True; isTagPosition _ = False 115 116-- | Extract an attribute, crashes if not a 'TagOpen'. 117-- Returns @\"\"@ if no attribute present. 118-- 119-- Warning: does not distinquish between missing attribute 120-- and present attribute with value @\"\"@. 121fromAttrib :: (Show str, Eq str, StringLike str) => str -> Tag str -> str 122fromAttrib att tag = fromMaybe empty $ maybeAttrib att tag 123 124-- | Extract an attribute, crashes if not a 'TagOpen'. 125-- Returns @Nothing@ if no attribute present. 126maybeAttrib :: (Show str, Eq str) => str -> Tag str -> Maybe str 127maybeAttrib att (TagOpen _ atts) = lookup att atts 128maybeAttrib _ x = error ("(" ++ show x ++ ") is not a TagOpen") 129 130-- | Returns True if the 'Tag' is 'TagOpen' and matches the given name 131isTagOpenName :: Eq str => str -> Tag str -> Bool 132isTagOpenName name (TagOpen n _) = n == name 133isTagOpenName _ _ = False 134 135-- | Returns True if the 'Tag' is 'TagClose' and matches the given name 136isTagCloseName :: Eq str => str -> Tag str -> Bool 137isTagCloseName name (TagClose n) = n == name 138isTagCloseName _ _ = False 139 140-- | Test if a 'Tag' is a 'TagComment' 141isTagComment :: Tag str -> Bool 142isTagComment TagComment {} = True; isTagComment _ = False 143