1{-# LANGUAGE DeriveDataTypeable #-}
2-- | The central type in TagSoup
3
4module Text.HTML.TagSoup.Type(
5    -- * Data structures and parsing
6    StringLike, Tag(..), Attribute, Row, Column,
7
8    -- * Position manipulation
9    Position(..), tagPosition, nullPosition, positionChar, positionString,
10
11    -- * Tag identification
12    isTagOpen, isTagClose, isTagText, isTagWarning, isTagPosition,
13    isTagOpenName, isTagCloseName, isTagComment,
14
15    -- * Extraction
16    fromTagText, fromAttrib,
17    maybeTagText, maybeTagWarning,
18    innerText,
19    ) where
20
21
22import Data.List (foldl')
23import Data.Maybe (fromMaybe, mapMaybe)
24import Text.StringLike
25import Data.Data(Data, Typeable)
26
27-- | An HTML attribute @id=\"name\"@ generates @(\"id\",\"name\")@
28type Attribute str = (str,str)
29
30-- | The row/line of a position, starting at 1
31type Row = Int
32
33-- | The column of a position, starting at 1
34type Column = Int
35
36
37--- All positions are stored as a row and a column, with (1,1) being the
38--- top-left position
39data Position = Position !Row !Column deriving (Show,Eq,Ord)
40
41nullPosition :: Position
42nullPosition = Position 1 1
43
44positionString :: Position -> String -> Position
45positionString = foldl' positionChar
46
47positionChar :: Position -> Char -> Position
48positionChar (Position r c) x = case x of
49    '\n' -> Position (r+1) 1
50    '\t' -> Position r (c + 8 - mod (c-1) 8)
51    _    -> Position r (c+1)
52
53tagPosition :: Position -> Tag str
54tagPosition (Position r c) = TagPosition r c
55
56
57-- | A single HTML element. A whole document is represented by a list of @Tag@.
58--   There is no requirement for 'TagOpen' and 'TagClose' to match.
59data Tag str =
60     TagOpen str [Attribute str]  -- ^ An open tag with 'Attribute's in their original order
61   | TagClose str                 -- ^ A closing tag
62   | TagText str                  -- ^ A text node, guaranteed not to be the empty string
63   | TagComment str               -- ^ A comment
64   | TagWarning str               -- ^ Meta: A syntax error in the input file
65   | TagPosition !Row !Column     -- ^ Meta: The position of a parsed element
66     deriving (Show, Eq, Ord, Data, Typeable)
67
68instance Functor Tag where
69    fmap f (TagOpen x y) = TagOpen (f x) [(f a, f b) | (a,b) <- y]
70    fmap f (TagClose x) = TagClose (f x)
71    fmap f (TagText x) = TagText (f x)
72    fmap f (TagComment x) = TagComment (f x)
73    fmap f (TagWarning x) = TagWarning (f x)
74    fmap f (TagPosition x y) = TagPosition x y
75
76
77-- | Test if a 'Tag' is a 'TagOpen'
78isTagOpen :: Tag str -> Bool
79isTagOpen (TagOpen {})  = True; isTagOpen  _ = False
80
81-- | Test if a 'Tag' is a 'TagClose'
82isTagClose :: Tag str -> Bool
83isTagClose (TagClose {}) = True; isTagClose _ = False
84
85-- | Test if a 'Tag' is a 'TagText'
86isTagText :: Tag str -> Bool
87isTagText (TagText {})  = True; isTagText  _ = False
88
89-- | Extract the string from within 'TagText', otherwise 'Nothing'
90maybeTagText :: Tag str -> Maybe str
91maybeTagText (TagText x) = Just x
92maybeTagText _ = Nothing
93
94-- | Extract the string from within 'TagText', crashes if not a 'TagText'
95fromTagText :: Show str => Tag str -> str
96fromTagText (TagText x) = x
97fromTagText x = error $ "(" ++ show x ++ ") is not a TagText"
98
99-- | Extract all text content from tags (similar to Verbatim found in HaXml)
100innerText :: StringLike str => [Tag str] -> str
101innerText = strConcat . mapMaybe maybeTagText
102
103-- | Test if a 'Tag' is a 'TagWarning'
104isTagWarning :: Tag str -> Bool
105isTagWarning (TagWarning {})  = True; isTagWarning _ = False
106
107-- | Extract the string from within 'TagWarning', otherwise 'Nothing'
108maybeTagWarning :: Tag str -> Maybe str
109maybeTagWarning (TagWarning x) = Just x
110maybeTagWarning _ = Nothing
111
112-- | Test if a 'Tag' is a 'TagPosition'
113isTagPosition :: Tag str -> Bool
114isTagPosition TagPosition{} = True; isTagPosition _ = False
115
116-- | Extract an attribute, crashes if not a 'TagOpen'.
117--   Returns @\"\"@ if no attribute present.
118--
119-- Warning: does not distinquish between missing attribute
120-- and present attribute with value @\"\"@.
121fromAttrib :: (Show str, Eq str, StringLike str) => str -> Tag str -> str
122fromAttrib att tag = fromMaybe empty $ maybeAttrib att tag
123
124-- | Extract an attribute, crashes if not a 'TagOpen'.
125--   Returns @Nothing@ if no attribute present.
126maybeAttrib :: (Show str, Eq str) => str -> Tag str -> Maybe str
127maybeAttrib att (TagOpen _ atts) = lookup att atts
128maybeAttrib _ x = error ("(" ++ show x ++ ") is not a TagOpen")
129
130-- | Returns True if the 'Tag' is 'TagOpen' and matches the given name
131isTagOpenName :: Eq str => str -> Tag str -> Bool
132isTagOpenName name (TagOpen n _) = n == name
133isTagOpenName _ _ = False
134
135-- | Returns True if the 'Tag' is 'TagClose' and matches the given name
136isTagCloseName :: Eq str => str -> Tag str -> Bool
137isTagCloseName name (TagClose n) = n == name
138isTagCloseName _ _ = False
139
140-- | Test if a 'Tag' is a 'TagComment'
141isTagComment :: Tag str -> Bool
142isTagComment TagComment {} = True; isTagComment _ = False
143