1{-# LANGUAGE CPP                        #-}
2{-# LANGUAGE BangPatterns               #-}
3{-# LANGUAGE OverloadedStrings          #-}
4{-# LANGUAGE MultiParamTypeClasses      #-}
5{-# LANGUAGE FlexibleInstances          #-}
6{-# LANGUAGE FlexibleContexts           #-}
7{-# LANGUAGE UndecidableInstances       #-}
8module Commonmark.Html
9  ( Html
10  , htmlInline
11  , htmlBlock
12  , htmlText
13  , htmlRaw
14  , addAttribute
15  , renderHtml
16  , escapeURI
17  , escapeHtml
18  )
19where
20
21import           Commonmark.Types
22import           Commonmark.Entity (lookupEntity)
23import           Data.Text (Text)
24import qualified Data.Text as T
25import qualified Data.Text.Lazy as TL
26import           Data.Text.Lazy.Builder (Builder, fromText, toLazyText,
27                                         singleton)
28import           Data.Text.Encoding   (encodeUtf8)
29import qualified Data.ByteString.Char8 as B
30import           Text.Printf          (printf)
31import           Data.Char            (ord, isAlphaNum, isAscii, isSpace)
32import           Data.Maybe           (fromMaybe)
33#if !MIN_VERSION_base(4,11,0)
34import           Data.Semigroup
35#endif
36
37data ElementType =
38    InlineElement
39  | BlockElement
40
41data Html a =
42    HtmlElement !ElementType {-# UNPACK #-} !Text [Attribute] (Maybe (Html a))
43  | HtmlText {-# UNPACK #-} !Text
44  | HtmlRaw {-# UNPACK #-} !Text
45  | HtmlNull
46  | HtmlConcat !(Html a) !(Html a)
47
48instance Show (Html a) where
49  show = TL.unpack . renderHtml
50
51instance Semigroup (Html a) where
52  x <> HtmlNull                = x
53  HtmlNull <> x                = x
54  HtmlText t1 <> HtmlText t2   = HtmlText (t1 <> t2)
55  HtmlRaw t1 <> HtmlRaw t2     = HtmlRaw (t1 <> t2)
56  x <> y                       = HtmlConcat x y
57
58instance Monoid (Html a) where
59  mempty = HtmlNull
60  mappend = (<>)
61
62instance HasAttributes (Html a) where
63  addAttributes attrs x = foldr addAttribute x attrs
64
65instance ToPlainText (Html a) where
66  toPlainText h =
67    case h of
68      HtmlElement InlineElement "span" attr (Just x)
69        -> case lookup "data-emoji" attr of
70              Just alias -> ":" <> alias <> ":"
71              Nothing    -> toPlainText x
72      HtmlElement _ _ _ (Just x) -> toPlainText x
73      HtmlElement _ _ attrs Nothing
74                                 -> fromMaybe mempty $ lookup "alt" attrs
75      HtmlText t                 -> t
76      HtmlConcat x y             -> toPlainText x <> toPlainText y
77      _                          -> mempty
78
79
80-- This instance mirrors what is expected in the spec tests.
81instance Rangeable (Html a) => IsInline (Html a) where
82  lineBreak = htmlInline "br" Nothing <> nl
83  softBreak = nl
84  str t = htmlText t
85  entity t = case lookupEntity (T.drop 1 t) of
86                   Just t' -> htmlText t'
87                   Nothing -> htmlRaw t
88  escapedChar c = htmlText (T.singleton c)
89  emph ils = htmlInline "em" (Just ils)
90  strong ils = htmlInline "strong" (Just ils)
91  link target title ils =
92    addAttribute ("href", escapeURI target) .
93    (if T.null title
94        then id
95        else addAttribute ("title", title)) $
96    htmlInline "a" (Just ils)
97  image target title ils =
98    addAttribute ("src", escapeURI target) .
99    addAttribute ("alt", toPlainText ils) .
100    (if T.null title
101        then id
102        else addAttribute ("title", title)) $
103    htmlInline "img" Nothing
104  code t = htmlInline "code" (Just (htmlText t))
105  rawInline f t
106    | f == Format "html" = htmlRaw t
107    | otherwise          = mempty
108
109instance IsInline (Html a) => IsBlock (Html a) (Html a) where
110  paragraph ils = htmlBlock "p" (Just ils)
111  plain ils = ils <> nl
112  thematicBreak = htmlBlock "hr" Nothing
113  blockQuote bs = htmlBlock "blockquote" $ Just (nl <> bs)
114  codeBlock info t =
115    htmlBlock "pre" $ Just $
116    (if T.null lang
117        then id
118        else addAttribute ("class", "language-" <> lang)) $
119    htmlInline "code" $ Just (htmlText t)
120    where lang = T.takeWhile (not . isSpace) info
121  heading level ils = htmlBlock h (Just ils)
122    where h = case level of
123                   1 -> "h1"
124                   2 -> "h2"
125                   3 -> "h3"
126                   4 -> "h4"
127                   5 -> "h5"
128                   6 -> "h6"
129                   _ -> "p"
130  rawBlock f t
131    | f == Format "html" = htmlRaw t
132    | otherwise          = mempty
133  referenceLinkDefinition _ _ = mempty
134  list (BulletList _) lSpacing items =
135    htmlBlock "ul" $ Just (nl <> mconcat (map li items))
136   where li x = htmlBlock "li" $
137                   Just ((if lSpacing == TightList
138                             then mempty
139                             else nl) <> x)
140  list (OrderedList startnum enumtype _delimtype) lSpacing items =
141    (if startnum /= 1
142        then addAttribute ("start", T.pack (show startnum))
143        else id) .
144    (case enumtype of
145       Decimal  -> id
146       UpperAlpha -> addAttribute ("type", "A")
147       LowerAlpha -> addAttribute ("type", "a")
148       UpperRoman -> addAttribute ("type", "I")
149       LowerRoman -> addAttribute ("type", "i"))
150    $ htmlBlock "ol" $
151      Just (nl <> mconcat (map li items))
152   where li x = htmlBlock "li" $
153                   Just ((if lSpacing == TightList
154                             then mempty
155                             else nl) <> x)
156
157nl :: Html a
158nl = htmlRaw "\n"
159
160instance Rangeable (Html ()) where
161  ranged _ x = x
162
163instance Rangeable (Html SourceRange) where
164  ranged sr x = addAttribute ("data-sourcepos", T.pack (show sr)) x
165
166
167
168htmlInline :: Text -> Maybe (Html a) -> Html a
169htmlInline tagname mbcontents = HtmlElement InlineElement tagname [] mbcontents
170
171htmlBlock :: Text -> Maybe (Html a) -> Html a
172htmlBlock tagname mbcontents = HtmlElement BlockElement tagname [] mbcontents
173
174htmlText :: Text -> Html a
175htmlText = HtmlText
176
177htmlRaw :: Text -> Html a
178htmlRaw = HtmlRaw
179
180addAttribute :: Attribute -> Html a -> Html a
181addAttribute attr (HtmlElement eltType tagname attrs mbcontents) =
182  HtmlElement eltType tagname (incorporateAttribute attr attrs) mbcontents
183addAttribute attr (HtmlText t)
184  = HtmlElement InlineElement "span" [attr] $ Just (HtmlText t)
185addAttribute _ elt = elt
186
187incorporateAttribute :: Attribute -> [Attribute] -> [Attribute]
188incorporateAttribute (k, v) as =
189  case lookup k as of
190    Nothing            -> (k, v) : as
191    Just v'            -> (if k == "class"
192                              then ("class", v <> " " <> v')
193                              else (k, v')) :
194                          filter (\(x, _) -> x /= k) as
195
196renderHtml :: Html a -> TL.Text
197renderHtml = {-# SCC renderHtml #-} toLazyText . toBuilder
198
199toBuilder :: Html a -> Builder
200toBuilder (HtmlNull) = mempty
201toBuilder (HtmlConcat x y) = toBuilder x <> toBuilder y
202toBuilder (HtmlRaw t) = fromText t
203toBuilder (HtmlText t) = escapeHtml t
204toBuilder (HtmlElement eltType tagname attrs mbcontents) =
205  "<" <> fromText tagname <> mconcat (map toAttr attrs) <> filling <> nl'
206  where
207    toAttr (x,y) = " " <> fromText x <> "=\"" <> escapeHtml y <> "\""
208    nl' = case eltType of
209           BlockElement -> "\n"
210           _            -> mempty
211    filling = case mbcontents of
212                 Nothing   -> " />"
213                 Just cont -> ">" <> toBuilder cont <> "</" <>
214                              fromText tagname <> ">"
215
216escapeHtml :: Text -> Builder
217escapeHtml t =
218  case T.uncons post of
219    Just (c, rest) -> fromText pre <> escapeHtmlChar c <> escapeHtml rest
220    Nothing        -> fromText pre
221 where
222  (pre,post)        = T.break needsEscaping t
223  needsEscaping '<' = True
224  needsEscaping '>' = True
225  needsEscaping '&' = True
226  needsEscaping '"' = True
227  needsEscaping _   = False
228
229escapeHtmlChar :: Char -> Builder
230escapeHtmlChar '<' = "&lt;"
231escapeHtmlChar '>' = "&gt;"
232escapeHtmlChar '&' = "&amp;"
233escapeHtmlChar '"' = "&quot;"
234escapeHtmlChar c   = singleton c
235
236escapeURI :: Text -> Text
237escapeURI = mconcat . map escapeURIChar . B.unpack . encodeUtf8
238
239escapeURIChar :: Char -> Text
240escapeURIChar c
241  | isEscapable c = T.singleton '%' <> T.pack (printf "%02X" (ord c))
242  | otherwise     = T.singleton c
243  where isEscapable d = not (isAscii d && isAlphaNum d)
244                     && d `notElem` ['%','/','?',':','@','-','.','_','~','&',
245                                     '#','!','$','\'','(',')','*','+',',',
246                                     ';','=']
247
248