1{-# LANGUAGE CPP                        #-}
2{-# LANGUAGE OverloadedStrings          #-}
3{-# LANGUAGE MultiParamTypeClasses      #-}
4{-# LANGUAGE FlexibleInstances          #-}
5{-# LANGUAGE FlexibleContexts           #-}
6{-# LANGUAGE UndecidableInstances       #-}
7module Commonmark.Html
8  ( Html
9  , htmlInline
10  , htmlBlock
11  , htmlText
12  , htmlRaw
13  , addAttribute
14  , renderHtml
15  , escapeURI
16  , escapeHtml
17  )
18where
19
20import           Commonmark.Types
21import           Commonmark.Entity (lookupEntity)
22import           Data.Text (Text)
23import qualified Data.Text as T
24import qualified Data.Text.Lazy as TL
25import           Data.Text.Lazy.Builder (Builder, fromText, toLazyText,
26                                         singleton)
27import           Data.Text.Encoding   (encodeUtf8)
28import qualified Data.ByteString.Char8 as B
29import qualified Data.Set as Set
30import           Text.Printf          (printf)
31import           Data.Char            (ord, isAlphaNum, isAscii, isSpace)
32import           Data.Maybe           (fromMaybe)
33#if !MIN_VERSION_base(4,11,0)
34import           Data.Semigroup
35#endif
36
37data ElementType =
38    InlineElement
39  | BlockElement
40
41data Html a =
42    HtmlElement !ElementType {-# UNPACK #-} !Text [Attribute] (Maybe (Html a))
43  | HtmlText {-# UNPACK #-} !Text
44  | HtmlRaw {-# UNPACK #-} !Text
45  | HtmlNull
46  | HtmlConcat !(Html a) !(Html a)
47
48instance Show (Html a) where
49  show = TL.unpack . renderHtml
50
51instance Semigroup (Html a) where
52  x <> HtmlNull                = x
53  HtmlNull <> x                = x
54  HtmlText t1 <> HtmlText t2   = HtmlText (t1 <> t2)
55  HtmlRaw t1 <> HtmlRaw t2     = HtmlRaw (t1 <> t2)
56  x <> y                       = HtmlConcat x y
57
58instance Monoid (Html a) where
59  mempty = HtmlNull
60  mappend = (<>)
61
62instance HasAttributes (Html a) where
63  addAttributes attrs x = foldr addAttribute x attrs
64
65instance ToPlainText (Html a) where
66  toPlainText h =
67    case h of
68      HtmlElement InlineElement "span" attr (Just x)
69        -> case lookup "data-emoji" attr of
70              Just alias -> ":" <> alias <> ":"
71              Nothing    -> toPlainText x
72      HtmlElement _ _ _ (Just x) -> toPlainText x
73      HtmlElement _ _ attrs Nothing
74                                 -> fromMaybe mempty $ lookup "alt" attrs
75      HtmlText t                 -> t
76      HtmlConcat x y             -> toPlainText x <> toPlainText y
77      _                          -> mempty
78
79
80-- This instance mirrors what is expected in the spec tests.
81instance Rangeable (Html a) => IsInline (Html a) where
82  lineBreak = htmlInline "br" Nothing <> nl
83  softBreak = nl
84  str t = htmlText t
85  entity t = case lookupEntity (T.drop 1 t) of
86                   Just t' -> htmlText t'
87                   Nothing -> htmlRaw t
88  escapedChar c = htmlText (T.singleton c)
89  emph ils = htmlInline "em" (Just ils)
90  strong ils = htmlInline "strong" (Just ils)
91  link target title ils =
92    addAttribute ("href", escapeURI target) .
93    (if T.null title
94        then id
95        else addAttribute ("title", title)) $
96    htmlInline "a" (Just ils)
97  image target title ils =
98    addAttribute ("src", escapeURI target) .
99    addAttribute ("alt", toPlainText ils) .
100    (if T.null title
101        then id
102        else addAttribute ("title", title)) $
103    htmlInline "img" Nothing
104  code t = htmlInline "code" (Just (htmlText t))
105  rawInline f t
106    | f == Format "html" = htmlRaw t
107    | otherwise          = mempty
108
109instance IsInline (Html a) => IsBlock (Html a) (Html a) where
110  paragraph ils = htmlBlock "p" (Just ils)
111  plain ils = ils <> nl
112  thematicBreak = htmlBlock "hr" Nothing
113  blockQuote bs = htmlBlock "blockquote" $ Just (nl <> bs)
114  codeBlock info t =
115    htmlBlock "pre" $ Just $
116    (if T.null lang
117        then id
118        else addAttribute ("class", "language-" <> lang)) $
119    htmlInline "code" $ Just (htmlText t)
120    where lang = T.takeWhile (not . isSpace) info
121  heading level ils = htmlBlock h (Just ils)
122    where h = case level of
123                   1 -> "h1"
124                   2 -> "h2"
125                   3 -> "h3"
126                   4 -> "h4"
127                   5 -> "h5"
128                   6 -> "h6"
129                   _ -> "p"
130  rawBlock f t
131    | f == Format "html" = htmlRaw t
132    | otherwise          = mempty
133  referenceLinkDefinition _ _ = mempty
134  list (BulletList _) lSpacing items =
135    htmlBlock "ul" $ Just (nl <> mconcat (map li items))
136   where li x = htmlBlock "li" $
137                   Just ((if lSpacing == TightList
138                             then mempty
139                             else nl) <> x)
140  list (OrderedList startnum enumtype _delimtype) lSpacing items =
141    (if startnum /= 1
142        then addAttribute ("start", T.pack (show startnum))
143        else id) .
144    (case enumtype of
145       Decimal  -> id
146       UpperAlpha -> addAttribute ("type", "A")
147       LowerAlpha -> addAttribute ("type", "a")
148       UpperRoman -> addAttribute ("type", "I")
149       LowerRoman -> addAttribute ("type", "i"))
150    $ htmlBlock "ol" $
151      Just (nl <> mconcat (map li items))
152   where li x = htmlBlock "li" $
153                   Just ((if lSpacing == TightList
154                             then mempty
155                             else nl) <> x)
156
157nl :: Html a
158nl = htmlRaw "\n"
159
160instance Rangeable (Html ()) where
161  ranged _ x = x
162
163instance Rangeable (Html SourceRange) where
164  ranged sr x = addAttribute ("data-sourcepos", T.pack (show sr)) x
165
166
167
168htmlInline :: Text -> Maybe (Html a) -> Html a
169htmlInline tagname = HtmlElement InlineElement tagname []
170
171htmlBlock :: Text -> Maybe (Html a) -> Html a
172htmlBlock tagname mbcontents = HtmlElement BlockElement tagname [] mbcontents
173
174htmlText :: Text -> Html a
175htmlText = HtmlText
176
177htmlRaw :: Text -> Html a
178htmlRaw = HtmlRaw
179
180addAttribute :: Attribute -> Html a -> Html a
181addAttribute attr (HtmlElement eltType tagname attrs mbcontents) =
182  HtmlElement eltType tagname (incorporateAttribute attr attrs) mbcontents
183addAttribute attr (HtmlText t)
184  = addAttribute attr $ HtmlElement InlineElement "span" [] $ Just (HtmlText t)
185addAttribute _ elt = elt
186
187incorporateAttribute :: Attribute -> [Attribute] -> [Attribute]
188incorporateAttribute (k, v) as =
189  case lookup k' as of
190    Nothing            -> (k', v) : as
191    Just v'            -> (if k' == "class"
192                              then ("class", v <> " " <> v')
193                              else (k', v')) :
194                          filter (\(x, _) -> x /= k') as
195 where
196  k' = if k `Set.member` html5Attributes
197            || "data-" `T.isPrefixOf` k
198          then k
199          else "data-" <> k
200
201html5Attributes :: Set.Set Text
202html5Attributes = Set.fromList
203  [ "abbr"
204  , "accept"
205  , "accept-charset"
206  , "accesskey"
207  , "action"
208  , "allow"
209  , "allowfullscreen"
210  , "allowpaymentrequest"
211  , "allowusermedia"
212  , "alt"
213  , "as"
214  , "async"
215  , "autocapitalize"
216  , "autocomplete"
217  , "autofocus"
218  , "autoplay"
219  , "charset"
220  , "checked"
221  , "cite"
222  , "class"
223  , "color"
224  , "cols"
225  , "colspan"
226  , "content"
227  , "contenteditable"
228  , "controls"
229  , "coords"
230  , "crossorigin"
231  , "data"
232  , "datetime"
233  , "decoding"
234  , "default"
235  , "defer"
236  , "dir"
237  , "dirname"
238  , "disabled"
239  , "download"
240  , "draggable"
241  , "enctype"
242  , "enterkeyhint"
243  , "for"
244  , "form"
245  , "formaction"
246  , "formenctype"
247  , "formmethod"
248  , "formnovalidate"
249  , "formtarget"
250  , "headers"
251  , "height"
252  , "hidden"
253  , "high"
254  , "href"
255  , "hreflang"
256  , "http-equiv"
257  , "id"
258  , "imagesizes"
259  , "imagesrcset"
260  , "inputmode"
261  , "integrity"
262  , "is"
263  , "ismap"
264  , "itemid"
265  , "itemprop"
266  , "itemref"
267  , "itemscope"
268  , "itemtype"
269  , "kind"
270  , "label"
271  , "lang"
272  , "list"
273  , "loading"
274  , "loop"
275  , "low"
276  , "manifest"
277  , "max"
278  , "maxlength"
279  , "media"
280  , "method"
281  , "min"
282  , "minlength"
283  , "multiple"
284  , "muted"
285  , "name"
286  , "nomodule"
287  , "nonce"
288  , "novalidate"
289  , "onabort"
290  , "onafterprint"
291  , "onauxclick"
292  , "onbeforeprint"
293  , "onbeforeunload"
294  , "onblur"
295  , "oncancel"
296  , "oncanplay"
297  , "oncanplaythrough"
298  , "onchange"
299  , "onclick"
300  , "onclose"
301  , "oncontextmenu"
302  , "oncopy"
303  , "oncuechange"
304  , "oncut"
305  , "ondblclick"
306  , "ondrag"
307  , "ondragend"
308  , "ondragenter"
309  , "ondragexit"
310  , "ondragleave"
311  , "ondragover"
312  , "ondragstart"
313  , "ondrop"
314  , "ondurationchange"
315  , "onemptied"
316  , "onended"
317  , "onerror"
318  , "onfocus"
319  , "onhashchange"
320  , "oninput"
321  , "oninvalid"
322  , "onkeydown"
323  , "onkeypress"
324  , "onkeyup"
325  , "onlanguagechange"
326  , "onload"
327  , "onloadeddata"
328  , "onloadedmetadata"
329  , "onloadend"
330  , "onloadstart"
331  , "onmessage"
332  , "onmessageerror"
333  , "onmousedown"
334  , "onmouseenter"
335  , "onmouseleave"
336  , "onmousemove"
337  , "onmouseout"
338  , "onmouseover"
339  , "onmouseup"
340  , "onoffline"
341  , "ononline"
342  , "onpagehide"
343  , "onpageshow"
344  , "onpaste"
345  , "onpause"
346  , "onplay"
347  , "onplaying"
348  , "onpopstate"
349  , "onprogress"
350  , "onratechange"
351  , "onrejectionhandled"
352  , "onreset"
353  , "onresize"
354  , "onscroll"
355  , "onsecuritypolicyviolation"
356  , "onseeked"
357  , "onseeking"
358  , "onselect"
359  , "onstalled"
360  , "onstorage"
361  , "onsubmit"
362  , "onsuspend"
363  , "ontimeupdate"
364  , "ontoggle"
365  , "onunhandledrejection"
366  , "onunload"
367  , "onvolumechange"
368  , "onwaiting"
369  , "onwheel"
370  , "open"
371  , "optimum"
372  , "pattern"
373  , "ping"
374  , "placeholder"
375  , "playsinline"
376  , "poster"
377  , "preload"
378  , "readonly"
379  , "referrerpolicy"
380  , "rel"
381  , "required"
382  , "reversed"
383  , "role"
384  , "rows"
385  , "rowspan"
386  , "sandbox"
387  , "scope"
388  , "selected"
389  , "shape"
390  , "size"
391  , "sizes"
392  , "slot"
393  , "span"
394  , "spellcheck"
395  , "src"
396  , "srcdoc"
397  , "srclang"
398  , "srcset"
399  , "start"
400  , "step"
401  , "style"
402  , "tabindex"
403  , "target"
404  , "title"
405  , "translate"
406  , "type"
407  , "typemustmatch"
408  , "updateviacache"
409  , "usemap"
410  , "value"
411  , "width"
412  , "workertype"
413  , "wrap"
414  ]
415
416
417renderHtml :: Html a -> TL.Text
418renderHtml = {-# SCC renderHtml #-} toLazyText . toBuilder
419
420toBuilder :: Html a -> Builder
421toBuilder HtmlNull = mempty
422toBuilder (HtmlConcat x y) = toBuilder x <> toBuilder y
423toBuilder (HtmlRaw t) = fromText t
424toBuilder (HtmlText t) = escapeHtml t
425toBuilder (HtmlElement eltType tagname attrs mbcontents) =
426  "<" <> fromText tagname <> mconcat (map toAttr attrs) <> filling <> nl'
427  where
428    toAttr (x,y) = " " <> fromText x <> "=\"" <> escapeHtml y <> "\""
429    nl' = case eltType of
430           BlockElement -> "\n"
431           _            -> mempty
432    filling = case mbcontents of
433                 Nothing   -> " />"
434                 Just cont -> ">" <> toBuilder cont <> "</" <>
435                              fromText tagname <> ">"
436
437escapeHtml :: Text -> Builder
438escapeHtml t =
439  case T.uncons post of
440    Just (c, rest) -> fromText pre <> escapeHtmlChar c <> escapeHtml rest
441    Nothing        -> fromText pre
442 where
443  (pre,post)        = T.break needsEscaping t
444  needsEscaping '<' = True
445  needsEscaping '>' = True
446  needsEscaping '&' = True
447  needsEscaping '"' = True
448  needsEscaping _   = False
449
450escapeHtmlChar :: Char -> Builder
451escapeHtmlChar '<' = "&lt;"
452escapeHtmlChar '>' = "&gt;"
453escapeHtmlChar '&' = "&amp;"
454escapeHtmlChar '"' = "&quot;"
455escapeHtmlChar c   = singleton c
456
457escapeURI :: Text -> Text
458escapeURI = mconcat . map escapeURIChar . B.unpack . encodeUtf8
459
460escapeURIChar :: Char -> Text
461escapeURIChar c
462  | isEscapable c = T.singleton '%' <> T.pack (printf "%02X" (ord c))
463  | otherwise     = T.singleton c
464  where isEscapable d = not (isAscii d && isAlphaNum d)
465                     && d `notElem` ['%','/','?',':','@','-','.','_','~','&',
466                                     '#','!','$','\'','(',')','*','+',',',
467                                     ';','=']
468
469