1{-# LANGUAGE CPP #-} 2{-# LANGUAGE OverloadedStrings #-} 3{-# LANGUAGE MultiParamTypeClasses #-} 4{-# LANGUAGE FlexibleInstances #-} 5{-# LANGUAGE FlexibleContexts #-} 6{-# LANGUAGE UndecidableInstances #-} 7module Commonmark.Html 8 ( Html 9 , htmlInline 10 , htmlBlock 11 , htmlText 12 , htmlRaw 13 , addAttribute 14 , renderHtml 15 , escapeURI 16 , escapeHtml 17 ) 18where 19 20import Commonmark.Types 21import Commonmark.Entity (lookupEntity) 22import Data.Text (Text) 23import qualified Data.Text as T 24import qualified Data.Text.Lazy as TL 25import Data.Text.Lazy.Builder (Builder, fromText, toLazyText, 26 singleton) 27import Data.Text.Encoding (encodeUtf8) 28import qualified Data.ByteString.Char8 as B 29import qualified Data.Set as Set 30import Text.Printf (printf) 31import Data.Char (ord, isAlphaNum, isAscii, isSpace) 32import Data.Maybe (fromMaybe) 33#if !MIN_VERSION_base(4,11,0) 34import Data.Semigroup 35#endif 36 37data ElementType = 38 InlineElement 39 | BlockElement 40 41data Html a = 42 HtmlElement !ElementType {-# UNPACK #-} !Text [Attribute] (Maybe (Html a)) 43 | HtmlText {-# UNPACK #-} !Text 44 | HtmlRaw {-# UNPACK #-} !Text 45 | HtmlNull 46 | HtmlConcat !(Html a) !(Html a) 47 48instance Show (Html a) where 49 show = TL.unpack . renderHtml 50 51instance Semigroup (Html a) where 52 x <> HtmlNull = x 53 HtmlNull <> x = x 54 HtmlText t1 <> HtmlText t2 = HtmlText (t1 <> t2) 55 HtmlRaw t1 <> HtmlRaw t2 = HtmlRaw (t1 <> t2) 56 x <> y = HtmlConcat x y 57 58instance Monoid (Html a) where 59 mempty = HtmlNull 60 mappend = (<>) 61 62instance HasAttributes (Html a) where 63 addAttributes attrs x = foldr addAttribute x attrs 64 65instance ToPlainText (Html a) where 66 toPlainText h = 67 case h of 68 HtmlElement InlineElement "span" attr (Just x) 69 -> case lookup "data-emoji" attr of 70 Just alias -> ":" <> alias <> ":" 71 Nothing -> toPlainText x 72 HtmlElement _ _ _ (Just x) -> toPlainText x 73 HtmlElement _ _ attrs Nothing 74 -> fromMaybe mempty $ lookup "alt" attrs 75 HtmlText t -> t 76 HtmlConcat x y -> toPlainText x <> toPlainText y 77 _ -> mempty 78 79 80-- This instance mirrors what is expected in the spec tests. 81instance Rangeable (Html a) => IsInline (Html a) where 82 lineBreak = htmlInline "br" Nothing <> nl 83 softBreak = nl 84 str t = htmlText t 85 entity t = case lookupEntity (T.drop 1 t) of 86 Just t' -> htmlText t' 87 Nothing -> htmlRaw t 88 escapedChar c = htmlText (T.singleton c) 89 emph ils = htmlInline "em" (Just ils) 90 strong ils = htmlInline "strong" (Just ils) 91 link target title ils = 92 addAttribute ("href", escapeURI target) . 93 (if T.null title 94 then id 95 else addAttribute ("title", title)) $ 96 htmlInline "a" (Just ils) 97 image target title ils = 98 addAttribute ("src", escapeURI target) . 99 addAttribute ("alt", toPlainText ils) . 100 (if T.null title 101 then id 102 else addAttribute ("title", title)) $ 103 htmlInline "img" Nothing 104 code t = htmlInline "code" (Just (htmlText t)) 105 rawInline f t 106 | f == Format "html" = htmlRaw t 107 | otherwise = mempty 108 109instance IsInline (Html a) => IsBlock (Html a) (Html a) where 110 paragraph ils = htmlBlock "p" (Just ils) 111 plain ils = ils <> nl 112 thematicBreak = htmlBlock "hr" Nothing 113 blockQuote bs = htmlBlock "blockquote" $ Just (nl <> bs) 114 codeBlock info t = 115 htmlBlock "pre" $ Just $ 116 (if T.null lang 117 then id 118 else addAttribute ("class", "language-" <> lang)) $ 119 htmlInline "code" $ Just (htmlText t) 120 where lang = T.takeWhile (not . isSpace) info 121 heading level ils = htmlBlock h (Just ils) 122 where h = case level of 123 1 -> "h1" 124 2 -> "h2" 125 3 -> "h3" 126 4 -> "h4" 127 5 -> "h5" 128 6 -> "h6" 129 _ -> "p" 130 rawBlock f t 131 | f == Format "html" = htmlRaw t 132 | otherwise = mempty 133 referenceLinkDefinition _ _ = mempty 134 list (BulletList _) lSpacing items = 135 htmlBlock "ul" $ Just (nl <> mconcat (map li items)) 136 where li x = htmlBlock "li" $ 137 Just ((if lSpacing == TightList 138 then mempty 139 else nl) <> x) 140 list (OrderedList startnum enumtype _delimtype) lSpacing items = 141 (if startnum /= 1 142 then addAttribute ("start", T.pack (show startnum)) 143 else id) . 144 (case enumtype of 145 Decimal -> id 146 UpperAlpha -> addAttribute ("type", "A") 147 LowerAlpha -> addAttribute ("type", "a") 148 UpperRoman -> addAttribute ("type", "I") 149 LowerRoman -> addAttribute ("type", "i")) 150 $ htmlBlock "ol" $ 151 Just (nl <> mconcat (map li items)) 152 where li x = htmlBlock "li" $ 153 Just ((if lSpacing == TightList 154 then mempty 155 else nl) <> x) 156 157nl :: Html a 158nl = htmlRaw "\n" 159 160instance Rangeable (Html ()) where 161 ranged _ x = x 162 163instance Rangeable (Html SourceRange) where 164 ranged sr x = addAttribute ("data-sourcepos", T.pack (show sr)) x 165 166 167 168htmlInline :: Text -> Maybe (Html a) -> Html a 169htmlInline tagname = HtmlElement InlineElement tagname [] 170 171htmlBlock :: Text -> Maybe (Html a) -> Html a 172htmlBlock tagname mbcontents = HtmlElement BlockElement tagname [] mbcontents 173 174htmlText :: Text -> Html a 175htmlText = HtmlText 176 177htmlRaw :: Text -> Html a 178htmlRaw = HtmlRaw 179 180addAttribute :: Attribute -> Html a -> Html a 181addAttribute attr (HtmlElement eltType tagname attrs mbcontents) = 182 HtmlElement eltType tagname (incorporateAttribute attr attrs) mbcontents 183addAttribute attr (HtmlText t) 184 = addAttribute attr $ HtmlElement InlineElement "span" [] $ Just (HtmlText t) 185addAttribute _ elt = elt 186 187incorporateAttribute :: Attribute -> [Attribute] -> [Attribute] 188incorporateAttribute (k, v) as = 189 case lookup k' as of 190 Nothing -> (k', v) : as 191 Just v' -> (if k' == "class" 192 then ("class", v <> " " <> v') 193 else (k', v')) : 194 filter (\(x, _) -> x /= k') as 195 where 196 k' = if k `Set.member` html5Attributes 197 || "data-" `T.isPrefixOf` k 198 then k 199 else "data-" <> k 200 201html5Attributes :: Set.Set Text 202html5Attributes = Set.fromList 203 [ "abbr" 204 , "accept" 205 , "accept-charset" 206 , "accesskey" 207 , "action" 208 , "allow" 209 , "allowfullscreen" 210 , "allowpaymentrequest" 211 , "allowusermedia" 212 , "alt" 213 , "as" 214 , "async" 215 , "autocapitalize" 216 , "autocomplete" 217 , "autofocus" 218 , "autoplay" 219 , "charset" 220 , "checked" 221 , "cite" 222 , "class" 223 , "color" 224 , "cols" 225 , "colspan" 226 , "content" 227 , "contenteditable" 228 , "controls" 229 , "coords" 230 , "crossorigin" 231 , "data" 232 , "datetime" 233 , "decoding" 234 , "default" 235 , "defer" 236 , "dir" 237 , "dirname" 238 , "disabled" 239 , "download" 240 , "draggable" 241 , "enctype" 242 , "enterkeyhint" 243 , "for" 244 , "form" 245 , "formaction" 246 , "formenctype" 247 , "formmethod" 248 , "formnovalidate" 249 , "formtarget" 250 , "headers" 251 , "height" 252 , "hidden" 253 , "high" 254 , "href" 255 , "hreflang" 256 , "http-equiv" 257 , "id" 258 , "imagesizes" 259 , "imagesrcset" 260 , "inputmode" 261 , "integrity" 262 , "is" 263 , "ismap" 264 , "itemid" 265 , "itemprop" 266 , "itemref" 267 , "itemscope" 268 , "itemtype" 269 , "kind" 270 , "label" 271 , "lang" 272 , "list" 273 , "loading" 274 , "loop" 275 , "low" 276 , "manifest" 277 , "max" 278 , "maxlength" 279 , "media" 280 , "method" 281 , "min" 282 , "minlength" 283 , "multiple" 284 , "muted" 285 , "name" 286 , "nomodule" 287 , "nonce" 288 , "novalidate" 289 , "onabort" 290 , "onafterprint" 291 , "onauxclick" 292 , "onbeforeprint" 293 , "onbeforeunload" 294 , "onblur" 295 , "oncancel" 296 , "oncanplay" 297 , "oncanplaythrough" 298 , "onchange" 299 , "onclick" 300 , "onclose" 301 , "oncontextmenu" 302 , "oncopy" 303 , "oncuechange" 304 , "oncut" 305 , "ondblclick" 306 , "ondrag" 307 , "ondragend" 308 , "ondragenter" 309 , "ondragexit" 310 , "ondragleave" 311 , "ondragover" 312 , "ondragstart" 313 , "ondrop" 314 , "ondurationchange" 315 , "onemptied" 316 , "onended" 317 , "onerror" 318 , "onfocus" 319 , "onhashchange" 320 , "oninput" 321 , "oninvalid" 322 , "onkeydown" 323 , "onkeypress" 324 , "onkeyup" 325 , "onlanguagechange" 326 , "onload" 327 , "onloadeddata" 328 , "onloadedmetadata" 329 , "onloadend" 330 , "onloadstart" 331 , "onmessage" 332 , "onmessageerror" 333 , "onmousedown" 334 , "onmouseenter" 335 , "onmouseleave" 336 , "onmousemove" 337 , "onmouseout" 338 , "onmouseover" 339 , "onmouseup" 340 , "onoffline" 341 , "ononline" 342 , "onpagehide" 343 , "onpageshow" 344 , "onpaste" 345 , "onpause" 346 , "onplay" 347 , "onplaying" 348 , "onpopstate" 349 , "onprogress" 350 , "onratechange" 351 , "onrejectionhandled" 352 , "onreset" 353 , "onresize" 354 , "onscroll" 355 , "onsecuritypolicyviolation" 356 , "onseeked" 357 , "onseeking" 358 , "onselect" 359 , "onstalled" 360 , "onstorage" 361 , "onsubmit" 362 , "onsuspend" 363 , "ontimeupdate" 364 , "ontoggle" 365 , "onunhandledrejection" 366 , "onunload" 367 , "onvolumechange" 368 , "onwaiting" 369 , "onwheel" 370 , "open" 371 , "optimum" 372 , "pattern" 373 , "ping" 374 , "placeholder" 375 , "playsinline" 376 , "poster" 377 , "preload" 378 , "readonly" 379 , "referrerpolicy" 380 , "rel" 381 , "required" 382 , "reversed" 383 , "role" 384 , "rows" 385 , "rowspan" 386 , "sandbox" 387 , "scope" 388 , "selected" 389 , "shape" 390 , "size" 391 , "sizes" 392 , "slot" 393 , "span" 394 , "spellcheck" 395 , "src" 396 , "srcdoc" 397 , "srclang" 398 , "srcset" 399 , "start" 400 , "step" 401 , "style" 402 , "tabindex" 403 , "target" 404 , "title" 405 , "translate" 406 , "type" 407 , "typemustmatch" 408 , "updateviacache" 409 , "usemap" 410 , "value" 411 , "width" 412 , "workertype" 413 , "wrap" 414 ] 415 416 417renderHtml :: Html a -> TL.Text 418renderHtml = {-# SCC renderHtml #-} toLazyText . toBuilder 419 420toBuilder :: Html a -> Builder 421toBuilder HtmlNull = mempty 422toBuilder (HtmlConcat x y) = toBuilder x <> toBuilder y 423toBuilder (HtmlRaw t) = fromText t 424toBuilder (HtmlText t) = escapeHtml t 425toBuilder (HtmlElement eltType tagname attrs mbcontents) = 426 "<" <> fromText tagname <> mconcat (map toAttr attrs) <> filling <> nl' 427 where 428 toAttr (x,y) = " " <> fromText x <> "=\"" <> escapeHtml y <> "\"" 429 nl' = case eltType of 430 BlockElement -> "\n" 431 _ -> mempty 432 filling = case mbcontents of 433 Nothing -> " />" 434 Just cont -> ">" <> toBuilder cont <> "</" <> 435 fromText tagname <> ">" 436 437escapeHtml :: Text -> Builder 438escapeHtml t = 439 case T.uncons post of 440 Just (c, rest) -> fromText pre <> escapeHtmlChar c <> escapeHtml rest 441 Nothing -> fromText pre 442 where 443 (pre,post) = T.break needsEscaping t 444 needsEscaping '<' = True 445 needsEscaping '>' = True 446 needsEscaping '&' = True 447 needsEscaping '"' = True 448 needsEscaping _ = False 449 450escapeHtmlChar :: Char -> Builder 451escapeHtmlChar '<' = "<" 452escapeHtmlChar '>' = ">" 453escapeHtmlChar '&' = "&" 454escapeHtmlChar '"' = """ 455escapeHtmlChar c = singleton c 456 457escapeURI :: Text -> Text 458escapeURI = mconcat . map escapeURIChar . B.unpack . encodeUtf8 459 460escapeURIChar :: Char -> Text 461escapeURIChar c 462 | isEscapable c = T.singleton '%' <> T.pack (printf "%02X" (ord c)) 463 | otherwise = T.singleton c 464 where isEscapable d = not (isAscii d && isAlphaNum d) 465 && d `notElem` ['%','/','?',':','@','-','.','_','~','&', 466 '#','!','$','\'','(',')','*','+',',', 467 ';','='] 468 469