1{-# LANGUAGE CPP #-} 2{-# LANGUAGE BangPatterns #-} 3{-# LANGUAGE OverloadedStrings #-} 4{-# LANGUAGE MultiParamTypeClasses #-} 5{-# LANGUAGE FlexibleInstances #-} 6{-# LANGUAGE FlexibleContexts #-} 7{-# LANGUAGE UndecidableInstances #-} 8module Commonmark.Html 9 ( Html 10 , htmlInline 11 , htmlBlock 12 , htmlText 13 , htmlRaw 14 , addAttribute 15 , renderHtml 16 , escapeURI 17 , escapeHtml 18 ) 19where 20 21import Commonmark.Types 22import Commonmark.Entity (lookupEntity) 23import Data.Text (Text) 24import qualified Data.Text as T 25import qualified Data.Text.Lazy as TL 26import Data.Text.Lazy.Builder (Builder, fromText, toLazyText, 27 singleton) 28import Data.Text.Encoding (encodeUtf8) 29import qualified Data.ByteString.Char8 as B 30import Text.Printf (printf) 31import Data.Char (ord, isAlphaNum, isAscii, isSpace) 32import Data.Maybe (fromMaybe) 33#if !MIN_VERSION_base(4,11,0) 34import Data.Semigroup 35#endif 36 37data ElementType = 38 InlineElement 39 | BlockElement 40 41data Html a = 42 HtmlElement !ElementType {-# UNPACK #-} !Text [Attribute] (Maybe (Html a)) 43 | HtmlText {-# UNPACK #-} !Text 44 | HtmlRaw {-# UNPACK #-} !Text 45 | HtmlNull 46 | HtmlConcat !(Html a) !(Html a) 47 48instance Show (Html a) where 49 show = TL.unpack . renderHtml 50 51instance Semigroup (Html a) where 52 x <> HtmlNull = x 53 HtmlNull <> x = x 54 HtmlText t1 <> HtmlText t2 = HtmlText (t1 <> t2) 55 HtmlRaw t1 <> HtmlRaw t2 = HtmlRaw (t1 <> t2) 56 x <> y = HtmlConcat x y 57 58instance Monoid (Html a) where 59 mempty = HtmlNull 60 mappend = (<>) 61 62instance HasAttributes (Html a) where 63 addAttributes attrs x = foldr addAttribute x attrs 64 65instance ToPlainText (Html a) where 66 toPlainText h = 67 case h of 68 HtmlElement InlineElement "span" attr (Just x) 69 -> case lookup "data-emoji" attr of 70 Just alias -> ":" <> alias <> ":" 71 Nothing -> toPlainText x 72 HtmlElement _ _ _ (Just x) -> toPlainText x 73 HtmlElement _ _ attrs Nothing 74 -> fromMaybe mempty $ lookup "alt" attrs 75 HtmlText t -> t 76 HtmlConcat x y -> toPlainText x <> toPlainText y 77 _ -> mempty 78 79 80-- This instance mirrors what is expected in the spec tests. 81instance Rangeable (Html a) => IsInline (Html a) where 82 lineBreak = htmlInline "br" Nothing <> nl 83 softBreak = nl 84 str t = htmlText t 85 entity t = case lookupEntity (T.drop 1 t) of 86 Just t' -> htmlText t' 87 Nothing -> htmlRaw t 88 escapedChar c = htmlText (T.singleton c) 89 emph ils = htmlInline "em" (Just ils) 90 strong ils = htmlInline "strong" (Just ils) 91 link target title ils = 92 addAttribute ("href", escapeURI target) . 93 (if T.null title 94 then id 95 else addAttribute ("title", title)) $ 96 htmlInline "a" (Just ils) 97 image target title ils = 98 addAttribute ("src", escapeURI target) . 99 addAttribute ("alt", toPlainText ils) . 100 (if T.null title 101 then id 102 else addAttribute ("title", title)) $ 103 htmlInline "img" Nothing 104 code t = htmlInline "code" (Just (htmlText t)) 105 rawInline f t 106 | f == Format "html" = htmlRaw t 107 | otherwise = mempty 108 109instance IsInline (Html a) => IsBlock (Html a) (Html a) where 110 paragraph ils = htmlBlock "p" (Just ils) 111 plain ils = ils <> nl 112 thematicBreak = htmlBlock "hr" Nothing 113 blockQuote bs = htmlBlock "blockquote" $ Just (nl <> bs) 114 codeBlock info t = 115 htmlBlock "pre" $ Just $ 116 (if T.null lang 117 then id 118 else addAttribute ("class", "language-" <> lang)) $ 119 htmlInline "code" $ Just (htmlText t) 120 where lang = T.takeWhile (not . isSpace) info 121 heading level ils = htmlBlock h (Just ils) 122 where h = case level of 123 1 -> "h1" 124 2 -> "h2" 125 3 -> "h3" 126 4 -> "h4" 127 5 -> "h5" 128 6 -> "h6" 129 _ -> "p" 130 rawBlock f t 131 | f == Format "html" = htmlRaw t 132 | otherwise = mempty 133 referenceLinkDefinition _ _ = mempty 134 list (BulletList _) lSpacing items = 135 htmlBlock "ul" $ Just (nl <> mconcat (map li items)) 136 where li x = htmlBlock "li" $ 137 Just ((if lSpacing == TightList 138 then mempty 139 else nl) <> x) 140 list (OrderedList startnum enumtype _delimtype) lSpacing items = 141 (if startnum /= 1 142 then addAttribute ("start", T.pack (show startnum)) 143 else id) . 144 (case enumtype of 145 Decimal -> id 146 UpperAlpha -> addAttribute ("type", "A") 147 LowerAlpha -> addAttribute ("type", "a") 148 UpperRoman -> addAttribute ("type", "I") 149 LowerRoman -> addAttribute ("type", "i")) 150 $ htmlBlock "ol" $ 151 Just (nl <> mconcat (map li items)) 152 where li x = htmlBlock "li" $ 153 Just ((if lSpacing == TightList 154 then mempty 155 else nl) <> x) 156 157nl :: Html a 158nl = htmlRaw "\n" 159 160instance Rangeable (Html ()) where 161 ranged _ x = x 162 163instance Rangeable (Html SourceRange) where 164 ranged sr x = addAttribute ("data-sourcepos", T.pack (show sr)) x 165 166 167 168htmlInline :: Text -> Maybe (Html a) -> Html a 169htmlInline tagname mbcontents = HtmlElement InlineElement tagname [] mbcontents 170 171htmlBlock :: Text -> Maybe (Html a) -> Html a 172htmlBlock tagname mbcontents = HtmlElement BlockElement tagname [] mbcontents 173 174htmlText :: Text -> Html a 175htmlText = HtmlText 176 177htmlRaw :: Text -> Html a 178htmlRaw = HtmlRaw 179 180addAttribute :: Attribute -> Html a -> Html a 181addAttribute attr (HtmlElement eltType tagname attrs mbcontents) = 182 HtmlElement eltType tagname (incorporateAttribute attr attrs) mbcontents 183addAttribute attr (HtmlText t) 184 = HtmlElement InlineElement "span" [attr] $ Just (HtmlText t) 185addAttribute _ elt = elt 186 187incorporateAttribute :: Attribute -> [Attribute] -> [Attribute] 188incorporateAttribute (k, v) as = 189 case lookup k as of 190 Nothing -> (k, v) : as 191 Just v' -> (if k == "class" 192 then ("class", v <> " " <> v') 193 else (k, v')) : 194 filter (\(x, _) -> x /= k) as 195 196renderHtml :: Html a -> TL.Text 197renderHtml = {-# SCC renderHtml #-} toLazyText . toBuilder 198 199toBuilder :: Html a -> Builder 200toBuilder (HtmlNull) = mempty 201toBuilder (HtmlConcat x y) = toBuilder x <> toBuilder y 202toBuilder (HtmlRaw t) = fromText t 203toBuilder (HtmlText t) = escapeHtml t 204toBuilder (HtmlElement eltType tagname attrs mbcontents) = 205 "<" <> fromText tagname <> mconcat (map toAttr attrs) <> filling <> nl' 206 where 207 toAttr (x,y) = " " <> fromText x <> "=\"" <> escapeHtml y <> "\"" 208 nl' = case eltType of 209 BlockElement -> "\n" 210 _ -> mempty 211 filling = case mbcontents of 212 Nothing -> " />" 213 Just cont -> ">" <> toBuilder cont <> "</" <> 214 fromText tagname <> ">" 215 216escapeHtml :: Text -> Builder 217escapeHtml t = 218 case T.uncons post of 219 Just (c, rest) -> fromText pre <> escapeHtmlChar c <> escapeHtml rest 220 Nothing -> fromText pre 221 where 222 (pre,post) = T.break needsEscaping t 223 needsEscaping '<' = True 224 needsEscaping '>' = True 225 needsEscaping '&' = True 226 needsEscaping '"' = True 227 needsEscaping _ = False 228 229escapeHtmlChar :: Char -> Builder 230escapeHtmlChar '<' = "<" 231escapeHtmlChar '>' = ">" 232escapeHtmlChar '&' = "&" 233escapeHtmlChar '"' = """ 234escapeHtmlChar c = singleton c 235 236escapeURI :: Text -> Text 237escapeURI = mconcat . map escapeURIChar . B.unpack . encodeUtf8 238 239escapeURIChar :: Char -> Text 240escapeURIChar c 241 | isEscapable c = T.singleton '%' <> T.pack (printf "%02X" (ord c)) 242 | otherwise = T.singleton c 243 where isEscapable d = not (isAscii d && isAlphaNum d) 244 && d `notElem` ['%','/','?',':','@','-','.','_','~','&', 245 '#','!','$','\'','(',')','*','+',',', 246 ';','='] 247 248