1{-# LANGUAGE CPP #-} 2{-# LANGUAGE ExtendedDefaultRules #-} 3{-# LANGUAGE FlexibleContexts #-} 4{-# LANGUAGE FlexibleInstances #-} 5{-# LANGUAGE GeneralizedNewtypeDeriving #-} 6{-# LANGUAGE MultiParamTypeClasses #-} 7{-# LANGUAGE OverloadedStrings #-} 8{-# LANGUAGE UndecidableInstances #-} 9 10module Commonmark.Pandoc 11 ( Cm(..) 12 ) 13 14where 15 16import Data.Maybe (fromMaybe) 17import qualified Data.Text as T 18import qualified Data.Text.Read as TR 19import Text.Pandoc.Definition 20import Text.Pandoc.Walk 21import qualified Text.Pandoc.Builder as B 22import Commonmark.Types as C 23import Commonmark.Entity (lookupEntity) 24import Commonmark.Extensions.Math 25import Commonmark.Extensions.Emoji 26import Commonmark.Extensions.Wikilinks 27import Commonmark.Extensions.PipeTable 28import Commonmark.Extensions.Strikethrough 29import Commonmark.Extensions.Superscript 30import Commonmark.Extensions.Subscript 31import Commonmark.Extensions.DefinitionList 32import Commonmark.Extensions.Attributes 33import Commonmark.Extensions.Footnote 34import Commonmark.Extensions.TaskList 35import Commonmark.Extensions.Smart 36import Data.Char (isSpace) 37import Data.Coerce (coerce) 38#if !MIN_VERSION_base(4,11,0) 39import Data.Semigroup (Semigroup, (<>)) 40#endif 41 42newtype Cm b a = Cm { unCm :: a } 43 deriving (Show, Semigroup, Monoid) 44 45instance Functor (Cm b) where 46 fmap f (Cm x) = Cm (f x) 47 48instance Rangeable (Cm b B.Inlines) => IsInline (Cm b B.Inlines) where 49 lineBreak = Cm B.linebreak 50 softBreak = Cm B.softbreak 51 str t = Cm $ B.text t 52 entity t 53 | illegalCodePoint t = Cm $ B.str "\xFFFD" 54 | otherwise = Cm $ B.str $ fromMaybe t $ lookupEntity (T.drop 1 t) 55 escapedChar c = Cm $ B.str $ T.singleton c 56 emph ils = B.emph <$> ils 57 strong ils = B.strong <$> ils 58 link target title ils = B.link target title <$> ils 59 image target title ils = B.image target title <$> ils 60 code t = Cm $ B.code t 61 rawInline (C.Format f) t = Cm $ B.rawInline f t 62 63instance Rangeable (Cm () B.Inlines) where 64 ranged _r x = x 65 66instance Rangeable (Cm SourceRange B.Inlines) where 67 ranged r = addAttributes [("data-pos", T.pack (show r))] 68 69instance Walkable Inline b => ToPlainText (Cm a b) where 70 toPlainText = stringify . walk unemoji . unCm 71 72unemoji :: Inline -> Inline 73unemoji (Span ("",["emoji"],[("data-emoji",alias)]) _) 74 = Str (":" <> alias <> ":") 75unemoji x = x 76 77instance (Rangeable (Cm a B.Inlines), 78 Rangeable (Cm a B.Blocks)) 79 => IsBlock (Cm a B.Inlines) (Cm a B.Blocks) where 80 paragraph ils = Cm $ B.para $ unCm ils 81 plain ils = Cm $ B.plain $ unCm ils 82 thematicBreak = Cm B.horizontalRule 83 blockQuote bs = B.blockQuote <$> bs 84 codeBlock info t = 85 Cm $ B.codeBlockWith attr $ fromMaybe t $ T.stripSuffix "\n" t 86 where attr = ("", [lang | not (T.null lang)], []) 87 lang = T.takeWhile (not . isSpace) info 88 heading level ils = Cm $ B.header level $ unCm ils 89 rawBlock (C.Format f) t = Cm $ B.rawBlock f t 90 referenceLinkDefinition _ _ = Cm mempty 91 list (C.BulletList _) lSpacing items = 92 Cm . B.bulletList . handleSpacing lSpacing . map unCm $ items 93 list (C.OrderedList startnum enumtype delimtype) lSpacing items = 94 Cm . B.orderedListWith attr . handleSpacing lSpacing . map unCm $ items 95 where sty = case enumtype of 96 C.Decimal -> B.Decimal 97 C.UpperAlpha -> B.UpperAlpha 98 C.LowerAlpha -> B.LowerAlpha 99 C.UpperRoman -> B.UpperRoman 100 C.LowerRoman -> B.LowerRoman 101 delim = case delimtype of 102 C.Period -> B.Period 103 C.OneParen -> B.OneParen 104 C.TwoParens -> B.TwoParens 105 attr = (startnum, sty, delim) 106 107instance Rangeable (Cm () B.Blocks) where 108 ranged _r x = x 109 110instance Rangeable (Cm SourceRange B.Blocks) where 111 ranged r x = B.divWith ("",[],[("data-pos",T.pack (show r))]) <$> x 112 113instance HasMath (Cm b B.Inlines) where 114 inlineMath t = Cm $ B.math t 115 displayMath t = Cm $ B.displayMath t 116 117instance Rangeable (Cm b B.Inlines) => HasQuoted (Cm b B.Inlines) where 118 singleQuoted x = B.singleQuoted <$> x 119 doubleQuoted x = B.doubleQuoted <$> x 120 121instance HasEmoji (Cm b B.Inlines) where 122 emoji kw t = Cm $ B.spanWith ("",["emoji"],[("data-emoji",kw)]) 123 $ B.text t 124 125instance HasWikilinks (Cm b B.Inlines) where 126 wikilink t il = Cm $ B.link t "wikilink" $ unCm il 127 128instance HasPipeTable (Cm a B.Inlines) (Cm a B.Blocks) where 129 pipeTable aligns headerCells rows = 130 Cm $ B.table B.emptyCaption colspecs 131 (TableHead nullAttr (toHeaderRow headerCells)) 132 [TableBody nullAttr 0 [] $ map toRow rows] 133 (TableFoot nullAttr []) 134 where 135 toHeaderRow cells 136 | null cells = [] 137 | otherwise = [toRow cells] 138 toRow = Row nullAttr . map (B.simpleCell . B.plain . unCm) 139 toPandocAlignment LeftAlignedCol = AlignLeft 140 toPandocAlignment CenterAlignedCol = AlignCenter 141 toPandocAlignment RightAlignedCol = AlignRight 142 toPandocAlignment DefaultAlignedCol = AlignDefault 143 colspecs = map (\al -> (toPandocAlignment al, ColWidthDefault)) 144 aligns 145 146instance (Rangeable (Cm a B.Inlines), Rangeable (Cm a B.Blocks)) 147 => HasDefinitionList (Cm a B.Inlines) (Cm a B.Blocks) where 148 definitionList _ items = 149 Cm $ B.definitionList $ map coerce items 150 151instance (Rangeable (Cm a B.Inlines), Rangeable (Cm a B.Blocks)) 152 => HasTaskList (Cm a B.Inlines) (Cm a B.Blocks) where 153 taskList _ spacing items = 154 Cm $ B.bulletList $ handleSpacing spacing $ map toTaskListItem items 155 156handleSpacing :: ListSpacing -> [B.Blocks] -> [B.Blocks] 157handleSpacing TightList = map (B.fromList . map paraToPlain . B.toList) 158handleSpacing LooseList = id 159 160paraToPlain :: Block -> Block 161paraToPlain (Para xs) = Plain xs 162paraToPlain x = x 163 164toTaskListItem :: (Bool, Cm a B.Blocks) -> B.Blocks 165toTaskListItem (checked, item) = B.fromList $ 166 case B.toList $ coerce item of 167 (Plain ils : rest) -> Plain (checkbox : Space : ils) : rest 168 (Para ils : rest) -> Para (checkbox : Space : ils) : rest 169 bs -> Plain [checkbox] : bs 170 where checkbox = Str (if checked then "\9746" else "\9744") 171 172instance Rangeable (Cm a B.Blocks) 173 => HasDiv (Cm a B.Blocks) where 174 div_ bs = B.divWith nullAttr <$> bs 175 176instance HasStrikethrough (Cm a B.Inlines) where 177 strikethrough ils = B.strikeout <$> ils 178 179instance HasSuperscript (Cm a B.Inlines) where 180 superscript ils = B.superscript <$> ils 181 182instance HasSubscript (Cm a B.Inlines) where 183 subscript ils = B.subscript <$> ils 184 185instance Rangeable (Cm a B.Inlines) => HasSpan (Cm a B.Inlines) where 186 spanWith attrs ils = 187 B.spanWith (addToPandocAttr attrs nullAttr) <$> ils 188 189instance HasAttributes (Cm a B.Blocks) where 190 addAttributes attrs b = fmap (addBlockAttrs attrs) <$> b 191 192instance HasAttributes (Cm a B.Inlines) where 193 addAttributes attrs il = fmap (addInlineAttrs attrs) <$> il 194 195addBlockAttrs :: [(T.Text, T.Text)] -> Block -> Block 196addBlockAttrs attrs (Header n curattrs ils) = 197 Header n (addToPandocAttr attrs curattrs) ils 198addBlockAttrs attrs (CodeBlock curattrs s) = 199 CodeBlock (addToPandocAttr attrs curattrs) s 200addBlockAttrs attrs (Table curattrs capt colspecs thead tbody tfoot) = 201 Table (addToPandocAttr attrs curattrs) capt colspecs thead tbody tfoot 202addBlockAttrs attrs (Div curattrs bs) = 203 Div (addToPandocAttr attrs curattrs) bs 204addBlockAttrs attrs x = 205 Div (addToPandocAttr attrs nullAttr) [x] 206 207addInlineAttrs :: [(T.Text, T.Text)] -> Inline -> Inline 208addInlineAttrs attrs (Link curattrs ils target) = 209 Link (addToPandocAttr attrs curattrs) ils target 210addInlineAttrs attrs (Image curattrs ils target) = 211 Image (addToPandocAttr attrs curattrs) ils target 212addInlineAttrs attrs (Span curattrs ils) = 213 Span (addToPandocAttr attrs curattrs) ils 214addInlineAttrs attrs (Code curattrs s) = 215 Code (addToPandocAttr attrs curattrs) s 216addInlineAttrs attrs x = 217 Span (addToPandocAttr attrs nullAttr) [x] 218 219addToPandocAttr :: Attributes -> Attr -> Attr 220addToPandocAttr attrs attr = foldr go attr attrs 221 where 222 go ("id", v) (_, cls, kvs) = (v, cls, kvs) 223 go ("class", v) (ident, cls, kvs) = (ident, v:cls, kvs) 224 go (k, v) (ident, cls, kvs) = (ident, cls, (k,v):kvs) 225 226instance (Rangeable (Cm a B.Inlines), Rangeable (Cm a B.Blocks)) 227 => HasFootnote (Cm a B.Inlines) (Cm a B.Blocks) where 228 footnote _num _lab _x = mempty 229 footnoteList _xs = mempty 230 footnoteRef _num _lab contents = B.note <$> contents 231 232illegalCodePoint :: T.Text -> Bool 233illegalCodePoint t = 234 "&#" `T.isPrefixOf` t && 235 let t' = T.drop 2 $ T.filter (/=';') t 236 badvalue (n, r) = not (T.null r) || 237 n < 1 || 238 n > (0x10FFFF :: Integer) 239 in 240 case T.uncons t' of 241 Nothing -> True 242 Just (x, rest) 243 | x == 'x' || x == 'X' 244 -> either (const True) badvalue (TR.hexadecimal rest) 245 | otherwise 246 -> either (const True) badvalue (TR.decimal t') 247 248stringify :: Walkable Inline a => a -> T.Text 249stringify = query go . walk (deNote . deQuote) 250 where go :: Inline -> T.Text 251 go Space = " " 252 go SoftBreak = " " 253 go (Str x) = x 254 go (Code _ x) = x 255 go (Math _ x) = x 256 go (RawInline (B.Format "html") t) 257 | "<br" `T.isPrefixOf` t = " " 258 go LineBreak = " " 259 go _ = mempty 260 261deNote :: Inline -> Inline 262deNote (Note _) = Str "" 263deNote x = x 264 265deQuote :: Inline -> Inline 266deQuote (Quoted SingleQuote xs) = 267 Span ("",[],[]) (Str "\8216" : xs ++ [Str "\8217"]) 268deQuote (Quoted DoubleQuote xs) = 269 Span ("",[],[]) (Str "\8220" : xs ++ [Str "\8221"]) 270deQuote x = x 271