1{-# LANGUAGE CPP                        #-}
2{-# LANGUAGE ExtendedDefaultRules       #-}
3{-# LANGUAGE FlexibleContexts           #-}
4{-# LANGUAGE FlexibleInstances          #-}
5{-# LANGUAGE GeneralizedNewtypeDeriving #-}
6{-# LANGUAGE MultiParamTypeClasses      #-}
7{-# LANGUAGE OverloadedStrings          #-}
8{-# LANGUAGE UndecidableInstances       #-}
9
10module Commonmark.Pandoc
11  ( Cm(..)
12  )
13
14where
15
16import Data.Maybe (fromMaybe)
17import qualified Data.Text as T
18import qualified Data.Text.Read as TR
19import Text.Pandoc.Definition
20import Text.Pandoc.Walk
21import qualified Text.Pandoc.Builder as B
22import Commonmark.Types as C
23import Commonmark.Entity (lookupEntity)
24import Commonmark.Extensions.Math
25import Commonmark.Extensions.Emoji
26import Commonmark.Extensions.Wikilinks
27import Commonmark.Extensions.PipeTable
28import Commonmark.Extensions.Strikethrough
29import Commonmark.Extensions.Superscript
30import Commonmark.Extensions.Subscript
31import Commonmark.Extensions.DefinitionList
32import Commonmark.Extensions.Attributes
33import Commonmark.Extensions.Footnote
34import Commonmark.Extensions.TaskList
35import Commonmark.Extensions.Smart
36import Data.Char (isSpace)
37import Data.Coerce (coerce)
38#if !MIN_VERSION_base(4,11,0)
39import Data.Semigroup       (Semigroup, (<>))
40#endif
41
42newtype Cm b a = Cm { unCm :: a }
43  deriving (Show, Semigroup, Monoid)
44
45instance Functor (Cm b) where
46  fmap f (Cm x) = Cm (f x)
47
48instance Rangeable (Cm b B.Inlines) => IsInline (Cm b B.Inlines) where
49  lineBreak = Cm B.linebreak
50  softBreak = Cm B.softbreak
51  str t = Cm $ B.text t
52  entity t
53    | illegalCodePoint t = Cm $ B.str "\xFFFD"
54    | otherwise = Cm $ B.str $ fromMaybe t $ lookupEntity (T.drop 1 t)
55  escapedChar c = Cm $ B.str $ T.singleton c
56  emph ils = B.emph <$> ils
57  strong ils = B.strong <$> ils
58  link target title ils = B.link target title <$> ils
59  image target title ils = B.image target title <$> ils
60  code t = Cm $ B.code t
61  rawInline (C.Format f) t = Cm $ B.rawInline f t
62
63instance Rangeable (Cm () B.Inlines) where
64  ranged _r x = x
65
66instance Rangeable (Cm SourceRange B.Inlines) where
67  ranged r = addAttributes [("data-pos", T.pack (show r))]
68
69instance Walkable Inline b => ToPlainText (Cm a b) where
70  toPlainText = stringify . walk unemoji . unCm
71
72unemoji :: Inline -> Inline
73unemoji (Span ("",["emoji"],[("data-emoji",alias)]) _)
74          = Str (":" <> alias <> ":")
75unemoji x = x
76
77instance (Rangeable (Cm a B.Inlines),
78          Rangeable (Cm a B.Blocks))
79      => IsBlock (Cm a B.Inlines) (Cm a B.Blocks) where
80  paragraph ils = Cm $ B.para $ unCm ils
81  plain ils = Cm $ B.plain $ unCm ils
82  thematicBreak = Cm B.horizontalRule
83  blockQuote bs = B.blockQuote <$> bs
84  codeBlock info t =
85    Cm $ B.codeBlockWith attr $ fromMaybe t $ T.stripSuffix "\n" t
86    where attr = ("", [lang | not (T.null lang)], [])
87          lang = T.takeWhile (not . isSpace) info
88  heading level ils = Cm $ B.header level $ unCm ils
89  rawBlock (C.Format f) t = Cm $ B.rawBlock f t
90  referenceLinkDefinition _ _ = Cm mempty
91  list (C.BulletList _) lSpacing items =
92    Cm . B.bulletList . handleSpacing lSpacing . map unCm $ items
93  list (C.OrderedList startnum enumtype delimtype) lSpacing items =
94    Cm . B.orderedListWith attr . handleSpacing lSpacing . map unCm $ items
95    where sty = case enumtype of
96                  C.Decimal    -> B.Decimal
97                  C.UpperAlpha -> B.UpperAlpha
98                  C.LowerAlpha -> B.LowerAlpha
99                  C.UpperRoman -> B.UpperRoman
100                  C.LowerRoman -> B.LowerRoman
101          delim = case delimtype of
102                    C.Period    -> B.Period
103                    C.OneParen  -> B.OneParen
104                    C.TwoParens -> B.TwoParens
105          attr = (startnum, sty, delim)
106
107instance Rangeable (Cm () B.Blocks) where
108  ranged _r x = x
109
110instance Rangeable (Cm SourceRange B.Blocks) where
111  ranged r x = B.divWith ("",[],[("data-pos",T.pack (show r))]) <$> x
112
113instance HasMath (Cm b B.Inlines) where
114  inlineMath t = Cm $ B.math t
115  displayMath t = Cm $ B.displayMath t
116
117instance Rangeable (Cm b B.Inlines) => HasQuoted (Cm b B.Inlines) where
118  singleQuoted x = B.singleQuoted <$> x
119  doubleQuoted x = B.doubleQuoted <$> x
120
121instance HasEmoji (Cm b B.Inlines) where
122  emoji kw t = Cm $ B.spanWith ("",["emoji"],[("data-emoji",kw)])
123                  $ B.text t
124
125instance HasWikilinks (Cm b B.Inlines) where
126  wikilink t il = Cm $ B.link t "wikilink" $ unCm il
127
128instance HasPipeTable (Cm a B.Inlines) (Cm a B.Blocks) where
129  pipeTable aligns headerCells rows =
130    Cm $ B.table B.emptyCaption colspecs
131           (TableHead nullAttr (toHeaderRow headerCells))
132           [TableBody nullAttr 0 [] $ map toRow rows]
133           (TableFoot nullAttr [])
134    where
135     toHeaderRow cells
136       | null cells  = []
137       | otherwise   = [toRow cells]
138     toRow = Row nullAttr . map (B.simpleCell . B.plain . unCm)
139     toPandocAlignment LeftAlignedCol = AlignLeft
140     toPandocAlignment CenterAlignedCol = AlignCenter
141     toPandocAlignment RightAlignedCol = AlignRight
142     toPandocAlignment DefaultAlignedCol = AlignDefault
143     colspecs = map (\al -> (toPandocAlignment al, ColWidthDefault))
144                 aligns
145
146instance (Rangeable (Cm a B.Inlines), Rangeable (Cm a B.Blocks))
147  => HasDefinitionList (Cm a B.Inlines) (Cm a B.Blocks) where
148  definitionList _ items =
149    Cm $ B.definitionList $ map coerce items
150
151instance (Rangeable (Cm a B.Inlines), Rangeable (Cm a B.Blocks))
152  => HasTaskList (Cm a B.Inlines) (Cm a B.Blocks) where
153  taskList _ spacing items =
154    Cm $ B.bulletList $ handleSpacing spacing $ map toTaskListItem items
155
156handleSpacing :: ListSpacing -> [B.Blocks] -> [B.Blocks]
157handleSpacing TightList = map (B.fromList . map paraToPlain . B.toList)
158handleSpacing LooseList = id
159
160paraToPlain :: Block -> Block
161paraToPlain (Para xs) = Plain xs
162paraToPlain x = x
163
164toTaskListItem :: (Bool, Cm a B.Blocks) -> B.Blocks
165toTaskListItem (checked, item) = B.fromList $
166  case B.toList $ coerce item of
167    (Plain ils : rest) -> Plain (checkbox : Space : ils) : rest
168    (Para  ils : rest) -> Para  (checkbox : Space : ils) : rest
169    bs                 -> Plain [checkbox] : bs
170    where checkbox = Str (if checked then "\9746" else "\9744")
171
172instance Rangeable (Cm a B.Blocks)
173  => HasDiv (Cm a B.Blocks) where
174  div_ bs = B.divWith nullAttr <$> bs
175
176instance HasStrikethrough (Cm a B.Inlines) where
177  strikethrough ils = B.strikeout <$> ils
178
179instance HasSuperscript (Cm a B.Inlines) where
180  superscript ils = B.superscript <$> ils
181
182instance HasSubscript (Cm a B.Inlines) where
183  subscript ils = B.subscript <$> ils
184
185instance Rangeable (Cm a B.Inlines) => HasSpan (Cm a B.Inlines) where
186  spanWith attrs ils =
187    B.spanWith (addToPandocAttr attrs nullAttr) <$> ils
188
189instance HasAttributes (Cm a B.Blocks) where
190  addAttributes attrs b = fmap (addBlockAttrs attrs) <$> b
191
192instance HasAttributes (Cm a B.Inlines) where
193  addAttributes attrs il = fmap (addInlineAttrs attrs) <$> il
194
195addBlockAttrs :: [(T.Text, T.Text)] -> Block -> Block
196addBlockAttrs attrs (Header n curattrs ils) =
197  Header n (addToPandocAttr attrs curattrs) ils
198addBlockAttrs attrs (CodeBlock curattrs s) =
199  CodeBlock (addToPandocAttr attrs curattrs) s
200addBlockAttrs attrs (Table curattrs capt colspecs thead tbody tfoot) =
201  Table (addToPandocAttr attrs curattrs) capt colspecs thead tbody tfoot
202addBlockAttrs attrs (Div curattrs bs) =
203  Div (addToPandocAttr attrs curattrs) bs
204addBlockAttrs attrs x =
205  Div (addToPandocAttr attrs nullAttr) [x]
206
207addInlineAttrs :: [(T.Text, T.Text)] -> Inline -> Inline
208addInlineAttrs attrs (Link curattrs ils target) =
209  Link (addToPandocAttr attrs curattrs) ils target
210addInlineAttrs attrs (Image curattrs ils target) =
211  Image (addToPandocAttr attrs curattrs) ils target
212addInlineAttrs attrs (Span curattrs ils) =
213  Span (addToPandocAttr attrs curattrs) ils
214addInlineAttrs attrs (Code curattrs s) =
215  Code (addToPandocAttr attrs curattrs) s
216addInlineAttrs attrs x =
217  Span (addToPandocAttr attrs nullAttr) [x]
218
219addToPandocAttr :: Attributes -> Attr -> Attr
220addToPandocAttr attrs attr = foldr go attr attrs
221 where
222  go ("id", v) (_, cls, kvs) = (v, cls, kvs)
223  go ("class", v) (ident, cls, kvs) = (ident, v:cls, kvs)
224  go (k, v) (ident, cls, kvs) = (ident, cls, (k,v):kvs)
225
226instance (Rangeable (Cm a B.Inlines), Rangeable (Cm a B.Blocks))
227     => HasFootnote (Cm a B.Inlines) (Cm a B.Blocks) where
228  footnote _num _lab _x = mempty
229  footnoteList _xs = mempty
230  footnoteRef _num _lab contents = B.note <$> contents
231
232illegalCodePoint :: T.Text -> Bool
233illegalCodePoint t =
234  "&#" `T.isPrefixOf` t &&
235  let t' = T.drop 2 $ T.filter (/=';') t
236      badvalue (n, r) = not (T.null r) ||
237                        n < 1 ||
238                        n > (0x10FFFF :: Integer)
239  in
240  case T.uncons t' of
241       Nothing -> True
242       Just (x, rest)
243         | x == 'x' || x == 'X'
244           -> either (const True) badvalue (TR.hexadecimal rest)
245         | otherwise
246           -> either (const True) badvalue (TR.decimal t')
247
248stringify :: Walkable Inline a => a -> T.Text
249stringify = query go . walk (deNote . deQuote)
250  where go :: Inline -> T.Text
251        go Space                                         = " "
252        go SoftBreak                                     = " "
253        go (Str x)                                       = x
254        go (Code _ x)                                    = x
255        go (Math _ x)                                    = x
256        go (RawInline (B.Format "html") t)
257           | "<br" `T.isPrefixOf` t                      = " "
258        go LineBreak                                     = " "
259        go _                                             = mempty
260
261deNote :: Inline -> Inline
262deNote (Note _) = Str ""
263deNote x        = x
264
265deQuote :: Inline -> Inline
266deQuote (Quoted SingleQuote xs) =
267  Span ("",[],[]) (Str "\8216" : xs ++ [Str "\8217"])
268deQuote (Quoted DoubleQuote xs) =
269  Span ("",[],[]) (Str "\8220" : xs ++ [Str "\8221"])
270deQuote x = x
271