1{-# LANGUAGE OverloadedStrings #-}
2{-# LANGUAGE ViewPatterns #-}
3{-# LANGUAGE FlexibleInstances #-}
4{-# LANGUAGE FlexibleContexts #-}
5{-# OPTIONS_GHC -fno-warn-orphans #-}
6-- | This module defines a 'CiteprocOutput' instance for pandoc 'Inlines'.
7module Citeproc.Pandoc
8  ()
9where
10import Text.Pandoc.Definition
11import Text.Pandoc.Builder as B
12import Text.Pandoc.Walk
13import qualified Data.Text as T
14import qualified Data.Sequence as Seq
15import Data.Text (Text)
16import Citeproc.Types
17import Citeproc.CaseTransform
18import Control.Monad.Trans.State.Strict as S
19import Control.Monad (unless, when)
20import Data.Functor.Reverse
21import Data.Char (isSpace, isPunctuation, isAlphaNum)
22
23instance CiteprocOutput Inlines where
24  toText                = stringify
25  fromText t            = (if " " `T.isPrefixOf` t
26                              then B.space
27                              else mempty) <>
28                          B.text t <> -- B.text eats leading/trailing spaces
29                          (if " " `T.isSuffixOf` t
30                              then B.space
31                              else mempty)
32  dropTextWhile         = dropTextWhile'
33  dropTextWhileEnd      = dropTextWhileEnd'
34  addFontVariant x      =
35    case x of
36      NormalVariant    -> id
37      SmallCapsVariant -> B.smallcaps
38  addFontStyle x        =
39    case x of
40      NormalFont       -> id
41      ItalicFont       -> B.emph
42      ObliqueFont      -> B.emph
43  addFontWeight x       =
44    case x of
45      NormalWeight     -> id
46      LightWeight      -> id
47      BoldWeight       -> B.strong
48  addTextDecoration x   =
49    case x of
50      NoDecoration        -> B.spanWith ("",["nodecoration"],[])
51      UnderlineDecoration -> B.underline
52  addVerticalAlign x    =
53    case x of
54      BaselineAlign    -> id
55      SubAlign         -> B.subscript
56      SupAlign         -> B.superscript
57  addTextCase mblang x =
58    case x of
59      Lowercase        -> caseTransform mblang withLowercaseAll
60      Uppercase        -> caseTransform mblang withUppercaseAll
61      CapitalizeFirst  -> caseTransform mblang withCapitalizeFirst
62      CapitalizeAll    -> caseTransform mblang withCapitalizeWords
63      SentenceCase     -> caseTransform mblang withSentenceCase
64      TitleCase        -> caseTransform mblang withTitleCase
65  addDisplay x          =
66    case x of
67      DisplayBlock       -> B.spanWith ("",["csl-block"],[])
68      DisplayLeftMargin  -> B.spanWith ("",["csl-left-margin"],[])
69      DisplayRightInline -> B.spanWith ("",["csl-right-inline"],[])
70      DisplayIndent      -> B.spanWith ("",["csl-indent"],[])
71  addQuotes             = B.doubleQuoted . flipFlopQuotes DoubleQuote
72  inNote                = B.note . B.para
73  movePunctuationInsideQuotes
74                        = punctuationInsideQuotes
75  mapText f             = walk go
76    where go (Str t) = Str (f t)
77          go x       = x
78  addHyperlink t        = B.link t ""
79
80flipFlopQuotes :: QuoteType -> Inlines -> Inlines
81flipFlopQuotes qt = B.fromList . map (go qt) . B.toList
82 where
83  go :: QuoteType -> Inline -> Inline
84  go q (Quoted _ zs) =
85    let q' = case q of
86               SingleQuote -> DoubleQuote
87               DoubleQuote -> SingleQuote
88    in  Quoted q' (map (go q') zs)
89  go q (SmallCaps zs) = SmallCaps (map (go q) zs)
90  go q (Superscript zs) = Superscript (map (go q) zs)
91  go q (Subscript zs) = Subscript (map (go q) zs)
92  go q (Span attr zs) = Span attr (map (go q) zs)
93  go q (Emph zs) = Emph (map (go q) zs)
94  go q (Underline zs) = Underline (map (go q) zs)
95  go q (Strong zs) = Strong (map (go q) zs)
96  go q (Strikeout zs) = Strikeout (map (go q) zs)
97  go q (Cite cs zs) = Cite cs (map (go q) zs)
98  go q (Link attr zs t) = Link attr (map (go q) zs) t
99  go q (Image attr zs t) = Image attr (map (go q) zs) t
100  go _ x = x
101
102punctuationInsideQuotes :: Inlines -> Inlines
103punctuationInsideQuotes = B.fromList . go . walk go . B.toList
104 where
105  startsWithMovable t =
106    case T.uncons t of
107      Just (c,_) -> c == '.' || c == ',' || c == '!' || c == '?'
108      Nothing    -> False
109  go [] = []
110  go (Quoted qt xs : Str t : rest)
111    | startsWithMovable t
112      = Quoted qt (xs ++ [Str (T.take 1 t) | not (endWithPunct True xs)]) :
113        if T.length t == 1
114           then go rest
115           else Str (T.drop 1 t) : go rest
116  go (x:xs) = x : go xs
117
118endWithPunct :: Bool -> [Inline] -> Bool
119endWithPunct _ [] = False
120endWithPunct onlyFinal xs@(_:_) =
121  case reverse (T.unpack $ stringify xs) of
122       []                       -> True
123       -- covers .), .", etc.:
124       (d:c:_) | isPunctuation d
125                 && not onlyFinal
126                 && isEndPunct c -> True
127       (c:_) | isEndPunct c      -> True
128             | otherwise         -> False
129  where isEndPunct c = c `elem` (".,;:!?" :: String)
130
131dropTextWhile' :: (Char -> Bool) -> Inlines -> Inlines
132dropTextWhile' f ils = evalState (walkM go ils) True
133 where
134  go x = do
135    atStart <- get
136    if atStart
137       then
138         case x of
139           Str t -> do
140             let t' = T.dropWhile f t
141             unless (T.null t') $
142               put False
143             return $ Str t'
144           Space ->
145             if f ' '
146                then return $ Str ""
147                else do
148                  put False
149                  return Space
150           _ -> return x
151       else return x
152
153
154dropTextWhileEnd' :: (Char -> Bool) -> Inlines -> Inlines
155dropTextWhileEnd' f ils =
156  getReverse $ evalState (walkM go $ Reverse ils) True
157 where
158  go x = do
159    atEnd <- get
160    if atEnd
161       then
162         case x of
163           Str t -> do
164             let t' = T.dropWhileEnd f t
165             unless (T.null t') $
166               put False
167             return $ Str t'
168           Space | f ' ' -> return $ Str ""
169           _ -> return x
170       else return x
171
172-- taken from Text.Pandoc.Shared:
173
174-- | Convert pandoc structure to a string with formatting removed.
175-- Footnotes are skipped (since we don't want their contents in link
176-- labels).
177stringify :: Walkable Inline a => a -> T.Text
178stringify = query go . walk (unNote . unQuote)
179 where
180  go :: Inline -> T.Text
181  go Space                                       = " "
182  go SoftBreak                                   = " "
183  go (Str x)                                     = x
184  go (Code _ x)                                  = x
185  go (Math _ x)                                  = x
186  go (RawInline (Format "html") (T.unpack -> ('<':'b':'r':_)))
187                                                 = " " -- see #2105
188  go LineBreak                                   = " "
189  go _                                           = ""
190
191  unNote :: Inline -> Inline
192  unNote (Note _) = Str ""
193  unNote x        = x
194
195  unQuote :: Inline -> Inline
196  unQuote (Quoted SingleQuote xs) =
197    Span ("",[],[]) (Str "\8216" : xs ++ [Str "\8217"])
198  unQuote (Quoted DoubleQuote xs) =
199    Span ("",[],[]) (Str "\8220" : xs ++ [Str "\8221"])
200  unQuote x = x
201
202
203caseTransform :: Maybe Lang
204              -> CaseTransformer
205              -> Inlines
206              -> Inlines
207caseTransform mblang f x =
208  evalState (caseTransform' (unCaseTransformer f mblang) x) Start
209
210
211-- custom traversal which does not descend into
212-- SmallCaps, Superscript, Subscript, Span "nocase" (implicit nocase)
213caseTransform' :: (CaseTransformState -> Text -> Text)
214               -> Inlines
215               -> State CaseTransformState Inlines
216caseTransform' f ils =
217  case Seq.viewr (unMany ils) of
218    xs Seq.:> Str t | not (Seq.null xs)
219                    , not (hasWordBreak t) -> do
220        xs' <- mapM go xs
221        st <- get
222        when (st == AfterWordEnd || st == StartSentence || st == Start) $
223          put BeforeLastWord
224        x' <- go (Str t)
225        return $ Many $ xs' Seq.|> x'
226    _ -> mapM go ils
227 where
228  go (Str t) = Str . mconcat <$> mapM g (splitUp t)
229  go Space = Space <$ g " "
230  go (SmallCaps zs) = return' $ SmallCaps zs
231  go (Superscript zs) = return' $ Superscript zs
232  go (Subscript zs) = return' $ Subscript zs
233  go (Span attr@(_,classes,_) zs)
234      | "nocase" `elem` classes = do
235            st <- get
236            case st of
237              AfterWordChar | classes == ["nocase"]
238                   -- we need to apply g to update the state:
239                -> return' $ Span nullAttr zs
240              _ -> return' $ Span attr zs
241      | otherwise = Span attr <$> mapM go zs
242  go (Emph zs) = Emph <$> mapM go zs
243  go (Underline zs) = Underline <$> mapM go zs
244  go (Strong zs) = Strong <$> mapM go zs
245  go (Strikeout zs) = Strikeout <$> mapM go zs
246  go (Quoted qt zs) = Quoted qt <$> mapM go zs
247  go (Cite cs zs) = Cite cs <$> mapM go zs
248  go (Link attr zs t) = (\x -> Link attr x t) <$> mapM go zs
249  go (Image attr zs t) = (\x -> Image attr x t) <$> mapM go zs
250  go x = return x
251
252  -- we need to apply g to update the state:
253  return' x = x <$ g (query fromStr x)
254
255  fromStr (Str t) = t
256  fromStr _ = mempty
257
258  g :: Text -> State CaseTransformState Text
259  g t = do
260    st <- get
261    put $ case T.unsnoc t of
262            Nothing -> st
263            Just (_,c)
264              | c == '.' || c == '?' || c == '!' || c == ':' ->
265                AfterSentenceEndingPunctuation
266              | isAlphaNum c -> AfterWordChar
267              | isSpace c
268              , st == AfterSentenceEndingPunctuation -> StartSentence
269              | isWordBreak c -> AfterWordEnd
270              | otherwise -> st
271    return $
272      if T.all isAlphaNum t
273         then f st t
274         else t
275  isWordBreak '-' = True
276  isWordBreak '/' = True
277  isWordBreak '\x2013' = True
278  isWordBreak '\x2014' = True
279  isWordBreak c = isSpace c
280  hasWordBreak = T.any isWordBreak
281  splitUp = T.groupBy sameType
282  sameType c d =
283    (isAlphaNum c && isAlphaNum d) || (isSpace c && isSpace d)
284