1{-# LANGUAGE OverloadedStrings #-} 2{-# LANGUAGE ViewPatterns #-} 3{-# LANGUAGE FlexibleInstances #-} 4{-# LANGUAGE FlexibleContexts #-} 5{-# OPTIONS_GHC -fno-warn-orphans #-} 6-- | This module defines a 'CiteprocOutput' instance for pandoc 'Inlines'. 7module Citeproc.Pandoc 8 () 9where 10import Text.Pandoc.Definition 11import Text.Pandoc.Builder as B 12import Text.Pandoc.Walk 13import qualified Data.Text as T 14import qualified Data.Sequence as Seq 15import Data.Text (Text) 16import Citeproc.Types 17import Citeproc.CaseTransform 18import Control.Monad.Trans.State.Strict as S 19import Control.Monad (unless, when) 20import Data.Functor.Reverse 21import Data.Char (isSpace, isPunctuation, isAlphaNum) 22 23instance CiteprocOutput Inlines where 24 toText = stringify 25 fromText t = (if " " `T.isPrefixOf` t 26 then B.space 27 else mempty) <> 28 B.text t <> -- B.text eats leading/trailing spaces 29 (if " " `T.isSuffixOf` t 30 then B.space 31 else mempty) 32 dropTextWhile = dropTextWhile' 33 dropTextWhileEnd = dropTextWhileEnd' 34 addFontVariant x = 35 case x of 36 NormalVariant -> id 37 SmallCapsVariant -> B.smallcaps 38 addFontStyle x = 39 case x of 40 NormalFont -> id 41 ItalicFont -> B.emph 42 ObliqueFont -> B.emph 43 addFontWeight x = 44 case x of 45 NormalWeight -> id 46 LightWeight -> id 47 BoldWeight -> B.strong 48 addTextDecoration x = 49 case x of 50 NoDecoration -> B.spanWith ("",["nodecoration"],[]) 51 UnderlineDecoration -> B.underline 52 addVerticalAlign x = 53 case x of 54 BaselineAlign -> id 55 SubAlign -> B.subscript 56 SupAlign -> B.superscript 57 addTextCase mblang x = 58 case x of 59 Lowercase -> caseTransform mblang withLowercaseAll 60 Uppercase -> caseTransform mblang withUppercaseAll 61 CapitalizeFirst -> caseTransform mblang withCapitalizeFirst 62 CapitalizeAll -> caseTransform mblang withCapitalizeWords 63 SentenceCase -> caseTransform mblang withSentenceCase 64 TitleCase -> caseTransform mblang withTitleCase 65 addDisplay x = 66 case x of 67 DisplayBlock -> B.spanWith ("",["csl-block"],[]) 68 DisplayLeftMargin -> B.spanWith ("",["csl-left-margin"],[]) 69 DisplayRightInline -> B.spanWith ("",["csl-right-inline"],[]) 70 DisplayIndent -> B.spanWith ("",["csl-indent"],[]) 71 addQuotes = B.doubleQuoted . flipFlopQuotes DoubleQuote 72 inNote = B.note . B.para 73 movePunctuationInsideQuotes 74 = punctuationInsideQuotes 75 mapText f = walk go 76 where go (Str t) = Str (f t) 77 go x = x 78 addHyperlink t = B.link t "" 79 80flipFlopQuotes :: QuoteType -> Inlines -> Inlines 81flipFlopQuotes qt = B.fromList . map (go qt) . B.toList 82 where 83 go :: QuoteType -> Inline -> Inline 84 go q (Quoted _ zs) = 85 let q' = case q of 86 SingleQuote -> DoubleQuote 87 DoubleQuote -> SingleQuote 88 in Quoted q' (map (go q') zs) 89 go q (SmallCaps zs) = SmallCaps (map (go q) zs) 90 go q (Superscript zs) = Superscript (map (go q) zs) 91 go q (Subscript zs) = Subscript (map (go q) zs) 92 go q (Span attr zs) = Span attr (map (go q) zs) 93 go q (Emph zs) = Emph (map (go q) zs) 94 go q (Underline zs) = Underline (map (go q) zs) 95 go q (Strong zs) = Strong (map (go q) zs) 96 go q (Strikeout zs) = Strikeout (map (go q) zs) 97 go q (Cite cs zs) = Cite cs (map (go q) zs) 98 go q (Link attr zs t) = Link attr (map (go q) zs) t 99 go q (Image attr zs t) = Image attr (map (go q) zs) t 100 go _ x = x 101 102punctuationInsideQuotes :: Inlines -> Inlines 103punctuationInsideQuotes = B.fromList . go . walk go . B.toList 104 where 105 startsWithMovable t = 106 case T.uncons t of 107 Just (c,_) -> c == '.' || c == ',' || c == '!' || c == '?' 108 Nothing -> False 109 go [] = [] 110 go (Quoted qt xs : Str t : rest) 111 | startsWithMovable t 112 = Quoted qt (xs ++ [Str (T.take 1 t) | not (endWithPunct True xs)]) : 113 if T.length t == 1 114 then go rest 115 else Str (T.drop 1 t) : go rest 116 go (x:xs) = x : go xs 117 118endWithPunct :: Bool -> [Inline] -> Bool 119endWithPunct _ [] = False 120endWithPunct onlyFinal xs@(_:_) = 121 case reverse (T.unpack $ stringify xs) of 122 [] -> True 123 -- covers .), .", etc.: 124 (d:c:_) | isPunctuation d 125 && not onlyFinal 126 && isEndPunct c -> True 127 (c:_) | isEndPunct c -> True 128 | otherwise -> False 129 where isEndPunct c = c `elem` (".,;:!?" :: String) 130 131dropTextWhile' :: (Char -> Bool) -> Inlines -> Inlines 132dropTextWhile' f ils = evalState (walkM go ils) True 133 where 134 go x = do 135 atStart <- get 136 if atStart 137 then 138 case x of 139 Str t -> do 140 let t' = T.dropWhile f t 141 unless (T.null t') $ 142 put False 143 return $ Str t' 144 Space -> 145 if f ' ' 146 then return $ Str "" 147 else do 148 put False 149 return Space 150 _ -> return x 151 else return x 152 153 154dropTextWhileEnd' :: (Char -> Bool) -> Inlines -> Inlines 155dropTextWhileEnd' f ils = 156 getReverse $ evalState (walkM go $ Reverse ils) True 157 where 158 go x = do 159 atEnd <- get 160 if atEnd 161 then 162 case x of 163 Str t -> do 164 let t' = T.dropWhileEnd f t 165 unless (T.null t') $ 166 put False 167 return $ Str t' 168 Space | f ' ' -> return $ Str "" 169 _ -> return x 170 else return x 171 172-- taken from Text.Pandoc.Shared: 173 174-- | Convert pandoc structure to a string with formatting removed. 175-- Footnotes are skipped (since we don't want their contents in link 176-- labels). 177stringify :: Walkable Inline a => a -> T.Text 178stringify = query go . walk (unNote . unQuote) 179 where 180 go :: Inline -> T.Text 181 go Space = " " 182 go SoftBreak = " " 183 go (Str x) = x 184 go (Code _ x) = x 185 go (Math _ x) = x 186 go (RawInline (Format "html") (T.unpack -> ('<':'b':'r':_))) 187 = " " -- see #2105 188 go LineBreak = " " 189 go _ = "" 190 191 unNote :: Inline -> Inline 192 unNote (Note _) = Str "" 193 unNote x = x 194 195 unQuote :: Inline -> Inline 196 unQuote (Quoted SingleQuote xs) = 197 Span ("",[],[]) (Str "\8216" : xs ++ [Str "\8217"]) 198 unQuote (Quoted DoubleQuote xs) = 199 Span ("",[],[]) (Str "\8220" : xs ++ [Str "\8221"]) 200 unQuote x = x 201 202 203caseTransform :: Maybe Lang 204 -> CaseTransformer 205 -> Inlines 206 -> Inlines 207caseTransform mblang f x = 208 evalState (caseTransform' (unCaseTransformer f mblang) x) Start 209 210 211-- custom traversal which does not descend into 212-- SmallCaps, Superscript, Subscript, Span "nocase" (implicit nocase) 213caseTransform' :: (CaseTransformState -> Text -> Text) 214 -> Inlines 215 -> State CaseTransformState Inlines 216caseTransform' f ils = 217 case Seq.viewr (unMany ils) of 218 xs Seq.:> Str t | not (Seq.null xs) 219 , not (hasWordBreak t) -> do 220 xs' <- mapM go xs 221 st <- get 222 when (st == AfterWordEnd || st == StartSentence || st == Start) $ 223 put BeforeLastWord 224 x' <- go (Str t) 225 return $ Many $ xs' Seq.|> x' 226 _ -> mapM go ils 227 where 228 go (Str t) = Str . mconcat <$> mapM g (splitUp t) 229 go Space = Space <$ g " " 230 go (SmallCaps zs) = return' $ SmallCaps zs 231 go (Superscript zs) = return' $ Superscript zs 232 go (Subscript zs) = return' $ Subscript zs 233 go (Span attr@(_,classes,_) zs) 234 | "nocase" `elem` classes = do 235 st <- get 236 case st of 237 AfterWordChar | classes == ["nocase"] 238 -- we need to apply g to update the state: 239 -> return' $ Span nullAttr zs 240 _ -> return' $ Span attr zs 241 | otherwise = Span attr <$> mapM go zs 242 go (Emph zs) = Emph <$> mapM go zs 243 go (Underline zs) = Underline <$> mapM go zs 244 go (Strong zs) = Strong <$> mapM go zs 245 go (Strikeout zs) = Strikeout <$> mapM go zs 246 go (Quoted qt zs) = Quoted qt <$> mapM go zs 247 go (Cite cs zs) = Cite cs <$> mapM go zs 248 go (Link attr zs t) = (\x -> Link attr x t) <$> mapM go zs 249 go (Image attr zs t) = (\x -> Image attr x t) <$> mapM go zs 250 go x = return x 251 252 -- we need to apply g to update the state: 253 return' x = x <$ g (query fromStr x) 254 255 fromStr (Str t) = t 256 fromStr _ = mempty 257 258 g :: Text -> State CaseTransformState Text 259 g t = do 260 st <- get 261 put $ case T.unsnoc t of 262 Nothing -> st 263 Just (_,c) 264 | c == '.' || c == '?' || c == '!' || c == ':' -> 265 AfterSentenceEndingPunctuation 266 | isAlphaNum c -> AfterWordChar 267 | isSpace c 268 , st == AfterSentenceEndingPunctuation -> StartSentence 269 | isWordBreak c -> AfterWordEnd 270 | otherwise -> st 271 return $ 272 if T.all isAlphaNum t 273 then f st t 274 else t 275 isWordBreak '-' = True 276 isWordBreak '/' = True 277 isWordBreak '\x2013' = True 278 isWordBreak '\x2014' = True 279 isWordBreak c = isSpace c 280 hasWordBreak = T.any isWordBreak 281 splitUp = T.groupBy sameType 282 sameType c d = 283 (isAlphaNum c && isAlphaNum d) || (isSpace c && isSpace d) 284