1{-# LANGUAGE LambdaCase #-} 2{-# LANGUAGE OverloadedStrings #-} 3{- | 4 Module : Text.Pandoc.Readers.Org.Inlines 5 Copyright : Copyright (C) 2014-2021 Albert Krewinkel 6 License : GNU GPL, version 2 or above 7 8 Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> 9 10Parsers for Org-mode inline elements. 11-} 12module Text.Pandoc.Readers.Org.Inlines 13 ( inline 14 , inlines 15 , addToNotesTable 16 , linkTarget 17 ) where 18 19import Text.Pandoc.Readers.Org.BlockStarts (endOfBlock, noteMarker) 20import Text.Pandoc.Readers.Org.ParserState 21import Text.Pandoc.Readers.Org.Parsing 22import Text.Pandoc.Readers.Org.Shared (cleanLinkText, isImageFilename, 23 originalLang, translateLang, exportsCode) 24 25import Text.Pandoc.Builder (Inlines) 26import qualified Text.Pandoc.Builder as B 27import Text.Pandoc.Class.PandocMonad (PandocMonad) 28import Text.Pandoc.Definition 29import Text.Pandoc.Options 30import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) 31import Text.TeXMath (DisplayType (..), readTeX, writePandoc) 32import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap 33 34import Control.Monad (guard, mplus, mzero, unless, void, when) 35import Control.Monad.Trans (lift) 36import Data.Char (isAlphaNum, isSpace) 37import Data.List (intersperse) 38import qualified Data.Map as M 39import Data.Text (Text) 40import qualified Data.Text as T 41 42-- 43-- Functions acting on the parser state 44-- 45recordAnchorId :: PandocMonad m => Text -> OrgParser m () 46recordAnchorId i = updateState $ \s -> 47 s{ orgStateAnchorIds = i : orgStateAnchorIds s } 48 49pushToInlineCharStack :: PandocMonad m => Char -> OrgParser m () 50pushToInlineCharStack c = updateState $ \s -> 51 s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s } 52 53popInlineCharStack :: PandocMonad m => OrgParser m () 54popInlineCharStack = updateState $ \s -> 55 s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s } 56 57surroundingEmphasisChar :: PandocMonad m => OrgParser m [Char] 58surroundingEmphasisChar = 59 take 1 . drop 1 . orgStateEmphasisCharStack <$> getState 60 61startEmphasisNewlinesCounting :: PandocMonad m => Int -> OrgParser m () 62startEmphasisNewlinesCounting maxNewlines = updateState $ \s -> 63 s{ orgStateEmphasisNewlines = Just maxNewlines } 64 65decEmphasisNewlinesCount :: PandocMonad m => OrgParser m () 66decEmphasisNewlinesCount = updateState $ \s -> 67 s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s } 68 69newlinesCountWithinLimits :: PandocMonad m => OrgParser m Bool 70newlinesCountWithinLimits = do 71 st <- getState 72 return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True 73 74resetEmphasisNewlines :: PandocMonad m => OrgParser m () 75resetEmphasisNewlines = updateState $ \s -> 76 s{ orgStateEmphasisNewlines = Nothing } 77 78addToNotesTable :: PandocMonad m => OrgNoteRecord -> OrgParser m () 79addToNotesTable note = do 80 oldnotes <- orgStateNotes' <$> getState 81 updateState $ \s -> s{ orgStateNotes' = note:oldnotes } 82 83-- | Parse a single Org-mode inline element 84inline :: PandocMonad m => OrgParser m (F Inlines) 85inline = 86 choice [ whitespace 87 , linebreak 88 , cite 89 , footnote 90 , linkOrImage 91 , anchor 92 , inlineCodeBlock 93 , str 94 , endline 95 , emphasizedText 96 , code 97 , math 98 , displayMath 99 , verbatim 100 , subscript 101 , superscript 102 , inlineLaTeX 103 , exportSnippet 104 , macro 105 , smart 106 , symbol 107 ] <* (guard =<< newlinesCountWithinLimits) 108 <?> "inline" 109 110-- | Read the rest of the input as inlines. 111inlines :: PandocMonad m => OrgParser m (F Inlines) 112inlines = trimInlinesF . mconcat <$> many1 inline 113 114-- treat these as potentially non-text when parsing inline: 115specialChars :: [Char] 116specialChars = "\"$'()*+-,./:;<=>@[\\]^_{|}~" 117 118 119whitespace :: PandocMonad m => OrgParser m (F Inlines) 120whitespace = pure B.space <$ skipMany1 spaceChar 121 <* updateLastPreCharPos 122 <* updateLastForbiddenCharPos 123 <?> "whitespace" 124 125linebreak :: PandocMonad m => OrgParser m (F Inlines) 126linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline 127 128str :: PandocMonad m => OrgParser m (F Inlines) 129str = return . B.str <$> many1Char (noneOf $ specialChars ++ "\n\r ") 130 <* updateLastStrPos 131 132-- | An endline character that can be treated as a space, not a structural 133-- break. This should reflect the values of the Emacs variable 134-- @org-element-pagaraph-separate@. 135endline :: PandocMonad m => OrgParser m (F Inlines) 136endline = try $ do 137 newline 138 notFollowedBy' endOfBlock 139 decEmphasisNewlinesCount 140 guard =<< newlinesCountWithinLimits 141 updateLastPreCharPos 142 useHardBreaks <- exportPreserveBreaks . orgStateExportSettings <$> getState 143 returnF (if useHardBreaks then B.linebreak else B.softbreak) 144 145 146-- 147-- Citations 148-- 149 150-- The state of citations is a bit confusing due to the lack of an official 151-- syntax and multiple syntaxes coexisting. The pandocOrgCite syntax was the 152-- first to be implemented here and is almost identical to Markdown's citation 153-- syntax. The org-ref package is in wide use to handle citations, but the 154-- syntax is a bit limiting and not quite as simple to write. The 155-- semi-official Org-mode citation syntax is based on John MacFarlane's Pandoc 156-- sytax and Org-oriented enhancements contributed by Richard Lawrence and 157-- others. It's dubbed Berkeley syntax due the place of activity of its main 158-- contributors. All this should be consolidated once an official Org-mode 159-- citation syntax has emerged. 160 161cite :: PandocMonad m => OrgParser m (F Inlines) 162cite = try $ berkeleyCite <|> do 163 guardEnabled Ext_citations 164 (cs, raw) <- withRaw $ choice 165 [ pandocOrgCite 166 , orgRefCite 167 , berkeleyTextualCite 168 ] 169 return $ flip B.cite (B.text raw) <$> cs 170 171-- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@). 172pandocOrgCite :: PandocMonad m => OrgParser m (F [Citation]) 173pandocOrgCite = try $ 174 char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']' 175 176orgRefCite :: PandocMonad m => OrgParser m (F [Citation]) 177orgRefCite = try $ choice 178 [ normalOrgRefCite 179 , fmap (:[]) <$> linkLikeOrgRefCite 180 ] 181 182normalOrgRefCite :: PandocMonad m => OrgParser m (F [Citation]) 183normalOrgRefCite = try $ do 184 mode <- orgRefCiteMode 185 firstCitation <- orgRefCiteList mode 186 moreCitations <- many (try $ char ',' *> orgRefCiteList mode) 187 return . sequence $ firstCitation : moreCitations 188 where 189 -- | A list of org-ref style citation keys, parsed as citation of the given 190 -- citation mode. 191 orgRefCiteList :: PandocMonad m => CitationMode -> OrgParser m (F Citation) 192 orgRefCiteList citeMode = try $ do 193 key <- orgRefCiteKey 194 returnF Citation 195 { citationId = key 196 , citationPrefix = mempty 197 , citationSuffix = mempty 198 , citationMode = citeMode 199 , citationNoteNum = 0 200 , citationHash = 0 201 } 202 203-- | Read an Berkeley-style Org-mode citation. Berkeley citation style was 204-- develop and adjusted to Org-mode style by John MacFarlane and Richard 205-- Lawrence, respectively, both philosophers at UC Berkeley. 206berkeleyCite :: PandocMonad m => OrgParser m (F Inlines) 207berkeleyCite = try $ do 208 bcl <- berkeleyCitationList 209 return $ do 210 parens <- berkeleyCiteParens <$> bcl 211 prefix <- berkeleyCiteCommonPrefix <$> bcl 212 suffix <- berkeleyCiteCommonSuffix <$> bcl 213 citationList <- berkeleyCiteCitations <$> bcl 214 return $ 215 if parens 216 then toCite 217 . maybe id (alterFirst . prependPrefix) prefix 218 . maybe id (alterLast . appendSuffix) suffix 219 $ citationList 220 else maybe mempty (<> " ") prefix 221 <> toListOfCites (map toInTextMode citationList) 222 <> maybe mempty (", " <>) suffix 223 where 224 toCite :: [Citation] -> Inlines 225 toCite cs = B.cite cs mempty 226 227 toListOfCites :: [Citation] -> Inlines 228 toListOfCites = mconcat . intersperse ", " . map (\c -> B.cite [c] mempty) 229 230 toInTextMode :: Citation -> Citation 231 toInTextMode c = c { citationMode = AuthorInText } 232 233 alterFirst, alterLast :: (a -> a) -> [a] -> [a] 234 alterFirst _ [] = [] 235 alterFirst f (c:cs) = f c : cs 236 alterLast f = reverse . alterFirst f . reverse 237 238 prependPrefix, appendSuffix :: Inlines -> Citation -> Citation 239 prependPrefix pre c = c { citationPrefix = B.toList pre <> citationPrefix c } 240 appendSuffix suf c = c { citationSuffix = citationSuffix c <> B.toList suf } 241 242data BerkeleyCitationList = BerkeleyCitationList 243 { berkeleyCiteParens :: Bool 244 , berkeleyCiteCommonPrefix :: Maybe Inlines 245 , berkeleyCiteCommonSuffix :: Maybe Inlines 246 , berkeleyCiteCitations :: [Citation] 247 } 248berkeleyCitationList :: PandocMonad m => OrgParser m (F BerkeleyCitationList) 249berkeleyCitationList = try $ do 250 char '[' 251 parens <- choice [ False <$ berkeleyBareTag, True <$ berkeleyParensTag ] 252 char ':' 253 skipSpaces 254 commonPrefix <- optionMaybe (try $ citationListPart <* char ';') 255 citations <- citeList 256 commonSuffix <- optionMaybe (try citationListPart) 257 char ']' 258 return (BerkeleyCitationList parens 259 <$> sequence commonPrefix 260 <*> sequence commonSuffix 261 <*> citations) 262 where 263 citationListPart :: PandocMonad m => OrgParser m (F Inlines) 264 citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do 265 notFollowedBy' citeKey 266 notFollowedBy (oneOf ";]") 267 inline 268 269berkeleyBareTag :: PandocMonad m => OrgParser m () 270berkeleyBareTag = try $ void berkeleyBareTag' 271 272berkeleyParensTag :: PandocMonad m => OrgParser m () 273berkeleyParensTag = try . void $ enclosedByPair1 '(' ')' berkeleyBareTag' 274 275berkeleyBareTag' :: PandocMonad m => OrgParser m () 276berkeleyBareTag' = try $ void (string "cite") 277 278berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation]) 279berkeleyTextualCite = try $ do 280 (suppressAuthor, key) <- citeKey 281 returnF . return $ Citation 282 { citationId = key 283 , citationPrefix = mempty 284 , citationSuffix = mempty 285 , citationMode = if suppressAuthor then SuppressAuthor else AuthorInText 286 , citationNoteNum = 0 287 , citationHash = 0 288 } 289 290-- The following is what a Berkeley-style bracketed textual citation parser 291-- would look like. However, as these citations are a subset of Pandoc's Org 292-- citation style, this isn't used. 293-- berkeleyBracketedTextualCite :: PandocMonad m => OrgParser m (F [Citation]) 294-- berkeleyBracketedTextualCite = try . (fmap head) $ 295-- enclosedByPair1 '[' ']' berkeleyTextualCite 296 297-- | Read a link-like org-ref style citation. The citation includes pre and 298-- post text. However, multiple citations are not possible due to limitations 299-- in the syntax. 300linkLikeOrgRefCite :: PandocMonad m => OrgParser m (F Citation) 301linkLikeOrgRefCite = try $ do 302 _ <- string "[[" 303 mode <- orgRefCiteMode 304 key <- orgRefCiteKey 305 _ <- string "][" 306 pre <- trimInlinesF . mconcat <$> manyTill inline (try $ string "::") 307 spc <- option False (True <$ spaceChar) 308 suf <- trimInlinesF . mconcat <$> manyTill inline (try $ string "]]") 309 return $ do 310 pre' <- pre 311 suf' <- suf 312 return Citation 313 { citationId = key 314 , citationPrefix = B.toList pre' 315 , citationSuffix = B.toList (if spc then B.space <> suf' else suf') 316 , citationMode = mode 317 , citationNoteNum = 0 318 , citationHash = 0 319 } 320 321-- | Read a citation key. The characters allowed in citation keys are taken 322-- from the `org-ref-cite-re` variable in `org-ref.el`. 323orgRefCiteKey :: PandocMonad m => OrgParser m Text 324orgRefCiteKey = 325 let citeKeySpecialChars = "-_:\\./," :: String 326 isCiteKeySpecialChar c = c `elem` citeKeySpecialChars 327 isCiteKeyChar c = isAlphaNum c || isCiteKeySpecialChar c 328 endOfCitation = try $ do 329 many $ satisfy isCiteKeySpecialChar 330 satisfy $ not . isCiteKeyChar 331 in try $ satisfy isCiteKeyChar `many1TillChar` lookAhead endOfCitation 332 333 334-- | Supported citation types. Only a small subset of org-ref types is 335-- supported for now. TODO: rewrite this, use LaTeX reader as template. 336orgRefCiteMode :: PandocMonad m => OrgParser m CitationMode 337orgRefCiteMode = 338 choice $ map (\(s, mode) -> mode <$ try (string s <* char ':')) 339 [ ("cite", AuthorInText) 340 , ("citep", NormalCitation) 341 , ("citep*", NormalCitation) 342 , ("citet", AuthorInText) 343 , ("citet*", AuthorInText) 344 , ("citeyear", SuppressAuthor) 345 ] 346 347citeList :: PandocMonad m => OrgParser m (F [Citation]) 348citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces) 349 350citation :: PandocMonad m => OrgParser m (F Citation) 351citation = try $ do 352 pref <- prefix 353 (suppress_author, key) <- citeKey 354 suff <- suffix 355 return $ do 356 x <- pref 357 y <- suff 358 return Citation 359 { citationId = key 360 , citationPrefix = B.toList x 361 , citationSuffix = B.toList y 362 , citationMode = if suppress_author 363 then SuppressAuthor 364 else NormalCitation 365 , citationNoteNum = 0 366 , citationHash = 0 367 } 368 where 369 prefix = trimInlinesF . mconcat <$> 370 manyTill inline (char ']' <|> (']' <$ lookAhead citeKey)) 371 suffix = try $ do 372 hasSpace <- option False (notFollowedBy nonspaceChar >> return True) 373 skipSpaces 374 rest <- trimInlinesF . mconcat <$> 375 many (notFollowedBy (oneOf ";]") *> inline) 376 return $ if hasSpace 377 then (B.space <>) <$> rest 378 else rest 379 380footnote :: PandocMonad m => OrgParser m (F Inlines) 381footnote = try $ do 382 note <- inlineNote <|> referencedNote 383 withNote <- getExportSetting exportWithFootnotes 384 return $ if withNote then note else mempty 385 386inlineNote :: PandocMonad m => OrgParser m (F Inlines) 387inlineNote = try $ do 388 string "[fn:" 389 ref <- manyChar alphaNum 390 char ':' 391 note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']') 392 unless (T.null ref) $ 393 addToNotesTable ("fn:" <> ref, note) 394 return $ B.note <$> note 395 396referencedNote :: PandocMonad m => OrgParser m (F Inlines) 397referencedNote = try $ do 398 ref <- noteMarker 399 return $ do 400 notes <- asksF orgStateNotes' 401 case lookup ref notes of 402 Nothing -> return . B.str $ "[" <> ref <> "]" 403 Just contents -> do 404 st <- askF 405 let contents' = runF contents st{ orgStateNotes' = [] } 406 return $ B.note contents' 407 408linkOrImage :: PandocMonad m => OrgParser m (F Inlines) 409linkOrImage = explicitOrImageLink 410 <|> selflinkOrImage 411 <|> angleLink 412 <|> plainLink 413 <?> "link or image" 414 415explicitOrImageLink :: PandocMonad m => OrgParser m (F Inlines) 416explicitOrImageLink = try $ do 417 char '[' 418 srcF <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget 419 descr <- enclosedRaw (char '[') (char ']') 420 titleF <- parseFromString (mconcat <$> many inline) descr 421 char ']' 422 return $ do 423 src <- srcF 424 title <- titleF 425 case cleanLinkText descr of 426 Just imgSrc | isImageFilename imgSrc -> 427 return . B.link src "" $ B.image imgSrc mempty mempty 428 _ -> 429 linkToInlinesF src title 430 431selflinkOrImage :: PandocMonad m => OrgParser m (F Inlines) 432selflinkOrImage = try $ do 433 target <- char '[' *> linkTarget <* char ']' 434 case cleanLinkText target of 435 Nothing -> case T.uncons target of 436 Just ('#', _) -> returnF $ B.link target "" (B.str target) 437 _ -> return $ internalLink target (B.str target) 438 Just nonDocTgt -> if isImageFilename nonDocTgt 439 then returnF $ B.image nonDocTgt "" "" 440 else returnF $ B.link nonDocTgt "" (B.str target) 441 442plainLink :: PandocMonad m => OrgParser m (F Inlines) 443plainLink = try $ do 444 (orig, src) <- uri 445 returnF $ B.link src "" (B.str orig) 446 447angleLink :: PandocMonad m => OrgParser m (F Inlines) 448angleLink = try $ do 449 char '<' 450 link <- plainLink 451 char '>' 452 return link 453 454linkTarget :: PandocMonad m => OrgParser m Text 455linkTarget = T.pack <$> enclosedByPair1 '[' ']' (noneOf "\n\r[]") 456 457possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m Text 458possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]") 459 460applyCustomLinkFormat :: Text -> OrgParser m (F Text) 461applyCustomLinkFormat link = do 462 let (linkType, rest) = T.break (== ':') link 463 return $ do 464 formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters 465 return $ maybe link ($ T.drop 1 rest) formatter 466 467-- | Take a link and return a function which produces new inlines when given 468-- description inlines. 469linkToInlinesF :: Text -> Inlines -> F Inlines 470linkToInlinesF linkStr = 471 case T.uncons linkStr of 472 Nothing -> pure . B.link mempty "" -- wiki link (empty by convention) 473 Just ('#', _) -> pure . B.link linkStr "" -- document-local fraction 474 _ -> case cleanLinkText linkStr of 475 Just extTgt -> return . B.link extTgt "" 476 Nothing -> internalLink linkStr -- other internal link 477 478internalLink :: Text -> Inlines -> F Inlines 479internalLink link title = do 480 ids <- asksF orgStateAnchorIds 481 if link `elem` ids 482 then return $ B.link ("#" <> link) "" title 483 else let attr' = ("", ["spurious-link"] , [("target", link)]) 484 in return $ B.spanWith attr' (B.emph title) 485 486-- | Parse an anchor like @<<anchor-id>>@ and return an empty span with 487-- @anchor-id@ set as id. Legal anchors in org-mode are defined through 488-- @org-target-regexp@, which is fairly liberal. Since no link is created if 489-- @anchor-id@ contains spaces, we are more restrictive in what is accepted as 490-- an anchor. 491anchor :: PandocMonad m => OrgParser m (F Inlines) 492anchor = try $ do 493 anchorId <- parseAnchor 494 recordAnchorId anchorId 495 returnF $ B.spanWith (solidify anchorId, [], []) mempty 496 where 497 parseAnchor = string "<<" 498 *> many1Char (noneOf "\t\n\r<>\"' ") 499 <* string ">>" 500 <* skipSpaces 501 502-- | Replace every char but [a-zA-Z0-9_.-:] with a hyphen '-'. This mirrors 503-- the org function @org-export-solidify-link-text@. 504solidify :: Text -> Text 505solidify = T.map replaceSpecialChar 506 where replaceSpecialChar c 507 | isAlphaNum c = c 508 | c `elem` ("_.-:" :: String) = c 509 | otherwise = '-' 510 511-- | Parses an inline code block and marks it as an babel block. 512inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines) 513inlineCodeBlock = try $ do 514 string "src_" 515 lang <- many1Char orgArgWordChar 516 opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption 517 inlineCode <- T.pack <$> enclosedByPair1 '{' '}' (noneOf "\n\r") 518 let attrClasses = [translateLang lang] 519 let attrKeyVal = originalLang lang <> opts 520 let codeInlineBlck = B.codeWith ("", attrClasses, attrKeyVal) inlineCode 521 returnF $ if exportsCode opts then codeInlineBlck else mempty 522 where 523 inlineBlockOption :: PandocMonad m => OrgParser m (Text, Text) 524 inlineBlockOption = try $ do 525 argKey <- orgArgKey 526 paramValue <- option "yes" orgInlineParamValue 527 return (argKey, paramValue) 528 529 orgInlineParamValue :: PandocMonad m => OrgParser m Text 530 orgInlineParamValue = try $ 531 skipSpaces 532 *> notFollowedBy (char ':') 533 *> many1Char (noneOf "\t\n\r ]") 534 <* skipSpaces 535 536 537emphasizedText :: PandocMonad m => OrgParser m (F Inlines) 538emphasizedText = do 539 state <- getState 540 guard . exportEmphasizedText . orgStateExportSettings $ state 541 try $ choice 542 [ emph 543 , strong 544 , strikeout 545 , underline 546 ] 547 548enclosedByPair :: PandocMonad m 549 => Char -- ^ opening char 550 -> Char -- ^ closing char 551 -> OrgParser m a -- ^ parser 552 -> OrgParser m [a] 553enclosedByPair s e p = char s *> manyTill p (char e) 554 555enclosedByPair1 :: PandocMonad m 556 => Char -- ^ opening char 557 -> Char -- ^ closing char 558 -> OrgParser m a -- ^ parser 559 -> OrgParser m [a] 560enclosedByPair1 s e p = char s *> many1Till p (char e) 561 562emph :: PandocMonad m => OrgParser m (F Inlines) 563emph = fmap B.emph <$> emphasisBetween '/' 564 565strong :: PandocMonad m => OrgParser m (F Inlines) 566strong = fmap B.strong <$> emphasisBetween '*' 567 568strikeout :: PandocMonad m => OrgParser m (F Inlines) 569strikeout = fmap B.strikeout <$> emphasisBetween '+' 570 571underline :: PandocMonad m => OrgParser m (F Inlines) 572underline = fmap B.underline <$> emphasisBetween '_' 573 574verbatim :: PandocMonad m => OrgParser m (F Inlines) 575verbatim = return . B.codeWith ("", ["verbatim"], []) <$> verbatimBetween '=' 576 577code :: PandocMonad m => OrgParser m (F Inlines) 578code = return . B.code <$> verbatimBetween '~' 579 580subscript :: PandocMonad m => OrgParser m (F Inlines) 581subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr) 582 583superscript :: PandocMonad m => OrgParser m (F Inlines) 584superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr) 585 586math :: PandocMonad m => OrgParser m (F Inlines) 587math = return . B.math <$> choice [ math1CharBetween '$' 588 , mathTextBetween '$' 589 , rawMathBetween "\\(" "\\)" 590 ] 591 592displayMath :: PandocMonad m => OrgParser m (F Inlines) 593displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" 594 , rawMathBetween "$$" "$$" 595 ] 596 597updatePositions :: PandocMonad m 598 => Char 599 -> OrgParser m Char 600updatePositions c = do 601 st <- getState 602 let emphasisPreChars = orgStateEmphasisPreChars st 603 when (c `elem` emphasisPreChars) updateLastPreCharPos 604 when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos 605 return c 606 607symbol :: PandocMonad m => OrgParser m (F Inlines) 608symbol = return . B.str . T.singleton <$> (oneOf specialChars >>= updatePositions) 609 610emphasisBetween :: PandocMonad m 611 => Char 612 -> OrgParser m (F Inlines) 613emphasisBetween c = try $ do 614 startEmphasisNewlinesCounting emphasisAllowedNewlines 615 res <- enclosedInlines (emphasisStart c) (emphasisEnd c) 616 isTopLevelEmphasis <- null . orgStateEmphasisCharStack <$> getState 617 when isTopLevelEmphasis 618 resetEmphasisNewlines 619 return res 620 621verbatimBetween :: PandocMonad m 622 => Char 623 -> OrgParser m Text 624verbatimBetween c = try $ 625 emphasisStart c *> 626 many1TillNOrLessNewlines 1 verbatimChar (emphasisEnd c) 627 where 628 verbatimChar = noneOf "\n\r" >>= updatePositions 629 630-- | Parses a raw string delimited by @c@ using Org's math rules 631mathTextBetween :: PandocMonad m 632 => Char 633 -> OrgParser m Text 634mathTextBetween c = try $ do 635 mathStart c 636 body <- many1TillNOrLessNewlines mathAllowedNewlines 637 (noneOf (c:"\n\r")) 638 (lookAhead $ mathEnd c) 639 final <- mathEnd c 640 return $ T.snoc body final 641 642-- | Parse a single character between @c@ using math rules 643math1CharBetween :: PandocMonad m 644 => Char 645 -> OrgParser m Text 646math1CharBetween c = try $ do 647 char c 648 res <- noneOf $ c:mathForbiddenBorderChars 649 char c 650 eof <|> () <$ lookAhead (oneOf mathPostChars) 651 return $ T.singleton res 652 653rawMathBetween :: PandocMonad m 654 => Text 655 -> Text 656 -> OrgParser m Text 657rawMathBetween s e = try $ textStr s *> manyTillChar anyChar (try $ textStr e) 658 659-- | Parses the start (opening character) of emphasis 660emphasisStart :: PandocMonad m => Char -> OrgParser m Char 661emphasisStart c = try $ do 662 guard =<< afterEmphasisPreChar 663 guard =<< notAfterString 664 char c 665 lookAhead (noneOf emphasisForbiddenBorderChars) 666 pushToInlineCharStack c 667 -- nested inlines are allowed, so mark this position as one which might be 668 -- followed by another inline. 669 updateLastPreCharPos 670 return c 671 672-- | Parses the closing character of emphasis 673emphasisEnd :: PandocMonad m => Char -> OrgParser m Char 674emphasisEnd c = try $ do 675 guard =<< notAfterForbiddenBorderChar 676 char c 677 eof <|> () <$ lookAhead acceptablePostChars 678 updateLastStrPos 679 popInlineCharStack 680 return c 681 where 682 acceptablePostChars = do 683 emphasisPostChars <- orgStateEmphasisPostChars <$> getState 684 surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars) 685 686mathStart :: PandocMonad m => Char -> OrgParser m Char 687mathStart c = try $ 688 char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars)) 689 690mathEnd :: PandocMonad m => Char -> OrgParser m Char 691mathEnd c = try $ do 692 res <- noneOf (c:mathForbiddenBorderChars) 693 char c 694 eof <|> () <$ lookAhead (oneOf mathPostChars) 695 return res 696 697 698enclosedInlines :: (PandocMonad m, Show b) => OrgParser m a 699 -> OrgParser m b 700 -> OrgParser m (F Inlines) 701enclosedInlines start end = try $ 702 trimInlinesF . mconcat <$> enclosed start end inline 703 704enclosedRaw :: (PandocMonad m, Show b) => OrgParser m a 705 -> OrgParser m b 706 -> OrgParser m Text 707enclosedRaw start end = try $ 708 start *> (onSingleLine <|> spanningTwoLines) 709 where onSingleLine = try $ many1TillChar (noneOf "\n\r") end 710 spanningTwoLines = try $ 711 anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine 712 713-- | Like many1Till, but parses at most @n+1@ lines. @p@ must not consume 714-- newlines. 715many1TillNOrLessNewlines :: PandocMonad m => Int 716 -> OrgParser m Char 717 -> OrgParser m a 718 -> OrgParser m Text 719many1TillNOrLessNewlines n p end = try $ 720 nMoreLines (Just n) mempty >>= oneOrMore 721 where 722 nMoreLines Nothing cs = return cs 723 nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine 724 nMoreLines k cs = try $ (final k cs <|> rest k cs) 725 >>= uncurry nMoreLines 726 final _ cs = (\x -> (Nothing, cs ++ x)) <$> try finalLine 727 rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline) 728 finalLine = try $ manyTill p end 729 minus1 k = k - 1 730 oneOrMore cs = T.pack cs <$ guard (not $ null cs) 731 732-- Org allows customization of the way it reads emphasis. We use the defaults 733-- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components` 734-- for details). 735 736-- | Chars not allowed at the (inner) border of emphasis 737emphasisForbiddenBorderChars :: [Char] 738emphasisForbiddenBorderChars = "\t\n\r " 739 740-- | The maximum number of newlines within 741emphasisAllowedNewlines :: Int 742emphasisAllowedNewlines = 1 743 744-- LaTeX-style math: see `org-latex-regexps` for details 745 746-- | Chars allowed after an inline ($...$) math statement 747mathPostChars :: [Char] 748mathPostChars = "\t\n \"'),-.:;?" 749 750-- | Chars not allowed at the (inner) border of math 751mathForbiddenBorderChars :: [Char] 752mathForbiddenBorderChars = "\t\n\r ,;.$" 753 754-- | Maximum number of newlines in an inline math statement 755mathAllowedNewlines :: Int 756mathAllowedNewlines = 2 757 758-- | Whether we are right behind a char allowed before emphasis 759afterEmphasisPreChar :: PandocMonad m => OrgParser m Bool 760afterEmphasisPreChar = do 761 pos <- getPosition 762 lastPrePos <- orgStateLastPreCharPos <$> getState 763 return $ maybe True (== pos) lastPrePos 764 765-- | Whether the parser is right after a forbidden border char 766notAfterForbiddenBorderChar :: PandocMonad m => OrgParser m Bool 767notAfterForbiddenBorderChar = do 768 pos <- getPosition 769 lastFBCPos <- orgStateLastForbiddenCharPos <$> getState 770 return $ lastFBCPos /= Just pos 771 772-- | Read a sub- or superscript expression 773subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines) 774subOrSuperExpr = try $ 775 simpleSubOrSuperText <|> 776 (choice [ charsInBalanced '{' '}' (noneOf "\n\r") 777 , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") 778 ] >>= parseFromString (mconcat <$> many inline)) 779 where enclosing (left, right) s = T.cons left $ T.snoc s right 780 781simpleSubOrSuperText :: PandocMonad m => OrgParser m (F Inlines) 782simpleSubOrSuperText = try $ do 783 state <- getState 784 guard . exportSubSuperscripts . orgStateExportSettings $ state 785 return . B.str <$> 786 choice [ textStr "*" 787 , mappend <$> option "" (T.singleton <$> oneOf "+-") 788 <*> many1Char alphaNum 789 ] 790 791inlineLaTeX :: PandocMonad m => OrgParser m (F Inlines) 792inlineLaTeX = try $ do 793 cmd <- inlineLaTeXCommand 794 texOpt <- getExportSetting exportWithLatex 795 allowEntities <- getExportSetting exportWithEntities 796 ils <- parseAsInlineLaTeX cmd texOpt 797 maybe mzero returnF $ 798 parseAsMathMLSym allowEntities cmd `mplus` 799 parseAsMath cmd texOpt `mplus` 800 ils 801 where 802 parseAsInlineLaTeX :: PandocMonad m 803 => Text -> TeXExport -> OrgParser m (Maybe Inlines) 804 parseAsInlineLaTeX cs = \case 805 TeXExport -> maybeRight <$> runParserT inlineCommand state "" cs 806 TeXIgnore -> return (Just mempty) 807 TeXVerbatim -> return (Just $ B.str cs) 808 809 parseAsMathMLSym :: Bool -> Text -> Maybe Inlines 810 parseAsMathMLSym allowEntities cs = do 811 -- drop initial backslash and any trailing "{}" 812 let clean = T.dropWhileEnd (`elem` ("{}" :: String)) . T.drop 1 813 -- If entities are disabled, then return the string as text, but 814 -- only if this *is* a MathML entity. 815 case B.str <$> MathMLEntityMap.getUnicode (clean cs) of 816 Just _ | not allowEntities -> Just $ B.str cs 817 x -> x 818 819 state :: ParserState 820 state = def{ stateOptions = def{ readerExtensions = 821 enableExtension Ext_raw_tex (readerExtensions def) } } 822 823 parseAsMath :: Text -> TeXExport -> Maybe Inlines 824 parseAsMath cs = \case 825 TeXExport -> maybeRight (readTeX cs) >>= 826 fmap B.fromList . writePandoc DisplayInline 827 TeXIgnore -> Just mempty 828 TeXVerbatim -> Just $ B.str cs 829 830maybeRight :: Either a b -> Maybe b 831maybeRight = either (const Nothing) Just 832 833inlineLaTeXCommand :: PandocMonad m => OrgParser m Text 834inlineLaTeXCommand = try $ do 835 rest <- getInput 836 st <- getState 837 parsed <- (lift . lift) $ runParserT rawLaTeXInline st "source" rest 838 case parsed of 839 Right cs -> do 840 -- drop any trailing whitespace, those are not part of the command as 841 -- far as org mode is concerned. 842 let cmdNoSpc = T.dropWhileEnd isSpace cs 843 let len = T.length cmdNoSpc 844 count len anyChar 845 return cmdNoSpc 846 _ -> mzero 847 848exportSnippet :: PandocMonad m => OrgParser m (F Inlines) 849exportSnippet = try $ do 850 string "@@" 851 format <- many1TillChar (alphaNum <|> char '-') (char ':') 852 snippet <- manyTillChar anyChar (try $ string "@@") 853 returnF $ B.rawInline format snippet 854 855macro :: PandocMonad m => OrgParser m (F Inlines) 856macro = try $ do 857 recursionDepth <- orgStateMacroDepth <$> getState 858 guard $ recursionDepth < 15 859 string "{{{" 860 name <- manyChar alphaNum 861 args <- ([] <$ string "}}}") 862 <|> char '(' *> argument `sepBy` char ',' <* eoa 863 expander <- lookupMacro name <$> getState 864 case expander of 865 Nothing -> mzero 866 Just fn -> do 867 updateState $ \s -> s { orgStateMacroDepth = recursionDepth + 1 } 868 res <- parseFromString (mconcat <$> many inline) $ fn args 869 updateState $ \s -> s { orgStateMacroDepth = recursionDepth } 870 return res 871 where 872 argument = manyChar $ notFollowedBy eoa *> noneOf "," 873 eoa = string ")}}}" 874 875smart :: PandocMonad m => OrgParser m (F Inlines) 876smart = choice [doubleQuoted, singleQuoted, orgApostrophe, orgDash, orgEllipses] 877 where 878 orgDash = do 879 guardOrSmartEnabled =<< getExportSetting exportSpecialStrings 880 pure <$> dash <* updatePositions '-' 881 orgEllipses = do 882 guardOrSmartEnabled =<< getExportSetting exportSpecialStrings 883 pure <$> ellipses <* updatePositions '.' 884 orgApostrophe = do 885 guardEnabled Ext_smart 886 (char '\'' <|> char '\8217') <* updateLastPreCharPos 887 <* updateLastForbiddenCharPos 888 returnF (B.str "\x2019") 889 890guardOrSmartEnabled :: PandocMonad m => Bool -> OrgParser m () 891guardOrSmartEnabled b = do 892 smartExtension <- extensionEnabled Ext_smart <$> getOption readerExtensions 893 guard (b || smartExtension) 894 895singleQuoted :: PandocMonad m => OrgParser m (F Inlines) 896singleQuoted = try $ do 897 guardOrSmartEnabled =<< getExportSetting exportSmartQuotes 898 singleQuoteStart 899 updatePositions '\'' 900 withQuoteContext InSingleQuote $ 901 fmap B.singleQuoted . trimInlinesF . mconcat <$> 902 many1Till inline (singleQuoteEnd <* updatePositions '\'') 903 904-- doubleQuoted will handle regular double-quoted sections, as well 905-- as dialogues with an open double-quote without a close double-quote 906-- in the same paragraph. 907doubleQuoted :: PandocMonad m => OrgParser m (F Inlines) 908doubleQuoted = try $ do 909 guardOrSmartEnabled =<< getExportSetting exportSmartQuotes 910 doubleQuoteStart 911 updatePositions '"' 912 contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) 913 let doubleQuotedContent = withQuoteContext InDoubleQuote $ do 914 doubleQuoteEnd 915 updateLastForbiddenCharPos 916 return . fmap B.doubleQuoted . trimInlinesF $ contents 917 let leftQuoteAndContent = return $ pure (B.str "\8220") <> contents 918 doubleQuotedContent <|> leftQuoteAndContent 919