1{-# LANGUAGE FlexibleInstances #-} 2{-# LANGUAGE FlexibleContexts #-} 3{-# LANGUAGE MultiParamTypeClasses #-} 4{-# LANGUAGE GeneralizedNewtypeDeriving #-} 5{-# LANGUAGE OverloadedStrings #-} 6{- | 7 Module : Text.Pandoc.Sources 8 Copyright : Copyright (C) 2021 John MacFarlane 9 License : GNU GPL, version 2 or above 10 11 Maintainer : John MacFarlane <jgm@berkeley.edu> 12 Stability : alpha 13 Portability : portable 14 15Defines Sources object to be used as input to pandoc parsers and redefines Char 16parsers so they get source position information from it. 17-} 18 19module Text.Pandoc.Sources 20 ( Sources(..) 21 , ToSources(..) 22 , UpdateSourcePos(..) 23 , sourcesToText 24 , initialSourceName 25 , addToSources 26 , ensureFinalNewlines 27 , addToInput 28 , satisfy 29 , oneOf 30 , noneOf 31 , anyChar 32 , char 33 , string 34 , newline 35 , space 36 , spaces 37 , letter 38 , digit 39 , hexDigit 40 , alphaNum 41 ) 42where 43import qualified Text.Parsec as P 44import Text.Parsec (Stream(..), ParsecT) 45import Text.Parsec.Pos as P 46import Data.Text (Text) 47import qualified Data.Text as T 48import Data.Char (isSpace, isLetter, isAlphaNum, isDigit, isHexDigit) 49import Data.String (IsString(..)) 50import qualified Data.List.NonEmpty as NonEmpty 51 52-- | A list of inputs labeled with source positions. It is assumed 53-- that the 'Text's have @\n@ line endings. 54newtype Sources = Sources { unSources :: [(SourcePos, Text)] } 55 deriving (Show, Semigroup, Monoid) 56 57instance Monad m => Stream Sources m Char where 58 uncons (Sources []) = return Nothing 59 uncons (Sources ((pos,t):rest)) = 60 case T.uncons t of 61 Nothing -> uncons (Sources rest) 62 Just (c,t') -> return $ Just (c, Sources ((pos,t'):rest)) 63 64instance IsString Sources where 65 fromString s = Sources [(P.initialPos "", T.pack (filter (/='\r') s))] 66 67class ToSources a where 68 toSources :: a -> Sources 69 70instance ToSources Text where 71 toSources t = Sources [(P.initialPos "", T.filter (/='\r') t)] 72 73instance ToSources [(FilePath, Text)] where 74 toSources = Sources 75 . map (\(fp,t) -> 76 (P.initialPos fp, T.snoc (T.filter (/='\r') t) '\n')) 77 78instance ToSources Sources where 79 toSources = id 80 81sourcesToText :: Sources -> Text 82sourcesToText (Sources xs) = mconcat $ map snd xs 83 84addToSources :: Monad m => SourcePos -> Text -> ParsecT Sources u m () 85addToSources pos t = do 86 curpos <- P.getPosition 87 Sources xs <- P.getInput 88 let xs' = case xs of 89 [] -> [] 90 ((_,t'):rest) -> (curpos,t'):rest 91 P.setInput $ Sources ((pos, T.filter (/='\r') t):xs') 92 93ensureFinalNewlines :: Int -- ^ number of trailing newlines 94 -> Sources 95 -> Sources 96ensureFinalNewlines n (Sources xs) = 97 case NonEmpty.nonEmpty xs of 98 Nothing -> Sources [(initialPos "", T.replicate n "\n")] 99 Just lst -> 100 case NonEmpty.last lst of 101 (spos, t) -> 102 case T.length (T.takeWhileEnd (=='\n') t) of 103 len | len >= n -> Sources xs 104 | otherwise -> Sources (NonEmpty.init lst ++ 105 [(spos, 106 t <> T.replicate (n - len) "\n")]) 107 108class UpdateSourcePos s c where 109 updateSourcePos :: SourcePos -> c -> s -> SourcePos 110 111instance UpdateSourcePos Text Char where 112 updateSourcePos pos c _ = updatePosChar pos c 113 114instance UpdateSourcePos Sources Char where 115 updateSourcePos pos c sources = 116 case sources of 117 Sources [] -> updatePosChar pos c 118 Sources ((_,t):(pos',_):_) 119 | T.null t -> pos' 120 Sources _ -> 121 case c of 122 '\n' -> incSourceLine (setSourceColumn pos 1) 1 123 '\t' -> incSourceColumn pos (4 - ((sourceColumn pos - 1) `mod` 4)) 124 _ -> incSourceColumn pos 1 125 126-- | Get name of first source in 'Sources'. 127initialSourceName :: Sources -> FilePath 128initialSourceName (Sources []) = "" 129initialSourceName (Sources ((pos,_):_)) = sourceName pos 130 131-- | Add some text to the beginning of the input sources. 132-- This simplifies code that expands macros. 133addToInput :: Monad m => Text -> ParsecT Sources u m () 134addToInput t = do 135 Sources xs <- P.getInput 136 case xs of 137 [] -> P.setInput $ Sources [(initialPos "",t)] 138 (pos,t'):rest -> P.setInput $ Sources ((pos, t <> t'):rest) 139 140-- We need to redefine the parsers in Text.Parsec.Char so that they 141-- update source positions properly from the Sources stream. 142 143satisfy :: (Monad m, Stream s m Char, UpdateSourcePos s Char) 144 => (Char -> Bool) -> ParsecT s u m Char 145satisfy f = P.tokenPrim show updateSourcePos matcher 146 where 147 matcher c = if f c then Just c else Nothing 148 149oneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char) 150 => [Char] -> ParsecT s u m Char 151oneOf cs = satisfy (`elem` cs) 152 153noneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char) 154 => [Char] -> ParsecT s u m Char 155noneOf cs = satisfy (`notElem` cs) 156 157anyChar :: (Monad m, Stream s m Char, UpdateSourcePos s Char) 158 => ParsecT s u m Char 159anyChar = satisfy (const True) 160 161char :: (Monad m, Stream s m Char, UpdateSourcePos s Char) 162 => Char -> ParsecT s u m Char 163char c = satisfy (== c) 164 165string :: (Monad m, Stream s m Char, UpdateSourcePos s Char) 166 => [Char] -> ParsecT s u m [Char] 167string = mapM char 168 169newline :: (Monad m, Stream s m Char, UpdateSourcePos s Char) 170 => ParsecT s u m Char 171newline = satisfy (== '\n') 172 173space :: (Monad m, Stream s m Char, UpdateSourcePos s Char) 174 => ParsecT s u m Char 175space = satisfy isSpace 176 177spaces :: (Monad m, Stream s m Char, UpdateSourcePos s Char) 178 => ParsecT s u m () 179spaces = P.skipMany space P.<?> "white space" 180 181letter :: (Monad m, Stream s m Char, UpdateSourcePos s Char) 182 => ParsecT s u m Char 183letter = satisfy isLetter 184 185alphaNum :: (Monad m, Stream s m Char, UpdateSourcePos s Char) 186 => ParsecT s u m Char 187alphaNum = satisfy isAlphaNum 188 189digit :: (Monad m, Stream s m Char, UpdateSourcePos s Char) 190 => ParsecT s u m Char 191digit = satisfy isDigit 192 193hexDigit :: (Monad m, Stream s m Char, UpdateSourcePos s Char) 194 => ParsecT s u m Char 195hexDigit = satisfy isHexDigit 196