1{-# LANGUAGE FlexibleInstances #-}
2{-# LANGUAGE FlexibleContexts #-}
3{-# LANGUAGE MultiParamTypeClasses #-}
4{-# LANGUAGE GeneralizedNewtypeDeriving #-}
5{-# LANGUAGE OverloadedStrings #-}
6{- |
7   Module      : Text.Pandoc.Sources
8   Copyright   : Copyright (C) 2021 John MacFarlane
9   License     : GNU GPL, version 2 or above
10
11   Maintainer  : John MacFarlane <jgm@berkeley.edu>
12   Stability   : alpha
13   Portability : portable
14
15Defines Sources object to be used as input to pandoc parsers and redefines Char
16parsers so they get source position information from it.
17-}
18
19module Text.Pandoc.Sources
20  ( Sources(..)
21  , ToSources(..)
22  , UpdateSourcePos(..)
23  , sourcesToText
24  , initialSourceName
25  , addToSources
26  , ensureFinalNewlines
27  , addToInput
28  , satisfy
29  , oneOf
30  , noneOf
31  , anyChar
32  , char
33  , string
34  , newline
35  , space
36  , spaces
37  , letter
38  , digit
39  , hexDigit
40  , alphaNum
41  )
42where
43import qualified Text.Parsec as P
44import Text.Parsec (Stream(..), ParsecT)
45import Text.Parsec.Pos as P
46import Data.Text (Text)
47import qualified Data.Text as T
48import Data.Char (isSpace, isLetter, isAlphaNum, isDigit, isHexDigit)
49import Data.String (IsString(..))
50import qualified Data.List.NonEmpty as NonEmpty
51
52-- | A list of inputs labeled with source positions.  It is assumed
53-- that the 'Text's have @\n@ line endings.
54newtype Sources = Sources { unSources :: [(SourcePos, Text)] }
55  deriving (Show, Semigroup, Monoid)
56
57instance Monad m => Stream Sources m Char where
58  uncons (Sources []) = return Nothing
59  uncons (Sources ((pos,t):rest)) =
60    case T.uncons t of
61      Nothing -> uncons (Sources rest)
62      Just (c,t') -> return $ Just (c, Sources ((pos,t'):rest))
63
64instance IsString Sources where
65  fromString s = Sources [(P.initialPos "", T.pack (filter (/='\r') s))]
66
67class ToSources a where
68  toSources :: a -> Sources
69
70instance ToSources Text where
71  toSources t = Sources [(P.initialPos "", T.filter (/='\r') t)]
72
73instance ToSources [(FilePath, Text)] where
74  toSources = Sources
75            . map (\(fp,t) ->
76                    (P.initialPos fp, T.snoc (T.filter (/='\r') t) '\n'))
77
78instance ToSources Sources where
79  toSources = id
80
81sourcesToText :: Sources -> Text
82sourcesToText (Sources xs) = mconcat $ map snd xs
83
84addToSources :: Monad m => SourcePos -> Text -> ParsecT Sources u m ()
85addToSources pos t = do
86  curpos <- P.getPosition
87  Sources xs <- P.getInput
88  let xs' = case xs of
89               [] -> []
90               ((_,t'):rest) -> (curpos,t'):rest
91  P.setInput $ Sources ((pos, T.filter (/='\r') t):xs')
92
93ensureFinalNewlines :: Int -- ^ number of trailing newlines
94                    -> Sources
95                    -> Sources
96ensureFinalNewlines n (Sources xs) =
97  case NonEmpty.nonEmpty xs of
98    Nothing -> Sources [(initialPos "", T.replicate n "\n")]
99    Just lst ->
100      case NonEmpty.last lst of
101        (spos, t) ->
102          case T.length (T.takeWhileEnd (=='\n') t) of
103            len | len >= n -> Sources xs
104                | otherwise -> Sources (NonEmpty.init lst ++
105                                        [(spos,
106                                          t <> T.replicate (n - len) "\n")])
107
108class UpdateSourcePos s c where
109  updateSourcePos :: SourcePos -> c -> s -> SourcePos
110
111instance UpdateSourcePos Text Char where
112   updateSourcePos pos c _ = updatePosChar pos c
113
114instance UpdateSourcePos Sources Char where
115   updateSourcePos pos c sources =
116     case sources of
117       Sources [] -> updatePosChar pos c
118       Sources ((_,t):(pos',_):_)
119         | T.null t  -> pos'
120       Sources _ ->
121           case c of
122             '\n' -> incSourceLine (setSourceColumn pos 1) 1
123             '\t' -> incSourceColumn pos (4 - ((sourceColumn pos - 1) `mod` 4))
124             _    -> incSourceColumn pos 1
125
126-- | Get name of first source in 'Sources'.
127initialSourceName :: Sources -> FilePath
128initialSourceName (Sources []) = ""
129initialSourceName (Sources ((pos,_):_)) = sourceName pos
130
131-- | Add some text to the beginning of the input sources.
132-- This simplifies code that expands macros.
133addToInput :: Monad m => Text -> ParsecT Sources u m ()
134addToInput t = do
135  Sources xs <- P.getInput
136  case xs of
137    [] -> P.setInput $ Sources [(initialPos "",t)]
138    (pos,t'):rest -> P.setInput $ Sources ((pos, t <> t'):rest)
139
140-- We need to redefine the parsers in Text.Parsec.Char so that they
141-- update source positions properly from the Sources stream.
142
143satisfy :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
144         => (Char -> Bool) -> ParsecT s u m Char
145satisfy f = P.tokenPrim show updateSourcePos matcher
146 where
147  matcher c = if f c then Just c else Nothing
148
149oneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
150      => [Char] -> ParsecT s u m Char
151oneOf cs = satisfy (`elem` cs)
152
153noneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
154       => [Char] -> ParsecT s u m Char
155noneOf cs = satisfy (`notElem` cs)
156
157anyChar :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
158        => ParsecT s u m Char
159anyChar = satisfy (const True)
160
161char :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
162     => Char -> ParsecT s u m Char
163char c = satisfy (== c)
164
165string :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
166       => [Char] -> ParsecT s u m [Char]
167string = mapM char
168
169newline :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
170        => ParsecT s u m Char
171newline = satisfy (== '\n')
172
173space :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
174      => ParsecT s u m Char
175space = satisfy isSpace
176
177spaces :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
178       => ParsecT s u m ()
179spaces = P.skipMany space P.<?> "white space"
180
181letter :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
182       => ParsecT s u m Char
183letter = satisfy isLetter
184
185alphaNum :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
186         => ParsecT s u m Char
187alphaNum = satisfy isAlphaNum
188
189digit :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
190      => ParsecT s u m Char
191digit = satisfy isDigit
192
193hexDigit :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
194         => ParsecT s u m Char
195hexDigit = satisfy isHexDigit
196