1{-# LANGUAGE OverloadedStrings #-} 2{-# LANGUAGE BangPatterns #-} 3module Commonmark.TokParsers 4 ( satisfyTok 5 , satisfyWord 6 , anyTok 7 , anySymbol 8 , symbol 9 , whitespace 10 , lineEnd 11 , spaceTok 12 , oneOfToks 13 , noneOfToks 14 , gobbleSpaces 15 , gobbleUpToSpaces 16 , withRaw 17 , hasType 18 , textIs 19 , blankLine 20 , restOfLine 21 , isOneOfCI 22 , nonindentSpaces 23 , skipManyTill 24 , skipWhile 25 ) 26 where 27import Control.Monad (mzero, void) 28import Data.Text (Text) 29import qualified Data.Text as T 30import Text.Parsec 31import Text.Parsec.Pos (updatePosString) 32import Commonmark.Tokens 33 34-- | Parses a single 'Tok' satisfying a predicate. 35satisfyTok :: Monad m => (Tok -> Bool) -> ParsecT [Tok] s m Tok 36satisfyTok f = tokenPrim (T.unpack . tokContents) updatePos matcher 37 where matcher t | f t = Just t 38 | otherwise = Nothing 39 updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos 40 updatePos _spos _ (Tok _ !pos _ : _) = pos 41 updatePos !spos (Tok _ _pos !t) [] = 42 updatePosString spos (T.unpack t) 43{-# INLINE satisfyTok #-} 44 45-- | Parses any 'Tok'. 46anyTok :: Monad m => ParsecT [Tok] s m Tok 47anyTok = satisfyTok (const True) 48{-# INLINE anyTok #-} 49 50-- | Parses any 'Symbol' 'Tok'. 51anySymbol :: Monad m => ParsecT [Tok] s m Tok 52anySymbol = satisfyTok (\t -> case tokType t of 53 Symbol _ -> True 54 _ -> False) 55{-# INLINE anySymbol #-} 56 57-- | Parses a 'Symbol' with character @c@. 58symbol :: Monad m => Char -> ParsecT [Tok] s m Tok 59symbol c = satisfyTok (hasType (Symbol c)) 60{-# INLINE symbol #-} 61 62-- | Parses a 'Tok' with one of the listed types. 63oneOfToks :: Monad m => [TokType] -> ParsecT [Tok] s m Tok 64oneOfToks toktypes = satisfyTok (hasTypeIn toktypes) 65{-# INLINE oneOfToks #-} 66 67-- | Parses a 'Tok' with none of the listed types. 68noneOfToks :: Monad m => [TokType] -> ParsecT [Tok] s m Tok 69noneOfToks toktypes = satisfyTok (not . hasTypeIn toktypes) 70{-# INLINE noneOfToks #-} 71 72-- | Parses one or more whitespace 'Tok's. 73whitespace :: Monad m => ParsecT [Tok] s m [Tok] 74whitespace = many1 $ satisfyTok (\t -> case tokType t of 75 Spaces -> True 76 LineEnd -> True 77 _ -> False) 78{-# INLINE whitespace #-} 79 80-- | Parses a 'LineEnd' token. 81lineEnd :: Monad m => ParsecT [Tok] s m Tok 82lineEnd = satisfyTok (hasType LineEnd) 83{-# INLINE lineEnd #-} 84 85-- | Parses a 'Spaces' token. 86spaceTok :: Monad m => ParsecT [Tok] s m Tok 87spaceTok = satisfyTok (hasType Spaces) 88{-# INLINE spaceTok #-} 89 90-- | Parses a 'WordChars' token matching a predicate. 91satisfyWord :: Monad m => (Text -> Bool) -> ParsecT [Tok] s m Tok 92satisfyWord f = satisfyTok (\t -> hasType WordChars t && textIs f t) 93{-# INLINE satisfyWord #-} 94 95-- | Parses exactly @n@ spaces. If tabs are encountered, 96-- they are split into spaces before being consumed; so 97-- a tab may be partially consumed by this parser. 98gobbleSpaces :: Monad m => Int -> ParsecT [Tok] u m Int 99gobbleSpaces 0 = return 0 100gobbleSpaces n = try $ gobble' True n 101{-# INLINE gobbleSpaces #-} 102 103-- | Parses up to @n@ spaces. 104gobbleUpToSpaces :: Monad m => Int -> ParsecT [Tok] u m Int 105gobbleUpToSpaces 0 = return 0 106gobbleUpToSpaces n = gobble' False n 107{-# INLINE gobbleUpToSpaces #-} 108 109gobble' :: Monad m => Bool -> Int -> ParsecT [Tok] u m Int 110gobble' requireAll numspaces 111 | numspaces >= 1 = (do 112 Tok Spaces pos _ <- satisfyTok (hasType Spaces) 113 pos' <- getPosition 114 case sourceColumn pos' - sourceColumn pos of 115 n | n < numspaces -> (+ n) <$> gobble' requireAll (numspaces - n) 116 | n == numspaces -> return $! n 117 | otherwise -> do 118 let newpos = incSourceColumn pos numspaces 119 let newtok = Tok Spaces newpos 120 (T.replicate (n - numspaces) " ") 121 getInput >>= setInput . (newtok:) 122 setPosition $ newpos 123 return $! numspaces) 124 <|> if requireAll 125 then mzero 126 else return 0 127 | otherwise = return 0 128{-# INLINE gobble' #-} 129 130-- | Applies a parser and returns its value (if successful) 131-- plus a list of the raw tokens parsed. 132withRaw :: Monad m => ParsecT [Tok] s m a -> ParsecT [Tok] s m (a, [Tok]) 133withRaw parser = do 134 toks <- getInput 135 res <- parser 136 newpos <- getPosition 137 let getrawtoks (t:ts) 138 | tokPos t < newpos = t : getrawtoks ts 139 getrawtoks _ = [] 140 let rawtoks = getrawtoks toks 141 return (res, rawtoks) 142{-# INLINE withRaw #-} 143 144-- | Filters tokens of a certain type. 145hasType :: TokType -> Tok -> Bool 146hasType ty (Tok ty' _ _) = ty == ty' 147{-# INLINE hasType #-} 148 149hasTypeIn :: [TokType] -> Tok -> Bool 150hasTypeIn tys (Tok ty' _ _) = ty' `elem` tys 151 152-- | Filters tokens with certain contents. 153textIs :: (Text -> Bool) -> Tok -> Bool 154textIs f (Tok _ _ t) = f t 155{-# INLINE textIs #-} 156 157-- | Gobble up to 3 spaces (may be part of a tab). 158nonindentSpaces :: Monad m => ParsecT [Tok] u m () 159nonindentSpaces = void $ gobbleUpToSpaces 3 160{-# INLINE nonindentSpaces #-} 161 162-- | Case-insensitive membership in a list of 'Text's. 163isOneOfCI :: [Text] -> Text -> Bool 164isOneOfCI ts t = T.toLower t `elem` ts 165{-# INLINE isOneOfCI #-} 166 167-- | Apply @p@ many times until @stop@ succeeds, discarding results. 168skipManyTill :: ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m () 169skipManyTill p stop = scan 170 where scan = (() <$ stop) <|> (p >> scan) 171{-# INLINE skipManyTill #-} 172 173-- | Efficiently skip 'Tok's satisfying a certain condition. 174skipWhile :: Monad m => (Tok -> Bool) -> ParsecT [Tok] u m () 175skipWhile f = skipMany (satisfyTok f) 176{-# INLINE skipWhile #-} 177 178-- | Parse optional spaces and an endline. 179blankLine :: Monad m => ParsecT [Tok] s m () 180blankLine = try $ do 181 skipWhile (hasType Spaces) 182 void lineEnd 183{-# INLINE blankLine #-} 184 185-- | Efficiently parse the remaining tokens on a line, 186-- including the LineEnd (if any). 187restOfLine :: Monad m => ParsecT [Tok] s m [Tok] 188restOfLine = go 189 where 190 go = option [] $ do 191 !tok <- anyTok 192 case tokType tok of 193 LineEnd -> return [tok] 194 _ -> (tok:) <$> go 195{-# INLINE restOfLine #-} 196