1-- | 2-- Module : Foundation.Parser 3-- License : BSD-style 4-- Maintainer : Haskell Foundation 5-- Stability : experimental 6-- Portability : portable 7-- 8-- The current implementation is mainly, if not copy/pasted, inspired from 9-- `memory`'s Parser. 10-- 11-- Foundation Parser makes use of the Foundation's @Collection@ and 12-- @Sequential@ classes to allow you to define generic parsers over any 13-- @Sequential@ of inpu. 14-- 15-- This way you can easily implements parsers over @LString@, @String@. 16-- 17-- 18-- > flip parseOnly "my.email@address.com" $ do 19-- > EmailAddress 20-- > <$> (takeWhile ((/=) '@' <* element '@') 21-- > <*> takeAll 22-- 23 24{-# LANGUAGE Rank2Types #-} 25{-# LANGUAGE FlexibleContexts #-} 26{-# LANGUAGE FlexibleInstances #-} 27 28module Foundation.Parser 29 ( Parser 30 , parse 31 , parseFeed 32 , parseOnly 33 , -- * Result 34 Result(..) 35 , ParseError(..) 36 , reportError 37 38 , -- * Parser source 39 ParserSource(..) 40 41 , -- * combinator 42 peek 43 , element 44 , anyElement 45 , elements 46 , string 47 48 , satisfy 49 , satisfy_ 50 , take 51 , takeWhile 52 , takeAll 53 54 , skip 55 , skipWhile 56 , skipAll 57 58 , (<|>) 59 , many 60 , some 61 , optional 62 , repeat, Condition(..), And(..) 63 ) where 64 65import Control.Applicative (Alternative, empty, (<|>), many, some, optional) 66import Control.Monad (MonadPlus, mzero, mplus) 67 68import Basement.Compat.Base 69import Basement.Types.OffsetSize 70import Foundation.Numerical 71import Foundation.Collection hiding (take, takeWhile) 72import qualified Foundation.Collection as C 73import Foundation.String 74 75-- Error handling ------------------------------------------------------------- 76 77-- | common parser error definition 78data ParseError input 79 = NotEnough (CountOf (Element input)) 80 -- ^ meaning the parser was short of @CountOf@ @Element@ of `input`. 81 | NotEnoughParseOnly 82 -- ^ The parser needed more data, only when using @parseOnly@ 83 | ExpectedElement (Element input) (Element input) 84 -- ^ when using @element@ 85 | Expected (Chunk input) (Chunk input) 86 -- ^ when using @elements@ or @string@ 87 | Satisfy (Maybe String) 88 -- ^ the @satisfy@ or @satisfy_@ function failed, 89 deriving (Typeable) 90instance (Typeable input, Show input) => Exception (ParseError input) 91 92instance Show input => Show (ParseError input) where 93 show (NotEnough (CountOf sz)) = "NotEnough: missing " <> show sz <> " element(s)" 94 show NotEnoughParseOnly = "NotEnough, parse only" 95 show (ExpectedElement _ _) = "Expected _ but received _" 96 show (Expected _ _) = "Expected _ but received _" 97 show (Satisfy Nothing) = "Satisfy" 98 show (Satisfy (Just s)) = "Satisfy: " <> toList s 99 100instance {-# OVERLAPPING #-} Show (ParseError String) where 101 show (NotEnough (CountOf sz)) = "NotEnough: missing " <> show sz <> " element(s)" 102 show NotEnoughParseOnly = "NotEnough, parse only" 103 show (ExpectedElement a b) = "Expected "<>show a<>" but received " <> show b 104 show (Expected a b) = "Expected "<>show a<>" but received " <> show b 105 show (Satisfy Nothing) = "Satisfy" 106 show (Satisfy (Just s)) = "Satisfy: " <> toList s 107 108-- Results -------------------------------------------------------------------- 109 110-- | result of executing the `parser` over the given `input` 111data Result input result 112 = ParseFailed (ParseError input) 113 -- ^ the parser failed with the given @ParserError@ 114 | ParseOk (Chunk input) result 115 -- ^ the parser complete successfuly with the remaining @Chunk@ 116 | ParseMore (Chunk input -> Result input result) 117 -- ^ the parser needs more input, pass an empty @Chunk@ or @mempty@ 118 -- to tell the parser you don't have anymore inputs. 119 120instance (Show k, Show input) => Show (Result input k) where 121 show (ParseFailed err) = "Parser failed: " <> show err 122 show (ParseOk _ k) = "Parser succeed: " <> show k 123 show (ParseMore _) = "Parser incomplete: need more" 124instance Functor (Result input) where 125 fmap f r = case r of 126 ParseFailed err -> ParseFailed err 127 ParseOk rest a -> ParseOk rest (f a) 128 ParseMore more -> ParseMore (fmap f . more) 129 130-- Parser Source -------------------------------------------------------------- 131 132class (Sequential input, IndexedCollection input) => ParserSource input where 133 type Chunk input 134 135 nullChunk :: input -> Chunk input -> Bool 136 137 appendChunk :: input -> Chunk input -> input 138 139 subChunk :: input -> Offset (Element input) -> CountOf (Element input) -> Chunk input 140 141 spanChunk :: input -> Offset (Element input) -> (Element input -> Bool) -> (Chunk input, Offset (Element input)) 142 143endOfParserSource :: ParserSource input => input -> Offset (Element input) -> Bool 144endOfParserSource l off = off .==# length l 145{-# INLINE endOfParserSource #-} 146 147-- Parser --------------------------------------------------------------------- 148 149data NoMore = More | NoMore 150 deriving (Show, Eq) 151 152type Failure input result = input -> Offset (Element input) -> NoMore -> ParseError input -> Result input result 153 154type Success input result' result = input -> Offset (Element input) -> NoMore -> result' -> Result input result 155 156-- | Foundation's @Parser@ monad. 157-- 158-- Its implementation is based on the parser in `memory`. 159newtype Parser input result = Parser 160 { runParser :: forall result' 161 . input -> Offset (Element input) -> NoMore 162 -> Failure input result' 163 -> Success input result result' 164 -> Result input result' 165 } 166 167instance Functor (Parser input) where 168 fmap f fa = Parser $ \buf off nm err ok -> 169 runParser fa buf off nm err $ \buf' off' nm' a -> ok buf' off' nm' (f a) 170 {-# INLINE fmap #-} 171 172instance ParserSource input => Applicative (Parser input) where 173 pure a = Parser $ \buf off nm _ ok -> ok buf off nm a 174 {-# INLINE pure #-} 175 fab <*> fa = Parser $ \buf0 off0 nm0 err ok -> 176 runParser fab buf0 off0 nm0 err $ \buf1 off1 nm1 ab -> 177 runParser_ fa buf1 off1 nm1 err $ \buf2 off2 nm2 -> ok buf2 off2 nm2 . ab 178 {-# INLINE (<*>) #-} 179 180instance ParserSource input => Monad (Parser input) where 181 return = pure 182 {-# INLINE return #-} 183 m >>= k = Parser $ \buf off nm err ok -> 184 runParser m buf off nm err $ \buf' off' nm' a -> 185 runParser_ (k a) buf' off' nm' err ok 186 {-# INLINE (>>=) #-} 187 188instance ParserSource input => MonadPlus (Parser input) where 189 mzero = error "Foundation.Parser.Internal.MonadPlus.mzero" 190 mplus f g = Parser $ \buf off nm err ok -> 191 runParser f buf off nm (\buf' _ nm' _ -> runParser g buf' off nm' err ok) ok 192 {-# INLINE mplus #-} 193instance ParserSource input => Alternative (Parser input) where 194 empty = error "Foundation.Parser.Internal.Alternative.empty" 195 (<|>) = mplus 196 {-# INLINE (<|>) #-} 197 198runParser_ :: ParserSource input 199 => Parser input result 200 -> input 201 -> Offset (Element input) 202 -> NoMore 203 -> Failure input result' 204 -> Success input result result' 205 -> Result input result' 206runParser_ parser buf off NoMore err ok = runParser parser buf off NoMore err ok 207runParser_ parser buf off nm err ok 208 | endOfParserSource buf off = ParseMore $ \chunk -> 209 if nullChunk buf chunk 210 then runParser parser buf off NoMore err ok 211 else runParser parser (appendChunk buf chunk) off nm err ok 212 | otherwise = runParser parser buf off nm err ok 213{-# INLINE runParser_ #-} 214 215-- | Run a parser on an @initial input. 216-- 217-- If the Parser need more data than available, the @feeder function 218-- is automatically called and fed to the More continuation. 219parseFeed :: (ParserSource input, Monad m) 220 => m (Chunk input) 221 -> Parser input a 222 -> input 223 -> m (Result input a) 224parseFeed feeder p initial = loop $ parse p initial 225 where loop (ParseMore k) = feeder >>= (loop . k) 226 loop r = return r 227 228-- | Run a Parser on a ByteString and return a 'Result' 229parse :: ParserSource input 230 => Parser input a -> input -> Result input a 231parse p s = runParser p s 0 More failure success 232 233failure :: input -> Offset (Element input) -> NoMore -> ParseError input -> Result input r 234failure _ _ _ = ParseFailed 235{-# INLINE failure #-} 236 237success :: ParserSource input => input -> Offset (Element input) -> NoMore -> r -> Result input r 238success buf off _ = ParseOk rest 239 where 240 !rest = subChunk buf off (length buf `sizeSub` offsetAsSize off) 241{-# INLINE success #-} 242 243-- | parse only the given input 244-- 245-- The left-over `Element input` will be ignored, if the parser call for more 246-- data it will be continuously fed with `Nothing` (up to 256 iterations). 247-- 248parseOnly :: (ParserSource input, Monoid (Chunk input)) 249 => Parser input a 250 -> input 251 -> Either (ParseError input) a 252parseOnly p i = case runParser p i 0 NoMore failure success of 253 ParseFailed err -> Left err 254 ParseOk _ r -> Right r 255 ParseMore _ -> Left NotEnoughParseOnly 256 257-- ------------------------------------------------------------------------- -- 258-- String Parser -- 259-- ------------------------------------------------------------------------- -- 260 261instance ParserSource String where 262 type Chunk String = String 263 nullChunk _ = null 264 {-# INLINE nullChunk #-} 265 appendChunk = mappend 266 {-# INLINE appendChunk #-} 267 subChunk c off sz = C.take sz $ C.drop (offsetAsSize off) c 268 {-# INLINE subChunk #-} 269 spanChunk buf off predicate = 270 let c = C.drop (offsetAsSize off) buf 271 (t, _) = C.span predicate c 272 in (t, off `offsetPlusE` length t) 273 {-# INLINE spanChunk #-} 274 275instance ParserSource [a] where 276 type Chunk [a] = [a] 277 nullChunk _ = null 278 {-# INLINE nullChunk #-} 279 appendChunk = mappend 280 {-# INLINE appendChunk #-} 281 subChunk c off sz = C.take sz $ C.drop (offsetAsSize off) c 282 {-# INLINE subChunk #-} 283 spanChunk buf off predicate = 284 let c = C.drop (offsetAsSize off) buf 285 (t, _) = C.span predicate c 286 in (t, off `offsetPlusE` length t) 287 {-# INLINE spanChunk #-} 288 289-- ------------------------------------------------------------------------- -- 290-- Helpers -- 291-- ------------------------------------------------------------------------- -- 292 293-- | helper function to report error when writing parsers 294-- 295-- This way we can provide more detailed error when building custom 296-- parsers and still avoid to use the naughty _fail_. 297-- 298-- @ 299-- myParser :: Parser input Int 300-- myParser = reportError $ Satisfy (Just "this function is not implemented...") 301-- @ 302-- 303reportError :: ParseError input -> Parser input a 304reportError pe = Parser $ \buf off nm err _ -> err buf off nm pe 305 306-- | Get the next `Element input` from the parser 307anyElement :: ParserSource input => Parser input (Element input) 308anyElement = Parser $ \buf off nm err ok -> 309 case buf ! off of 310 Nothing -> err buf off nm $ NotEnough 1 311 Just x -> ok buf (succ off) nm x 312{-# INLINE anyElement #-} 313 314-- | peek the first element from the input source without consuming it 315-- 316-- Returns 'Nothing' if there is no more input to parse. 317-- 318peek :: ParserSource input => Parser input (Maybe (Element input)) 319peek = Parser $ \buf off nm err ok -> 320 case buf ! off of 321 Nothing -> runParser_ peekOnly buf off nm err ok 322 Just x -> ok buf off nm (Just x) 323 where 324 peekOnly = Parser $ \buf off nm _ ok -> 325 ok buf off nm (buf ! off) 326 327element :: ( ParserSource input 328 , Eq (Element input) 329 , Element input ~ Element (Chunk input) 330 ) 331 => Element input 332 -> Parser input () 333element expectedElement = Parser $ \buf off nm err ok -> 334 case buf ! off of 335 Nothing -> err buf off nm $ NotEnough 1 336 Just x | expectedElement == x -> ok buf (succ off) nm () 337 | otherwise -> err buf off nm $ ExpectedElement expectedElement x 338{-# INLINE element #-} 339 340elements :: ( ParserSource input, Sequential (Chunk input) 341 , Element (Chunk input) ~ Element input 342 , Eq (Chunk input) 343 ) 344 => Chunk input -> Parser input () 345elements = consumeEq 346 where 347 consumeEq :: ( ParserSource input 348 , Sequential (Chunk input) 349 , Element (Chunk input) ~ Element input 350 , Eq (Chunk input) 351 ) 352 => Chunk input -> Parser input () 353 consumeEq expected = Parser $ \buf off nm err ok -> 354 if endOfParserSource buf off 355 then 356 err buf off nm $ NotEnough lenE 357 else 358 let !lenI = sizeAsOffset (length buf) - off 359 in if lenI >= lenE 360 then 361 let a = subChunk buf off lenE 362 in if a == expected 363 then ok buf (off + sizeAsOffset lenE) nm () 364 else err buf off nm $ Expected expected a 365 else 366 let a = subChunk buf off lenI 367 (e', r) = splitAt lenI expected 368 in if a == e' 369 then runParser_ (consumeEq r) buf (off + sizeAsOffset lenI) nm err ok 370 else err buf off nm $ Expected e' a 371 where 372 !lenE = length expected 373 {-# NOINLINE consumeEq #-} 374{-# INLINE elements #-} 375 376-- | take one element if satisfy the given predicate 377satisfy :: ParserSource input => Maybe String -> (Element input -> Bool) -> Parser input (Element input) 378satisfy desc predicate = Parser $ \buf off nm err ok -> 379 case buf ! off of 380 Nothing -> err buf off nm $ NotEnough 1 381 Just x | predicate x -> ok buf (succ off) nm x 382 | otherwise -> err buf off nm $ Satisfy desc 383{-# INLINE satisfy #-} 384 385-- | take one element if satisfy the given predicate 386satisfy_ :: ParserSource input => (Element input -> Bool) -> Parser input (Element input) 387satisfy_ = satisfy Nothing 388{-# INLINE satisfy_ #-} 389 390take :: ( ParserSource input 391 , Sequential (Chunk input) 392 , Element input ~ Element (Chunk input) 393 ) 394 => CountOf (Element (Chunk input)) 395 -> Parser input (Chunk input) 396take n = Parser $ \buf off nm err ok -> 397 let lenI = sizeAsOffset (length buf) - off 398 in if endOfParserSource buf off && n > 0 399 then err buf off nm $ NotEnough n 400 else case n - lenI of 401 Just s | s > 0 -> let h = subChunk buf off lenI 402 in runParser_ (take s) buf (sizeAsOffset lenI) nm err $ 403 \buf' off' nm' t -> ok buf' off' nm' (h <> t) 404 _ -> ok buf (off + sizeAsOffset n) nm (subChunk buf off n) 405 406takeWhile :: ( ParserSource input, Sequential (Chunk input) 407 ) 408 => (Element input -> Bool) 409 -> Parser input (Chunk input) 410takeWhile predicate = Parser $ \buf off nm err ok -> 411 if endOfParserSource buf off 412 then ok buf off nm mempty 413 else let (b1, off') = spanChunk buf off predicate 414 in if endOfParserSource buf off' 415 then runParser_ (takeWhile predicate) buf off' nm err 416 $ \buf' off'' nm' b1T -> ok buf' off'' nm' (b1 <> b1T) 417 else ok buf off' nm b1 418 419-- | Take the remaining elements from the current position in the stream 420takeAll :: (ParserSource input, Sequential (Chunk input)) => Parser input (Chunk input) 421takeAll = getAll >> returnBuffer 422 where 423 returnBuffer :: ParserSource input => Parser input (Chunk input) 424 returnBuffer = Parser $ \buf off nm _ ok -> 425 let !lenI = length buf 426 !off' = sizeAsOffset lenI 427 !sz = off' - off 428 in ok buf off' nm (subChunk buf off sz) 429 {-# INLINE returnBuffer #-} 430 431 getAll :: (ParserSource input, Sequential (Chunk input)) => Parser input () 432 getAll = Parser $ \buf off nm err ok -> 433 case nm of 434 NoMore -> ok buf off nm () 435 More -> ParseMore $ \nextChunk -> 436 if nullChunk buf nextChunk 437 then ok buf off NoMore () 438 else runParser getAll (appendChunk buf nextChunk) off nm err ok 439 {-# NOINLINE getAll #-} 440{-# INLINE takeAll #-} 441 442skip :: ParserSource input => CountOf (Element input) -> Parser input () 443skip n = Parser $ \buf off nm err ok -> 444 let lenI = sizeAsOffset (length buf) - off 445 in if endOfParserSource buf off && n > 0 446 then err buf off nm $ NotEnough n 447 else case n - lenI of 448 Just s | s > 0 -> runParser_ (skip s) buf (sizeAsOffset lenI) nm err ok 449 _ -> ok buf (off + sizeAsOffset n) nm () 450 451skipWhile :: ( ParserSource input, Sequential (Chunk input) 452 ) 453 => (Element input -> Bool) 454 -> Parser input () 455skipWhile predicate = Parser $ \buf off nm err ok -> 456 if endOfParserSource buf off 457 then ok buf off nm () 458 else let (_, off') = spanChunk buf off predicate 459 in if endOfParserSource buf off' 460 then runParser_ (skipWhile predicate) buf off' nm err ok 461 else ok buf off' nm () 462 463-- | consume every chunk of the stream 464-- 465skipAll :: (ParserSource input, Collection (Chunk input)) => Parser input () 466skipAll = flushAll 467 where 468 flushAll :: (ParserSource input, Collection (Chunk input)) => Parser input () 469 flushAll = Parser $ \buf off nm err ok -> 470 let !off' = sizeAsOffset $ length buf in 471 case nm of 472 NoMore -> ok buf off' NoMore () 473 More -> ParseMore $ \nextChunk -> 474 if null nextChunk 475 then ok buf off' NoMore () 476 else runParser flushAll buf off nm err ok 477 {-# NOINLINE flushAll #-} 478{-# INLINE skipAll #-} 479 480string :: String -> Parser String () 481string = elements 482{-# INLINE string #-} 483 484data Condition = Between !And | Exactly !Word 485 deriving (Show, Eq, Typeable) 486data And = And !Word !Word 487 deriving (Eq, Typeable) 488instance Show And where 489 show (And a b) = show a <> " and " <> show b 490 491-- | repeat the given parser a given amount of time 492-- 493-- Unlike @some@ or @many@, this operation will bring more precision on how 494-- many times you wish a parser to be sequenced. 495-- 496-- ## Repeat @Exactly@ a number of time 497-- 498-- > repeat (Exactly 6) (takeWhile ((/=) ',') <* element ',') 499-- 500-- ## Repeat @Between@ lower `@And@` upper times 501-- 502-- > repeat (Between $ 1 `And` 10) (takeWhile ((/=) ',') <* element ',') 503-- 504repeat :: ParserSource input 505 => Condition -> Parser input a -> Parser input [a] 506repeat (Exactly n) = repeatE n 507repeat (Between a) = repeatA a 508 509repeatE :: (ParserSource input) 510 => Word -> Parser input a -> Parser input [a] 511repeatE 0 _ = return [] 512repeatE n p = (:) <$> p <*> repeatE (n-1) p 513 514repeatA :: (ParserSource input) 515 => And -> Parser input a -> Parser input [a] 516repeatA (And 0 0) _ = return [] 517repeatA (And 0 n) p = ((:) <$> p <*> repeatA (And 0 (n-1)) p) <|> return [] 518repeatA (And l u) p = (:) <$> p <*> repeatA (And (l-1) (u-1)) p 519