1-- |
2-- Module      : Foundation.Parser
3-- License     : BSD-style
4-- Maintainer  : Haskell Foundation
5-- Stability   : experimental
6-- Portability : portable
7--
8-- The current implementation is mainly, if not copy/pasted, inspired from
9-- `memory`'s Parser.
10--
11-- Foundation Parser makes use of the Foundation's @Collection@ and
12-- @Sequential@ classes to allow you to define generic parsers over any
13-- @Sequential@ of inpu.
14--
15-- This way you can easily implements parsers over @LString@, @String@.
16--
17--
18-- > flip parseOnly "my.email@address.com" $ do
19-- >    EmailAddress
20-- >      <$> (takeWhile ((/=) '@' <*  element '@')
21-- >      <*> takeAll
22--
23
24{-# LANGUAGE Rank2Types #-}
25{-# LANGUAGE FlexibleContexts #-}
26{-# LANGUAGE FlexibleInstances #-}
27
28module Foundation.Parser
29    ( Parser
30    , parse
31    , parseFeed
32    , parseOnly
33    , -- * Result
34      Result(..)
35    , ParseError(..)
36    , reportError
37
38    , -- * Parser source
39      ParserSource(..)
40
41    , -- * combinator
42      peek
43    , element
44    , anyElement
45    , elements
46    , string
47
48    , satisfy
49    , satisfy_
50    , take
51    , takeWhile
52    , takeAll
53
54    , skip
55    , skipWhile
56    , skipAll
57
58    , (<|>)
59    , many
60    , some
61    , optional
62    , repeat, Condition(..), And(..)
63    ) where
64
65import           Control.Applicative (Alternative, empty, (<|>), many, some, optional)
66import           Control.Monad (MonadPlus, mzero, mplus)
67
68import           Basement.Compat.Base
69import           Basement.Types.OffsetSize
70import           Foundation.Numerical
71import           Foundation.Collection hiding (take, takeWhile)
72import qualified Foundation.Collection as C
73import           Foundation.String
74
75-- Error handling -------------------------------------------------------------
76
77-- | common parser error definition
78data ParseError input
79    = NotEnough (CountOf (Element input))
80        -- ^ meaning the parser was short of @CountOf@ @Element@ of `input`.
81    | NotEnoughParseOnly
82        -- ^ The parser needed more data, only when using @parseOnly@
83    | ExpectedElement (Element input) (Element input)
84        -- ^ when using @element@
85    | Expected (Chunk input) (Chunk input)
86        -- ^ when using @elements@ or @string@
87    | Satisfy (Maybe String)
88        -- ^ the @satisfy@ or @satisfy_@ function failed,
89  deriving (Typeable)
90instance (Typeable input, Show input) => Exception (ParseError input)
91
92instance Show input => Show (ParseError input) where
93    show (NotEnough (CountOf sz)) = "NotEnough: missing " <> show sz <> " element(s)"
94    show NotEnoughParseOnly    = "NotEnough, parse only"
95    show (ExpectedElement _ _) = "Expected _ but received _"
96    show (Expected _ _)        = "Expected _ but received _"
97    show (Satisfy Nothing)     = "Satisfy"
98    show (Satisfy (Just s))    = "Satisfy: " <> toList s
99
100instance {-# OVERLAPPING #-} Show (ParseError String) where
101    show (NotEnough (CountOf sz)) = "NotEnough: missing " <> show sz <> " element(s)"
102    show NotEnoughParseOnly    = "NotEnough, parse only"
103    show (ExpectedElement a b) = "Expected "<>show a<>" but received " <> show b
104    show (Expected a b)        = "Expected "<>show a<>" but received " <> show b
105    show (Satisfy Nothing)     = "Satisfy"
106    show (Satisfy (Just s))    = "Satisfy: " <> toList s
107
108-- Results --------------------------------------------------------------------
109
110-- | result of executing the `parser` over the given `input`
111data Result input result
112    = ParseFailed (ParseError input)
113        -- ^ the parser failed with the given @ParserError@
114    | ParseOk     (Chunk input) result
115        -- ^ the parser complete successfuly with the remaining @Chunk@
116    | ParseMore   (Chunk input -> Result input result)
117        -- ^ the parser needs more input, pass an empty @Chunk@ or @mempty@
118        -- to tell the parser you don't have anymore inputs.
119
120instance (Show k, Show input) => Show (Result input k) where
121    show (ParseFailed err) = "Parser failed: " <> show err
122    show (ParseOk _ k) = "Parser succeed: " <> show k
123    show (ParseMore _) = "Parser incomplete: need more"
124instance Functor (Result input) where
125    fmap f r = case r of
126        ParseFailed err -> ParseFailed err
127        ParseOk rest a  -> ParseOk rest (f a)
128        ParseMore more -> ParseMore (fmap f . more)
129
130-- Parser Source --------------------------------------------------------------
131
132class (Sequential input, IndexedCollection input) => ParserSource input where
133    type Chunk input
134
135    nullChunk :: input -> Chunk input -> Bool
136
137    appendChunk :: input -> Chunk input -> input
138
139    subChunk :: input -> Offset (Element input) -> CountOf (Element input) -> Chunk input
140
141    spanChunk :: input -> Offset (Element input) -> (Element input -> Bool) -> (Chunk input, Offset (Element input))
142
143endOfParserSource :: ParserSource input => input -> Offset (Element input) -> Bool
144endOfParserSource l off = off .==# length l
145{-# INLINE endOfParserSource #-}
146
147-- Parser ---------------------------------------------------------------------
148
149data NoMore = More | NoMore
150  deriving (Show, Eq)
151
152type Failure input         result = input -> Offset (Element input) -> NoMore -> ParseError input -> Result input result
153
154type Success input result' result = input -> Offset (Element input) -> NoMore -> result'          -> Result input result
155
156-- | Foundation's @Parser@ monad.
157--
158-- Its implementation is based on the parser in `memory`.
159newtype Parser input result = Parser
160    { runParser :: forall result'
161                 . input -> Offset (Element input) -> NoMore
162                -> Failure input        result'
163                -> Success input result result'
164                -> Result input result'
165    }
166
167instance Functor (Parser input) where
168    fmap f fa = Parser $ \buf off nm err ok ->
169        runParser fa buf off nm err $ \buf' off' nm' a -> ok buf' off' nm' (f a)
170    {-# INLINE fmap #-}
171
172instance ParserSource input => Applicative (Parser input) where
173    pure a = Parser $ \buf off nm _ ok -> ok buf off nm a
174    {-# INLINE pure #-}
175    fab <*> fa = Parser $ \buf0 off0 nm0 err ok ->
176        runParser  fab buf0 off0 nm0 err $ \buf1 off1 nm1 ab ->
177        runParser_ fa  buf1 off1 nm1 err $ \buf2 off2 nm2 -> ok buf2 off2 nm2 . ab
178    {-# INLINE (<*>) #-}
179
180instance ParserSource input => Monad (Parser input) where
181    return = pure
182    {-# INLINE return #-}
183    m >>= k       = Parser $ \buf off nm err ok ->
184        runParser  m     buf  off  nm  err $ \buf' off' nm' a ->
185        runParser_ (k a) buf' off' nm' err ok
186    {-# INLINE (>>=) #-}
187
188instance ParserSource input => MonadPlus (Parser input) where
189    mzero = error "Foundation.Parser.Internal.MonadPlus.mzero"
190    mplus f g = Parser $ \buf off nm err ok ->
191        runParser f buf off nm (\buf' _ nm' _ -> runParser g buf' off nm' err ok) ok
192    {-# INLINE mplus #-}
193instance ParserSource input => Alternative (Parser input) where
194    empty = error "Foundation.Parser.Internal.Alternative.empty"
195    (<|>) = mplus
196    {-# INLINE (<|>) #-}
197
198runParser_ :: ParserSource input
199           => Parser input result
200           -> input
201           -> Offset (Element input)
202           -> NoMore
203           -> Failure input        result'
204           -> Success input result result'
205           -> Result input         result'
206runParser_ parser buf off NoMore err ok = runParser parser buf off NoMore err ok
207runParser_ parser buf off nm     err ok
208    | endOfParserSource buf off = ParseMore $ \chunk ->
209        if nullChunk buf chunk
210            then runParser parser buf off NoMore err ok
211            else runParser parser (appendChunk buf chunk) off nm err ok
212    | otherwise = runParser parser buf                    off nm err ok
213{-# INLINE runParser_ #-}
214
215-- | Run a parser on an @initial input.
216--
217-- If the Parser need more data than available, the @feeder function
218-- is automatically called and fed to the More continuation.
219parseFeed :: (ParserSource input, Monad m)
220          => m (Chunk input)
221          -> Parser input a
222          -> input
223          -> m (Result input a)
224parseFeed feeder p initial = loop $ parse p initial
225  where loop (ParseMore k) = feeder >>= (loop . k)
226        loop r             = return r
227
228-- | Run a Parser on a ByteString and return a 'Result'
229parse :: ParserSource input
230      => Parser input a -> input -> Result input a
231parse p s = runParser p s 0 More failure success
232
233failure :: input -> Offset (Element input) -> NoMore -> ParseError input -> Result input r
234failure _ _ _ = ParseFailed
235{-# INLINE failure #-}
236
237success :: ParserSource input => input -> Offset (Element input) -> NoMore -> r -> Result input r
238success buf off _ = ParseOk rest
239  where
240    !rest = subChunk buf off (length buf `sizeSub` offsetAsSize off)
241{-# INLINE success #-}
242
243-- | parse only the given input
244--
245-- The left-over `Element input` will be ignored, if the parser call for more
246-- data it will be continuously fed with `Nothing` (up to 256 iterations).
247--
248parseOnly :: (ParserSource input, Monoid (Chunk input))
249          => Parser input a
250          -> input
251          -> Either (ParseError input) a
252parseOnly p i = case runParser p i 0 NoMore failure success of
253    ParseFailed err  -> Left err
254    ParseOk     _ r  -> Right r
255    ParseMore   _    -> Left NotEnoughParseOnly
256
257-- ------------------------------------------------------------------------- --
258--                              String Parser                                --
259-- ------------------------------------------------------------------------- --
260
261instance ParserSource String where
262    type Chunk String = String
263    nullChunk _ = null
264    {-# INLINE nullChunk #-}
265    appendChunk = mappend
266    {-# INLINE appendChunk #-}
267    subChunk c off sz = C.take sz $ C.drop (offsetAsSize off) c
268    {-# INLINE subChunk #-}
269    spanChunk buf off predicate =
270        let c      = C.drop (offsetAsSize off) buf
271            (t, _) = C.span predicate c
272          in (t, off `offsetPlusE` length t)
273    {-# INLINE spanChunk #-}
274
275instance ParserSource [a] where
276    type Chunk [a] = [a]
277    nullChunk _ = null
278    {-# INLINE nullChunk #-}
279    appendChunk = mappend
280    {-# INLINE appendChunk #-}
281    subChunk c off sz = C.take sz $ C.drop (offsetAsSize off) c
282    {-# INLINE subChunk #-}
283    spanChunk buf off predicate =
284        let c      = C.drop (offsetAsSize off) buf
285            (t, _) = C.span predicate c
286          in (t, off `offsetPlusE` length t)
287    {-# INLINE spanChunk #-}
288
289-- ------------------------------------------------------------------------- --
290--                          Helpers                                          --
291-- ------------------------------------------------------------------------- --
292
293-- | helper function to report error when writing parsers
294--
295-- This way we can provide more detailed error when building custom
296-- parsers and still avoid to use the naughty _fail_.
297--
298-- @
299-- myParser :: Parser input Int
300-- myParser = reportError $ Satisfy (Just "this function is not implemented...")
301-- @
302--
303reportError :: ParseError input -> Parser input a
304reportError pe = Parser $ \buf off nm err _ -> err buf off nm pe
305
306-- | Get the next `Element input` from the parser
307anyElement :: ParserSource input => Parser input (Element input)
308anyElement = Parser $ \buf off nm err ok ->
309    case buf ! off of
310        Nothing -> err buf off        nm $ NotEnough 1
311        Just x  -> ok  buf (succ off) nm x
312{-# INLINE anyElement #-}
313
314-- | peek the first element from the input source without consuming it
315--
316-- Returns 'Nothing' if there is no more input to parse.
317--
318peek :: ParserSource input => Parser input (Maybe (Element input))
319peek = Parser $ \buf off nm err ok ->
320    case buf ! off of
321        Nothing -> runParser_ peekOnly buf off nm err ok
322        Just x  -> ok buf off nm (Just x)
323  where
324    peekOnly = Parser $ \buf off nm _ ok ->
325        ok buf off nm (buf ! off)
326
327element :: ( ParserSource input
328           , Eq (Element input)
329           , Element input ~ Element (Chunk input)
330           )
331        => Element input
332        -> Parser input ()
333element expectedElement = Parser $ \buf off nm err ok ->
334    case buf ! off of
335        Nothing -> err buf off nm $ NotEnough 1
336        Just x | expectedElement == x -> ok  buf (succ off) nm ()
337               | otherwise            -> err buf off nm $ ExpectedElement expectedElement x
338{-# INLINE element #-}
339
340elements :: ( ParserSource input, Sequential (Chunk input)
341            , Element (Chunk input) ~ Element input
342            , Eq (Chunk input)
343            )
344         => Chunk input -> Parser input ()
345elements = consumeEq
346  where
347    consumeEq :: ( ParserSource input
348                 , Sequential (Chunk input)
349                 , Element (Chunk input) ~ Element input
350                 , Eq (Chunk input)
351                 )
352              => Chunk input -> Parser input ()
353    consumeEq expected = Parser $ \buf off nm err ok ->
354      if endOfParserSource buf off
355        then
356          err buf off nm $ NotEnough lenE
357        else
358          let !lenI = sizeAsOffset (length buf) - off
359           in if lenI >= lenE
360             then
361              let a = subChunk buf off lenE
362               in if a == expected
363                    then ok buf (off + sizeAsOffset lenE) nm ()
364                    else err buf off nm $ Expected expected a
365             else
366              let a = subChunk buf off lenI
367                  (e', r) = splitAt lenI expected
368               in if a == e'
369                    then runParser_ (consumeEq r) buf (off + sizeAsOffset lenI) nm err ok
370                    else err buf off nm $ Expected e' a
371      where
372        !lenE = length expected
373    {-# NOINLINE consumeEq #-}
374{-# INLINE elements #-}
375
376-- | take one element if satisfy the given predicate
377satisfy :: ParserSource input => Maybe String -> (Element input -> Bool) -> Parser input (Element input)
378satisfy desc predicate = Parser $ \buf off nm err ok ->
379    case buf ! off of
380        Nothing -> err buf off nm $ NotEnough 1
381        Just x | predicate x -> ok  buf (succ off) nm x
382               | otherwise   -> err buf off nm $ Satisfy desc
383{-# INLINE satisfy #-}
384
385-- | take one element if satisfy the given predicate
386satisfy_ :: ParserSource input => (Element input -> Bool) -> Parser input (Element input)
387satisfy_ = satisfy Nothing
388{-# INLINE satisfy_ #-}
389
390take :: ( ParserSource input
391        , Sequential (Chunk input)
392        , Element input ~ Element (Chunk input)
393        )
394     => CountOf (Element (Chunk input))
395     -> Parser input (Chunk input)
396take n = Parser $ \buf off nm err ok ->
397    let lenI = sizeAsOffset (length buf) - off
398     in if endOfParserSource buf off && n > 0
399       then err buf off nm $ NotEnough n
400       else case n - lenI of
401              Just s | s > 0 -> let h = subChunk buf off lenI
402                                 in runParser_ (take s) buf (sizeAsOffset lenI) nm err $
403                                      \buf' off' nm' t -> ok buf' off' nm' (h <> t)
404              _              -> ok buf (off + sizeAsOffset n) nm (subChunk buf off n)
405
406takeWhile :: ( ParserSource input, Sequential (Chunk input)
407             )
408          => (Element input -> Bool)
409          -> Parser input (Chunk input)
410takeWhile predicate = Parser $ \buf off nm err ok ->
411    if endOfParserSource buf off
412      then ok buf off nm mempty
413      else let (b1, off') = spanChunk buf off predicate
414            in if endOfParserSource buf off'
415                  then runParser_ (takeWhile predicate) buf off' nm err
416                          $ \buf' off'' nm' b1T -> ok buf' off'' nm' (b1 <> b1T)
417                  else ok buf off' nm b1
418
419-- | Take the remaining elements from the current position in the stream
420takeAll :: (ParserSource input, Sequential (Chunk input)) => Parser input (Chunk input)
421takeAll = getAll >> returnBuffer
422  where
423    returnBuffer :: ParserSource input => Parser input (Chunk input)
424    returnBuffer = Parser $ \buf off nm _ ok ->
425        let !lenI = length buf
426            !off' = sizeAsOffset lenI
427            !sz   = off' - off
428         in ok buf off' nm (subChunk buf off sz)
429    {-# INLINE returnBuffer #-}
430
431    getAll :: (ParserSource input, Sequential (Chunk input)) => Parser input ()
432    getAll = Parser $ \buf off nm err ok ->
433      case nm of
434        NoMore -> ok buf off nm ()
435        More   -> ParseMore $ \nextChunk ->
436          if nullChunk buf nextChunk
437            then ok buf off NoMore ()
438            else runParser getAll (appendChunk buf nextChunk) off nm err ok
439    {-# NOINLINE getAll #-}
440{-# INLINE takeAll #-}
441
442skip :: ParserSource input => CountOf (Element input) -> Parser input ()
443skip n = Parser $ \buf off nm err ok ->
444    let lenI = sizeAsOffset (length buf) - off
445     in if endOfParserSource buf off && n > 0
446       then err buf off nm $ NotEnough n
447       else case n - lenI of
448              Just s | s > 0 -> runParser_ (skip s) buf (sizeAsOffset lenI) nm err ok
449              _              -> ok buf (off + sizeAsOffset n) nm ()
450
451skipWhile :: ( ParserSource input, Sequential (Chunk input)
452             )
453          => (Element input -> Bool)
454          -> Parser input ()
455skipWhile predicate = Parser $ \buf off nm err ok ->
456    if endOfParserSource buf off
457      then ok buf off nm ()
458      else let (_, off') = spanChunk buf off predicate
459            in if endOfParserSource buf off'
460                  then runParser_ (skipWhile predicate) buf off' nm err ok
461                  else ok buf off' nm ()
462
463-- | consume every chunk of the stream
464--
465skipAll :: (ParserSource input, Collection (Chunk input)) => Parser input ()
466skipAll = flushAll
467  where
468    flushAll :: (ParserSource input, Collection (Chunk input)) => Parser input ()
469    flushAll = Parser $ \buf off nm err ok ->
470        let !off' = sizeAsOffset $ length buf in
471        case nm of
472            NoMore -> ok buf off' NoMore ()
473            More   -> ParseMore $ \nextChunk ->
474              if null nextChunk
475                then ok buf off' NoMore ()
476                else runParser flushAll buf off nm err ok
477    {-# NOINLINE flushAll #-}
478{-# INLINE skipAll #-}
479
480string :: String -> Parser String ()
481string = elements
482{-# INLINE string #-}
483
484data Condition = Between !And | Exactly !Word
485  deriving (Show, Eq, Typeable)
486data And = And !Word !Word
487  deriving (Eq, Typeable)
488instance Show And where
489    show (And a b) = show a <> " and " <> show b
490
491-- | repeat the given parser a given amount of time
492--
493-- Unlike @some@ or @many@, this operation will bring more precision on how
494-- many times you wish a parser to be sequenced.
495--
496-- ## Repeat @Exactly@ a number of time
497--
498-- > repeat (Exactly 6) (takeWhile ((/=) ',') <* element ',')
499--
500-- ## Repeat @Between@ lower `@And@` upper times
501--
502-- > repeat (Between $ 1 `And` 10) (takeWhile ((/=) ',') <* element ',')
503--
504repeat :: ParserSource input
505       => Condition -> Parser input a -> Parser input [a]
506repeat (Exactly n) = repeatE n
507repeat (Between a) = repeatA a
508
509repeatE :: (ParserSource input)
510        => Word -> Parser input a -> Parser input [a]
511repeatE 0 _ = return []
512repeatE n p = (:) <$> p <*> repeatE (n-1) p
513
514repeatA :: (ParserSource input)
515        => And -> Parser input a -> Parser input [a]
516repeatA (And 0 0) _ = return []
517repeatA (And 0 n) p = ((:) <$> p <*> repeatA (And 0     (n-1)) p) <|> return []
518repeatA (And l u) p =  (:) <$> p <*> repeatA (And (l-1) (u-1)) p
519