1{-# LANGUAGE BangPatterns       #-}
2{-# LANGUAGE DeriveDataTypeable #-}
3{-# LANGUAGE FlexibleContexts   #-}
4{-# LANGUAGE RankNTypes         #-}
5
6-- |
7-- Copyright: 2011 Michael Snoyman, 2010 John Millikin
8-- License: MIT
9--
10-- Consume attoparsec parsers via conduit.
11--
12-- This code was taken from attoparsec-enumerator and adapted for conduits.
13module Data.Conduit.Attoparsec
14    ( -- * Sink
15      sinkParser
16    , sinkParserEither
17      -- * Conduit
18    , conduitParser
19    , conduitParserEither
20
21      -- * Types
22    , ParseError (..)
23    , Position (..)
24    , PositionRange (..)
25      -- * Classes
26    , AttoparsecInput
27    ) where
28
29import           Control.Exception          (Exception)
30import           Control.Monad              (unless)
31import qualified Data.ByteString            as B
32import qualified Data.Text                  as T
33import qualified Data.Text.Internal         as TI
34import           Data.Typeable              (Typeable)
35import           Prelude                    hiding (lines)
36
37import qualified Data.Attoparsec.ByteString
38import qualified Data.Attoparsec.Text
39import qualified Data.Attoparsec.Types      as A
40import           Data.Conduit
41import Control.Monad.Trans.Resource (MonadThrow, throwM)
42
43-- | The context and message from a 'A.Fail' value.
44data ParseError = ParseError
45    { errorContexts :: [String]
46    , errorMessage  :: String
47    , errorPosition :: Position
48    } | DivergentParser
49    deriving (Show, Typeable)
50
51instance Exception ParseError
52
53data Position = Position
54    { posLine :: {-# UNPACK #-} !Int
55    , posCol  :: {-# UNPACK #-} !Int
56    , posOffset :: {-# UNPACK #-} !Int
57    -- ^ @since 1.2.0
58    }
59    deriving (Eq, Ord)
60
61instance Show Position where
62    show (Position l c off) = show l ++ ':' : show c ++ " (" ++ show off ++ ")"
63
64data PositionRange = PositionRange
65    { posRangeStart :: {-# UNPACK #-} !Position
66    , posRangeEnd   :: {-# UNPACK #-} !Position
67    }
68    deriving (Eq, Ord)
69
70instance Show PositionRange where
71    show (PositionRange s e) = show s ++ '-' : show e
72
73-- | A class of types which may be consumed by an Attoparsec parser.
74class AttoparsecInput a where
75    parseA :: A.Parser a b -> a -> A.IResult a b
76    feedA :: A.IResult a b -> a -> A.IResult a b
77    empty :: a
78    isNull :: a -> Bool
79    getLinesCols :: a -> Position
80
81    -- | Return the beginning of the first input with the length of
82    -- the second input removed. Assumes the second string is shorter
83    -- than the first.
84    stripFromEnd :: a -> a -> a
85
86instance AttoparsecInput B.ByteString where
87    parseA = Data.Attoparsec.ByteString.parse
88    feedA = Data.Attoparsec.ByteString.feed
89    empty = B.empty
90    isNull = B.null
91    getLinesCols = B.foldl' f (Position 0 0 0)
92      where
93        f (Position l c o) ch
94          | ch == 10 = Position (l + 1) 0 (o + 1)
95          | otherwise = Position l (c + 1) (o + 1)
96    stripFromEnd b1 b2 = B.take (B.length b1 - B.length b2) b1
97
98instance AttoparsecInput T.Text where
99    parseA = Data.Attoparsec.Text.parse
100    feedA = Data.Attoparsec.Text.feed
101    empty = T.empty
102    isNull = T.null
103    getLinesCols = T.foldl' f (Position 0 0 0)
104      where
105        f (Position l c o) ch
106          | ch == '\n' = Position (l + 1) 0 (o + 1)
107          | otherwise = Position l (c + 1) (o + 1)
108    stripFromEnd (TI.Text arr1 off1 len1) (TI.Text _ _ len2) =
109        TI.text arr1 off1 (len1 - len2)
110
111-- | Convert an Attoparsec 'A.Parser' into a 'Sink'. The parser will
112-- be streamed bytes until it returns 'A.Done' or 'A.Fail'.
113--
114-- If parsing fails, a 'ParseError' will be thrown with 'throwM'.
115--
116-- Since 0.5.0
117sinkParser :: (AttoparsecInput a, MonadThrow m) => A.Parser a b -> ConduitT a o m b
118sinkParser = fmap snd . sinkParserPosErr (Position 1 1 0)
119
120-- | Same as 'sinkParser', but we return an 'Either' type instead
121-- of raising an exception.
122--
123-- Since 1.1.5
124sinkParserEither :: (AttoparsecInput a, Monad m) => A.Parser a b -> ConduitT a o m (Either ParseError b)
125sinkParserEither = (fmap.fmap) snd . sinkParserPos (Position 1 1 0)
126
127
128-- | Consume a stream of parsed tokens, returning both the token and
129-- the position it appears at. This function will raise a 'ParseError'
130-- on bad input.
131--
132-- Since 0.5.0
133conduitParser :: (AttoparsecInput a, MonadThrow m) => A.Parser a b -> ConduitT a (PositionRange, b) m ()
134conduitParser parser =
135    conduit $ Position 1 1 0
136       where
137         conduit !pos = await >>= maybe (return ()) go
138             where
139               go x = do
140                   leftover x
141                   (!pos', !res) <- sinkParserPosErr pos parser
142                   yield (PositionRange pos pos', res)
143                   conduit pos'
144{-# SPECIALIZE conduitParser
145                   :: MonadThrow m
146                   => A.Parser T.Text b
147                   -> ConduitT T.Text (PositionRange, b) m () #-}
148{-# SPECIALIZE conduitParser
149                   :: MonadThrow m
150                   => A.Parser B.ByteString b
151                   -> ConduitT B.ByteString (PositionRange, b) m () #-}
152
153
154
155-- | Same as 'conduitParser', but we return an 'Either' type instead
156-- of raising an exception.
157conduitParserEither
158    :: (Monad m, AttoparsecInput a)
159    => A.Parser a b
160    -> ConduitT a (Either ParseError (PositionRange, b)) m ()
161conduitParserEither parser =
162    conduit $ Position 1 1 0
163  where
164    conduit !pos = await >>= maybe (return ()) go
165      where
166        go x = do
167          leftover x
168          eres <- sinkParserPos pos parser
169          case eres of
170            Left e -> yield $ Left e
171            Right (!pos', !res) -> do
172              yield $! Right (PositionRange pos pos', res)
173              conduit pos'
174{-# SPECIALIZE conduitParserEither
175                   :: Monad m
176                   => A.Parser T.Text b
177                   -> ConduitT T.Text (Either ParseError (PositionRange, b)) m () #-}
178{-# SPECIALIZE conduitParserEither
179                   :: Monad m
180                   => A.Parser B.ByteString b
181                   -> ConduitT B.ByteString (Either ParseError (PositionRange, b)) m () #-}
182
183
184
185
186sinkParserPosErr
187    :: (AttoparsecInput a, MonadThrow m)
188    => Position
189    -> A.Parser a b
190    -> ConduitT a o m (Position, b)
191sinkParserPosErr pos0 p = sinkParserPos pos0 p >>= f
192    where
193      f (Left e) = throwM e
194      f (Right a) = return a
195{-# INLINE sinkParserPosErr #-}
196
197
198sinkParserPos
199    :: (AttoparsecInput a, Monad m)
200    => Position
201    -> A.Parser a b
202    -> ConduitT a o m (Either ParseError (Position, b))
203sinkParserPos pos0 p = sink empty pos0 (parseA p)
204  where
205    sink prev pos parser = await >>= maybe close push
206      where
207        push c
208            | isNull c  = sink prev pos parser
209            | otherwise = go False c $ parser c
210
211        close = go True prev (feedA (parser empty) empty)
212
213        go end c (A.Done lo x) = do
214            let pos'
215                    | end       = pos
216                    | otherwise = addLinesCols prev pos
217                y = stripFromEnd c lo
218                pos'' = addLinesCols y pos'
219            unless (isNull lo) $ leftover lo
220            pos'' `seq` return $! Right (pos'', x)
221        go end c (A.Fail rest contexts msg) =
222            let x = stripFromEnd c rest
223                pos'
224                    | end       = pos
225                    | otherwise = addLinesCols prev pos
226                pos'' = addLinesCols x pos'
227             in pos'' `seq` return $! Left (ParseError contexts msg pos'')
228        go end c (A.Partial parser')
229            | end       = return $! Left DivergentParser
230            | otherwise =
231                pos' `seq` sink c pos' parser'
232              where
233                pos' = addLinesCols prev pos
234
235    addLinesCols :: AttoparsecInput a => a -> Position -> Position
236    addLinesCols x (Position lines cols off) =
237        lines' `seq` cols' `seq` off' `seq` Position lines' cols' off'
238      where
239        Position dlines dcols doff = getLinesCols x
240        lines' = lines + dlines
241        cols' = (if dlines > 0 then 1 else cols) + dcols
242        off' = off + doff
243{-# INLINE sinkParserPos #-}
244