1{-# LANGUAGE BangPatterns       #-}
2{-# LANGUAGE DeriveDataTypeable #-}
3{-# LANGUAGE FlexibleContexts   #-}
4{-# LANGUAGE RankNTypes         #-}
5
6-- |
7-- Copyright: 2011 Michael Snoyman, 2010 John Millikin
8-- License: MIT
9--
10-- Consume attoparsec parsers via conduit.
11--
12-- This code was taken from attoparsec-enumerator and adapted for conduits.
13module Data.Conduit.Attoparsec
14    ( -- * Sink
15      sinkParser
16    , sinkParserEither
17      -- * Conduit
18    , conduitParser
19    , conduitParserEither
20
21      -- * Types
22    , ParseError (..)
23    , Position (..)
24    , PositionRange (..)
25      -- * Classes
26    , AttoparsecInput
27    ) where
28
29import           Control.Exception          (Exception)
30import           Control.Monad              (unless)
31import qualified Data.ByteString            as B
32import qualified Data.Text                  as T
33import qualified Data.Text.Internal         as TI
34import           Data.Typeable              (Typeable)
35import           Prelude                    hiding (lines)
36
37import qualified Data.Attoparsec.ByteString
38import qualified Data.Attoparsec.Text
39import qualified Data.Attoparsec.Types      as A
40import           Data.Conduit
41import Control.Monad.Trans.Resource (MonadThrow, throwM)
42
43-- | The context and message from a 'A.Fail' value.
44data ParseError = ParseError
45    { errorContexts :: [String]
46    , errorMessage  :: String
47    , errorPosition :: Position
48    } | DivergentParser
49    deriving (Show, Typeable)
50
51instance Exception ParseError
52
53data Position = Position
54    { posLine :: {-# UNPACK #-} !Int
55    , posCol  :: {-# UNPACK #-} !Int
56    , posOffset :: {-# UNPACK #-} !Int
57    -- ^ @since 1.2.0
58    }
59    deriving (Eq, Ord)
60
61instance Show Position where
62    show (Position l c off) = show l ++ ':' : show c ++ " (" ++ show off ++ ")"
63
64data PositionRange = PositionRange
65    { posRangeStart :: {-# UNPACK #-} !Position
66    , posRangeEnd   :: {-# UNPACK #-} !Position
67    }
68    deriving (Eq, Ord)
69
70instance Show PositionRange where
71    show (PositionRange s e) = show s ++ '-' : show e
72
73-- | A class of types which may be consumed by an Attoparsec parser.
74class AttoparsecInput a where
75    parseA :: A.Parser a b -> a -> A.IResult a b
76    feedA :: A.IResult a b -> a -> A.IResult a b
77    empty :: a
78    isNull :: a -> Bool
79    notEmpty :: [a] -> [a]
80    getLinesCols :: a -> Position
81
82    -- | Return the beginning of the first input with the length of
83    -- the second input removed. Assumes the second string is shorter
84    -- than the first.
85    stripFromEnd :: a -> a -> a
86
87instance AttoparsecInput B.ByteString where
88    parseA = Data.Attoparsec.ByteString.parse
89    feedA = Data.Attoparsec.ByteString.feed
90    empty = B.empty
91    isNull = B.null
92    notEmpty = filter (not . B.null)
93    getLinesCols = B.foldl' f (Position 0 0 0)
94      where
95        f (Position l c o) ch
96          | ch == 10 = Position (l + 1) 0 (o + 1)
97          | otherwise = Position l (c + 1) (o + 1)
98    stripFromEnd b1 b2 = B.take (B.length b1 - B.length b2) b1
99
100instance AttoparsecInput T.Text where
101    parseA = Data.Attoparsec.Text.parse
102    feedA = Data.Attoparsec.Text.feed
103    empty = T.empty
104    isNull = T.null
105    notEmpty = filter (not . T.null)
106    getLinesCols = T.foldl' f (Position 0 0 0)
107      where
108        f (Position l c o) ch
109          | ch == '\n' = Position (l + 1) 0 (o + 1)
110          | otherwise = Position l (c + 1) (o + 1)
111    stripFromEnd (TI.Text arr1 off1 len1) (TI.Text _ _ len2) =
112        TI.text arr1 off1 (len1 - len2)
113
114-- | Convert an Attoparsec 'A.Parser' into a 'Sink'. The parser will
115-- be streamed bytes until it returns 'A.Done' or 'A.Fail'.
116--
117-- If parsing fails, a 'ParseError' will be thrown with 'throwM'.
118--
119-- Since 0.5.0
120sinkParser :: (AttoparsecInput a, MonadThrow m) => A.Parser a b -> Consumer a m b
121sinkParser = fmap snd . sinkParserPosErr (Position 1 1 0)
122
123-- | Same as 'sinkParser', but we return an 'Either' type instead
124-- of raising an exception.
125--
126-- Since 1.1.5
127sinkParserEither :: (AttoparsecInput a, Monad m) => A.Parser a b -> Consumer a m (Either ParseError b)
128sinkParserEither = (fmap.fmap) snd . sinkParserPos (Position 1 1 0)
129
130
131-- | Consume a stream of parsed tokens, returning both the token and
132-- the position it appears at. This function will raise a 'ParseError'
133-- on bad input.
134--
135-- Since 0.5.0
136conduitParser :: (AttoparsecInput a, MonadThrow m) => A.Parser a b -> Conduit a m (PositionRange, b)
137conduitParser parser =
138    conduit $ Position 1 1 0
139       where
140         conduit !pos = await >>= maybe (return ()) go
141             where
142               go x = do
143                   leftover x
144                   (!pos', !res) <- sinkParserPosErr pos parser
145                   yield (PositionRange pos pos', res)
146                   conduit pos'
147{-# SPECIALIZE conduitParser
148                   :: MonadThrow m
149                   => A.Parser T.Text b
150                   -> Conduit T.Text m (PositionRange, b) #-}
151{-# SPECIALIZE conduitParser
152                   :: MonadThrow m
153                   => A.Parser B.ByteString b
154                   -> Conduit B.ByteString m (PositionRange, b) #-}
155
156
157
158-- | Same as 'conduitParser', but we return an 'Either' type instead
159-- of raising an exception.
160conduitParserEither
161    :: (Monad m, AttoparsecInput a)
162    => A.Parser a b
163    -> Conduit a m (Either ParseError (PositionRange, b))
164conduitParserEither parser =
165    conduit $ Position 1 1 0
166  where
167    conduit !pos = await >>= maybe (return ()) go
168      where
169        go x = do
170          leftover x
171          eres <- sinkParserPos pos parser
172          case eres of
173            Left e -> yield $ Left e
174            Right (!pos', !res) -> do
175              yield $! Right (PositionRange pos pos', res)
176              conduit pos'
177{-# SPECIALIZE conduitParserEither
178                   :: Monad m
179                   => A.Parser T.Text b
180                   -> Conduit T.Text m (Either ParseError (PositionRange, b)) #-}
181{-# SPECIALIZE conduitParserEither
182                   :: Monad m
183                   => A.Parser B.ByteString b
184                   -> Conduit B.ByteString m (Either ParseError (PositionRange, b)) #-}
185
186
187
188
189sinkParserPosErr
190    :: (AttoparsecInput a, MonadThrow m)
191    => Position
192    -> A.Parser a b
193    -> Consumer a m (Position, b)
194sinkParserPosErr pos0 p = sinkParserPos pos0 p >>= f
195    where
196      f (Left e) = throwM e
197      f (Right a) = return a
198{-# INLINE sinkParserPosErr #-}
199
200
201sinkParserPos
202    :: (AttoparsecInput a, Monad m)
203    => Position
204    -> A.Parser a b
205    -> Consumer a m (Either ParseError (Position, b))
206sinkParserPos pos0 p = sink empty pos0 (parseA p)
207  where
208    sink prev pos parser = await >>= maybe close push
209      where
210        push c
211            | isNull c  = sink prev pos parser
212            | otherwise = go False c $ parser c
213
214        close = go True prev (feedA (parser empty) empty)
215
216        go end c (A.Done lo x) = do
217            let pos'
218                    | end       = pos
219                    | otherwise = addLinesCols prev pos
220                y = stripFromEnd c lo
221                pos'' = addLinesCols y pos'
222            unless (isNull lo) $ leftover lo
223            pos'' `seq` return $! Right (pos'', x)
224        go end c (A.Fail rest contexts msg) =
225            let x = stripFromEnd c rest
226                pos'
227                    | end       = pos
228                    | otherwise = addLinesCols prev pos
229                pos'' = addLinesCols x pos'
230             in pos'' `seq` return $! Left (ParseError contexts msg pos'')
231        go end c (A.Partial parser')
232            | end       = return $! Left DivergentParser
233            | otherwise =
234                pos' `seq` sink c pos' parser'
235              where
236                pos' = addLinesCols prev pos
237
238    addLinesCols :: AttoparsecInput a => a -> Position -> Position
239    addLinesCols x (Position lines cols off) =
240        lines' `seq` cols' `seq` off' `seq` Position lines' cols' off'
241      where
242        Position dlines dcols doff = getLinesCols x
243        lines' = lines + dlines
244        cols' = (if dlines > 0 then 1 else cols) + dcols
245        off' = off + doff
246{-# INLINE sinkParserPos #-}
247