1{-# LANGUAGE BangPatterns #-} 2{-# LANGUAGE DeriveDataTypeable #-} 3{-# LANGUAGE FlexibleContexts #-} 4{-# LANGUAGE RankNTypes #-} 5 6-- | 7-- Copyright: 2011 Michael Snoyman, 2010 John Millikin 8-- License: MIT 9-- 10-- Consume attoparsec parsers via conduit. 11-- 12-- This code was taken from attoparsec-enumerator and adapted for conduits. 13module Data.Conduit.Attoparsec 14 ( -- * Sink 15 sinkParser 16 , sinkParserEither 17 -- * Conduit 18 , conduitParser 19 , conduitParserEither 20 21 -- * Types 22 , ParseError (..) 23 , Position (..) 24 , PositionRange (..) 25 -- * Classes 26 , AttoparsecInput 27 ) where 28 29import Control.Exception (Exception) 30import Control.Monad (unless) 31import qualified Data.ByteString as B 32import qualified Data.Text as T 33import qualified Data.Text.Internal as TI 34import Data.Typeable (Typeable) 35import Prelude hiding (lines) 36 37import qualified Data.Attoparsec.ByteString 38import qualified Data.Attoparsec.Text 39import qualified Data.Attoparsec.Types as A 40import Data.Conduit 41import Control.Monad.Trans.Resource (MonadThrow, throwM) 42 43-- | The context and message from a 'A.Fail' value. 44data ParseError = ParseError 45 { errorContexts :: [String] 46 , errorMessage :: String 47 , errorPosition :: Position 48 } | DivergentParser 49 deriving (Show, Typeable) 50 51instance Exception ParseError 52 53data Position = Position 54 { posLine :: {-# UNPACK #-} !Int 55 , posCol :: {-# UNPACK #-} !Int 56 , posOffset :: {-# UNPACK #-} !Int 57 -- ^ @since 1.2.0 58 } 59 deriving (Eq, Ord) 60 61instance Show Position where 62 show (Position l c off) = show l ++ ':' : show c ++ " (" ++ show off ++ ")" 63 64data PositionRange = PositionRange 65 { posRangeStart :: {-# UNPACK #-} !Position 66 , posRangeEnd :: {-# UNPACK #-} !Position 67 } 68 deriving (Eq, Ord) 69 70instance Show PositionRange where 71 show (PositionRange s e) = show s ++ '-' : show e 72 73-- | A class of types which may be consumed by an Attoparsec parser. 74class AttoparsecInput a where 75 parseA :: A.Parser a b -> a -> A.IResult a b 76 feedA :: A.IResult a b -> a -> A.IResult a b 77 empty :: a 78 isNull :: a -> Bool 79 getLinesCols :: a -> Position 80 81 -- | Return the beginning of the first input with the length of 82 -- the second input removed. Assumes the second string is shorter 83 -- than the first. 84 stripFromEnd :: a -> a -> a 85 86instance AttoparsecInput B.ByteString where 87 parseA = Data.Attoparsec.ByteString.parse 88 feedA = Data.Attoparsec.ByteString.feed 89 empty = B.empty 90 isNull = B.null 91 getLinesCols = B.foldl' f (Position 0 0 0) 92 where 93 f (Position l c o) ch 94 | ch == 10 = Position (l + 1) 0 (o + 1) 95 | otherwise = Position l (c + 1) (o + 1) 96 stripFromEnd b1 b2 = B.take (B.length b1 - B.length b2) b1 97 98instance AttoparsecInput T.Text where 99 parseA = Data.Attoparsec.Text.parse 100 feedA = Data.Attoparsec.Text.feed 101 empty = T.empty 102 isNull = T.null 103 getLinesCols = T.foldl' f (Position 0 0 0) 104 where 105 f (Position l c o) ch 106 | ch == '\n' = Position (l + 1) 0 (o + 1) 107 | otherwise = Position l (c + 1) (o + 1) 108 stripFromEnd (TI.Text arr1 off1 len1) (TI.Text _ _ len2) = 109 TI.text arr1 off1 (len1 - len2) 110 111-- | Convert an Attoparsec 'A.Parser' into a 'Sink'. The parser will 112-- be streamed bytes until it returns 'A.Done' or 'A.Fail'. 113-- 114-- If parsing fails, a 'ParseError' will be thrown with 'throwM'. 115-- 116-- Since 0.5.0 117sinkParser :: (AttoparsecInput a, MonadThrow m) => A.Parser a b -> ConduitT a o m b 118sinkParser = fmap snd . sinkParserPosErr (Position 1 1 0) 119 120-- | Same as 'sinkParser', but we return an 'Either' type instead 121-- of raising an exception. 122-- 123-- Since 1.1.5 124sinkParserEither :: (AttoparsecInput a, Monad m) => A.Parser a b -> ConduitT a o m (Either ParseError b) 125sinkParserEither = (fmap.fmap) snd . sinkParserPos (Position 1 1 0) 126 127 128-- | Consume a stream of parsed tokens, returning both the token and 129-- the position it appears at. This function will raise a 'ParseError' 130-- on bad input. 131-- 132-- Since 0.5.0 133conduitParser :: (AttoparsecInput a, MonadThrow m) => A.Parser a b -> ConduitT a (PositionRange, b) m () 134conduitParser parser = 135 conduit $ Position 1 1 0 136 where 137 conduit !pos = await >>= maybe (return ()) go 138 where 139 go x = do 140 leftover x 141 (!pos', !res) <- sinkParserPosErr pos parser 142 yield (PositionRange pos pos', res) 143 conduit pos' 144{-# SPECIALIZE conduitParser 145 :: MonadThrow m 146 => A.Parser T.Text b 147 -> ConduitT T.Text (PositionRange, b) m () #-} 148{-# SPECIALIZE conduitParser 149 :: MonadThrow m 150 => A.Parser B.ByteString b 151 -> ConduitT B.ByteString (PositionRange, b) m () #-} 152 153 154 155-- | Same as 'conduitParser', but we return an 'Either' type instead 156-- of raising an exception. 157conduitParserEither 158 :: (Monad m, AttoparsecInput a) 159 => A.Parser a b 160 -> ConduitT a (Either ParseError (PositionRange, b)) m () 161conduitParserEither parser = 162 conduit $ Position 1 1 0 163 where 164 conduit !pos = await >>= maybe (return ()) go 165 where 166 go x = do 167 leftover x 168 eres <- sinkParserPos pos parser 169 case eres of 170 Left e -> yield $ Left e 171 Right (!pos', !res) -> do 172 yield $! Right (PositionRange pos pos', res) 173 conduit pos' 174{-# SPECIALIZE conduitParserEither 175 :: Monad m 176 => A.Parser T.Text b 177 -> ConduitT T.Text (Either ParseError (PositionRange, b)) m () #-} 178{-# SPECIALIZE conduitParserEither 179 :: Monad m 180 => A.Parser B.ByteString b 181 -> ConduitT B.ByteString (Either ParseError (PositionRange, b)) m () #-} 182 183 184 185 186sinkParserPosErr 187 :: (AttoparsecInput a, MonadThrow m) 188 => Position 189 -> A.Parser a b 190 -> ConduitT a o m (Position, b) 191sinkParserPosErr pos0 p = sinkParserPos pos0 p >>= f 192 where 193 f (Left e) = throwM e 194 f (Right a) = return a 195{-# INLINE sinkParserPosErr #-} 196 197 198sinkParserPos 199 :: (AttoparsecInput a, Monad m) 200 => Position 201 -> A.Parser a b 202 -> ConduitT a o m (Either ParseError (Position, b)) 203sinkParserPos pos0 p = sink empty pos0 (parseA p) 204 where 205 sink prev pos parser = await >>= maybe close push 206 where 207 push c 208 | isNull c = sink prev pos parser 209 | otherwise = go False c $ parser c 210 211 close = go True prev (feedA (parser empty) empty) 212 213 go end c (A.Done lo x) = do 214 let pos' 215 | end = pos 216 | otherwise = addLinesCols prev pos 217 y = stripFromEnd c lo 218 pos'' = addLinesCols y pos' 219 unless (isNull lo) $ leftover lo 220 pos'' `seq` return $! Right (pos'', x) 221 go end c (A.Fail rest contexts msg) = 222 let x = stripFromEnd c rest 223 pos' 224 | end = pos 225 | otherwise = addLinesCols prev pos 226 pos'' = addLinesCols x pos' 227 in pos'' `seq` return $! Left (ParseError contexts msg pos'') 228 go end c (A.Partial parser') 229 | end = return $! Left DivergentParser 230 | otherwise = 231 pos' `seq` sink c pos' parser' 232 where 233 pos' = addLinesCols prev pos 234 235 addLinesCols :: AttoparsecInput a => a -> Position -> Position 236 addLinesCols x (Position lines cols off) = 237 lines' `seq` cols' `seq` off' `seq` Position lines' cols' off' 238 where 239 Position dlines dcols doff = getLinesCols x 240 lines' = lines + dlines 241 cols' = (if dlines > 0 then 1 else cols) + dcols 242 off' = off + doff 243{-# INLINE sinkParserPos #-} 244