1{-# LANGUAGE BangPatterns #-} 2{-# LANGUAGE DeriveDataTypeable #-} 3{-# LANGUAGE FlexibleContexts #-} 4{-# LANGUAGE RankNTypes #-} 5 6-- | 7-- Copyright: 2011 Michael Snoyman, 2010 John Millikin 8-- License: MIT 9-- 10-- Consume attoparsec parsers via conduit. 11-- 12-- This code was taken from attoparsec-enumerator and adapted for conduits. 13module Data.Conduit.Attoparsec 14 ( -- * Sink 15 sinkParser 16 , sinkParserEither 17 -- * Conduit 18 , conduitParser 19 , conduitParserEither 20 21 -- * Types 22 , ParseError (..) 23 , Position (..) 24 , PositionRange (..) 25 -- * Classes 26 , AttoparsecInput 27 ) where 28 29import Control.Exception (Exception) 30import Control.Monad (unless) 31import qualified Data.ByteString as B 32import qualified Data.Text as T 33import qualified Data.Text.Internal as TI 34import Data.Typeable (Typeable) 35import Prelude hiding (lines) 36 37import qualified Data.Attoparsec.ByteString 38import qualified Data.Attoparsec.Text 39import qualified Data.Attoparsec.Types as A 40import Data.Conduit 41import Control.Monad.Trans.Resource (MonadThrow, throwM) 42 43-- | The context and message from a 'A.Fail' value. 44data ParseError = ParseError 45 { errorContexts :: [String] 46 , errorMessage :: String 47 , errorPosition :: Position 48 } | DivergentParser 49 deriving (Show, Typeable) 50 51instance Exception ParseError 52 53data Position = Position 54 { posLine :: {-# UNPACK #-} !Int 55 , posCol :: {-# UNPACK #-} !Int 56 , posOffset :: {-# UNPACK #-} !Int 57 -- ^ @since 1.2.0 58 } 59 deriving (Eq, Ord) 60 61instance Show Position where 62 show (Position l c off) = show l ++ ':' : show c ++ " (" ++ show off ++ ")" 63 64data PositionRange = PositionRange 65 { posRangeStart :: {-# UNPACK #-} !Position 66 , posRangeEnd :: {-# UNPACK #-} !Position 67 } 68 deriving (Eq, Ord) 69 70instance Show PositionRange where 71 show (PositionRange s e) = show s ++ '-' : show e 72 73-- | A class of types which may be consumed by an Attoparsec parser. 74class AttoparsecInput a where 75 parseA :: A.Parser a b -> a -> A.IResult a b 76 feedA :: A.IResult a b -> a -> A.IResult a b 77 empty :: a 78 isNull :: a -> Bool 79 notEmpty :: [a] -> [a] 80 getLinesCols :: a -> Position 81 82 -- | Return the beginning of the first input with the length of 83 -- the second input removed. Assumes the second string is shorter 84 -- than the first. 85 stripFromEnd :: a -> a -> a 86 87instance AttoparsecInput B.ByteString where 88 parseA = Data.Attoparsec.ByteString.parse 89 feedA = Data.Attoparsec.ByteString.feed 90 empty = B.empty 91 isNull = B.null 92 notEmpty = filter (not . B.null) 93 getLinesCols = B.foldl' f (Position 0 0 0) 94 where 95 f (Position l c o) ch 96 | ch == 10 = Position (l + 1) 0 (o + 1) 97 | otherwise = Position l (c + 1) (o + 1) 98 stripFromEnd b1 b2 = B.take (B.length b1 - B.length b2) b1 99 100instance AttoparsecInput T.Text where 101 parseA = Data.Attoparsec.Text.parse 102 feedA = Data.Attoparsec.Text.feed 103 empty = T.empty 104 isNull = T.null 105 notEmpty = filter (not . T.null) 106 getLinesCols = T.foldl' f (Position 0 0 0) 107 where 108 f (Position l c o) ch 109 | ch == '\n' = Position (l + 1) 0 (o + 1) 110 | otherwise = Position l (c + 1) (o + 1) 111 stripFromEnd (TI.Text arr1 off1 len1) (TI.Text _ _ len2) = 112 TI.text arr1 off1 (len1 - len2) 113 114-- | Convert an Attoparsec 'A.Parser' into a 'Sink'. The parser will 115-- be streamed bytes until it returns 'A.Done' or 'A.Fail'. 116-- 117-- If parsing fails, a 'ParseError' will be thrown with 'throwM'. 118-- 119-- Since 0.5.0 120sinkParser :: (AttoparsecInput a, MonadThrow m) => A.Parser a b -> Consumer a m b 121sinkParser = fmap snd . sinkParserPosErr (Position 1 1 0) 122 123-- | Same as 'sinkParser', but we return an 'Either' type instead 124-- of raising an exception. 125-- 126-- Since 1.1.5 127sinkParserEither :: (AttoparsecInput a, Monad m) => A.Parser a b -> Consumer a m (Either ParseError b) 128sinkParserEither = (fmap.fmap) snd . sinkParserPos (Position 1 1 0) 129 130 131-- | Consume a stream of parsed tokens, returning both the token and 132-- the position it appears at. This function will raise a 'ParseError' 133-- on bad input. 134-- 135-- Since 0.5.0 136conduitParser :: (AttoparsecInput a, MonadThrow m) => A.Parser a b -> Conduit a m (PositionRange, b) 137conduitParser parser = 138 conduit $ Position 1 1 0 139 where 140 conduit !pos = await >>= maybe (return ()) go 141 where 142 go x = do 143 leftover x 144 (!pos', !res) <- sinkParserPosErr pos parser 145 yield (PositionRange pos pos', res) 146 conduit pos' 147{-# SPECIALIZE conduitParser 148 :: MonadThrow m 149 => A.Parser T.Text b 150 -> Conduit T.Text m (PositionRange, b) #-} 151{-# SPECIALIZE conduitParser 152 :: MonadThrow m 153 => A.Parser B.ByteString b 154 -> Conduit B.ByteString m (PositionRange, b) #-} 155 156 157 158-- | Same as 'conduitParser', but we return an 'Either' type instead 159-- of raising an exception. 160conduitParserEither 161 :: (Monad m, AttoparsecInput a) 162 => A.Parser a b 163 -> Conduit a m (Either ParseError (PositionRange, b)) 164conduitParserEither parser = 165 conduit $ Position 1 1 0 166 where 167 conduit !pos = await >>= maybe (return ()) go 168 where 169 go x = do 170 leftover x 171 eres <- sinkParserPos pos parser 172 case eres of 173 Left e -> yield $ Left e 174 Right (!pos', !res) -> do 175 yield $! Right (PositionRange pos pos', res) 176 conduit pos' 177{-# SPECIALIZE conduitParserEither 178 :: Monad m 179 => A.Parser T.Text b 180 -> Conduit T.Text m (Either ParseError (PositionRange, b)) #-} 181{-# SPECIALIZE conduitParserEither 182 :: Monad m 183 => A.Parser B.ByteString b 184 -> Conduit B.ByteString m (Either ParseError (PositionRange, b)) #-} 185 186 187 188 189sinkParserPosErr 190 :: (AttoparsecInput a, MonadThrow m) 191 => Position 192 -> A.Parser a b 193 -> Consumer a m (Position, b) 194sinkParserPosErr pos0 p = sinkParserPos pos0 p >>= f 195 where 196 f (Left e) = throwM e 197 f (Right a) = return a 198{-# INLINE sinkParserPosErr #-} 199 200 201sinkParserPos 202 :: (AttoparsecInput a, Monad m) 203 => Position 204 -> A.Parser a b 205 -> Consumer a m (Either ParseError (Position, b)) 206sinkParserPos pos0 p = sink empty pos0 (parseA p) 207 where 208 sink prev pos parser = await >>= maybe close push 209 where 210 push c 211 | isNull c = sink prev pos parser 212 | otherwise = go False c $ parser c 213 214 close = go True prev (feedA (parser empty) empty) 215 216 go end c (A.Done lo x) = do 217 let pos' 218 | end = pos 219 | otherwise = addLinesCols prev pos 220 y = stripFromEnd c lo 221 pos'' = addLinesCols y pos' 222 unless (isNull lo) $ leftover lo 223 pos'' `seq` return $! Right (pos'', x) 224 go end c (A.Fail rest contexts msg) = 225 let x = stripFromEnd c rest 226 pos' 227 | end = pos 228 | otherwise = addLinesCols prev pos 229 pos'' = addLinesCols x pos' 230 in pos'' `seq` return $! Left (ParseError contexts msg pos'') 231 go end c (A.Partial parser') 232 | end = return $! Left DivergentParser 233 | otherwise = 234 pos' `seq` sink c pos' parser' 235 where 236 pos' = addLinesCols prev pos 237 238 addLinesCols :: AttoparsecInput a => a -> Position -> Position 239 addLinesCols x (Position lines cols off) = 240 lines' `seq` cols' `seq` off' `seq` Position lines' cols' off' 241 where 242 Position dlines dcols doff = getLinesCols x 243 lines' = lines + dlines 244 cols' = (if dlines > 0 then 1 else cols) + dcols 245 off' = off + doff 246{-# INLINE sinkParserPos #-} 247