1{-# LANGUAGE BangPatterns, CPP, DeriveFunctor #-} 2 3-- | This module allows for streaming decoding of CSV data. This is 4-- useful if you need to parse large amounts of input in constant 5-- space. The API also allows you to ignore type conversion errors on 6-- a per-record basis. 7module Data.Csv.Streaming 8 ( 9 -- * Usage example 10 -- $example 11 12 -- * Stream representation 13 -- $stream-representation 14 Records(..) 15 16 -- * Decoding records 17 -- $typeconversion 18 19 -- ** Index-based record conversion 20 -- $indexbased 21 , HasHeader(..) 22 , decode 23 , decodeWith 24 25 -- ** Name-based record conversion 26 -- $namebased 27 , decodeByName 28 , decodeByNameWith 29 ) where 30 31import Control.DeepSeq (NFData(rnf)) 32import qualified Data.ByteString as B 33import qualified Data.ByteString.Lazy as BL 34import qualified Data.ByteString.Lazy.Char8 as BL8 35import Data.Foldable (Foldable(..)) 36import Prelude hiding (foldr) 37 38import Data.Csv.Conversion 39import Data.Csv.Incremental hiding (decode, decodeByName, decodeByNameWith, 40 decodeWith) 41import qualified Data.Csv.Incremental as I 42import Data.Csv.Parser 43import Data.Csv.Types 44 45#if !MIN_VERSION_base(4,8,0) 46import Control.Applicative ((<$>), (<*>), pure) 47import Data.Traversable (Traversable(..)) 48#endif 49 50#if !MIN_VERSION_bytestring(0,10,0) 51import qualified Data.ByteString.Lazy.Internal as BL -- for constructors 52#endif 53 54-- $example 55-- 56-- A short usage example: 57-- 58-- > for_ (decode NoHeader "John,27\r\nJane,28\r\n") $ \ (name, age :: Int) -> 59-- > putStrLn $ name ++ " is " ++ show age ++ " years old" 60-- 61-- N.B. The 'Foldable' instance, which is used above, skips records 62-- that failed to convert. If you don't want this behavior, work 63-- directly with the 'Cons' and 'Nil' constructors. 64 65-- $stream-representation 66-- 67-- A stream of records is represented as a (lazy) list that may 68-- contain errors. 69 70-- $typeconversion 71-- 72-- Just like in the case of non-streaming decoding, there are two ways 73-- to convert CSV records to and from and user-defined data types: 74-- index-based conversion and name-based conversion. 75 76-- $indexbased 77-- 78-- See documentation on index-based conversion in "Data.Csv" for more 79-- information. 80 81-- $namebased 82-- 83-- See documentation on name-based conversion in "Data.Csv" for more 84-- information. 85 86-- | A stream of parsed records. If type conversion failed for the 87-- record, the error is returned as @'Left' errMsg@. 88data Records a 89 = -- | A record or an error message, followed by more records. 90 Cons (Either String a) (Records a) 91 92 -- | End of stream, potentially due to a parse error. If a parse 93 -- error occured, the first field contains the error message. 94 -- The second field contains any unconsumed input. 95 | Nil (Maybe String) BL.ByteString 96 deriving (Eq, Functor, Show) 97 98-- | Skips records that failed to convert. 99instance Foldable Records where 100 foldr = foldrRecords 101#if MIN_VERSION_base(4,6,0) 102 foldl' = foldlRecords' 103#endif 104 105foldrRecords :: (a -> b -> b) -> b -> Records a -> b 106foldrRecords f = go 107 where 108 go z (Cons (Right x) rs) = f x (go z rs) 109 go z (Cons (Left _) rs) = go z rs 110 go z _ = z 111{-# INLINE foldrRecords #-} 112 113#if MIN_VERSION_base(4,6,0) 114foldlRecords' :: (a -> b -> a) -> a -> Records b -> a 115foldlRecords' f = go 116 where 117 go z (Cons (Right x) rs) = let z' = f z x in z' `seq` go z' rs 118 go z (Cons (Left _) rs) = go z rs 119 go z _ = z 120{-# INLINE foldlRecords' #-} 121#endif 122 123instance Traversable Records where 124 traverse _ (Nil merr rest) = pure $ Nil merr rest 125 traverse f (Cons x xs) = Cons <$> traverseElem x <*> traverse f xs 126 where 127 traverseElem (Left err) = pure $ Left err 128 traverseElem (Right y) = Right <$> f y 129 130instance NFData a => NFData (Records a) where 131 rnf (Cons r rs) = rnf r `seq` rnf rs 132#if MIN_VERSION_bytestring(0,10,0) 133 rnf (Nil errMsg rest) = rnf errMsg `seq` rnf rest 134#else 135 rnf (Nil errMsg rest) = rnf errMsg `seq` rnfLazyByteString rest 136 137rnfLazyByteString :: BL.ByteString -> () 138rnfLazyByteString BL.Empty = () 139rnfLazyByteString (BL.Chunk _ b) = rnfLazyByteString b 140#endif 141 142-- | Efficiently deserialize CSV records in a streaming fashion. 143-- Equivalent to @'decodeWith' 'defaultDecodeOptions'@. 144decode :: FromRecord a 145 => HasHeader -- ^ Data contains header that should be 146 -- skipped 147 -> BL.ByteString -- ^ CSV data 148 -> Records a 149decode = decodeWith defaultDecodeOptions 150 151-- | Like 'decode', but lets you customize how the CSV data is parsed. 152decodeWith :: FromRecord a 153 => DecodeOptions -- ^ Decoding options 154 -> HasHeader -- ^ Data contains header that should be 155 -- skipped 156 -> BL.ByteString -- ^ CSV data 157 -> Records a 158decodeWith !opts hasHeader s0 = 159 go (BL.toChunks s0) (I.decodeWith opts hasHeader) 160 where 161 go ss (Done xs) = foldr Cons (Nil Nothing (BL.fromChunks ss)) xs 162 go ss (Fail rest err) = Nil (Just err) (BL.fromChunks (rest:ss)) 163 go [] (Many xs k) = foldr Cons (go [] (k B.empty)) xs 164 go (s:ss) (Many xs k) = foldr Cons (go ss (k s)) xs 165 166-- | Efficiently deserialize CSV in a streaming fashion. The data is 167-- assumed to be preceded by a header. Returns @'Left' errMsg@ if 168-- parsing the header fails. Equivalent to @'decodeByNameWith' 169-- 'defaultDecodeOptions'@. 170decodeByName :: FromNamedRecord a 171 => BL.ByteString -- ^ CSV data 172 -> Either String (Header, Records a) 173decodeByName = decodeByNameWith defaultDecodeOptions 174 175-- TODO: Include something more in error messages? 176 177-- | Like 'decodeByName', but lets you customize how the CSV data is 178-- parsed. 179decodeByNameWith :: FromNamedRecord a 180 => DecodeOptions -- ^ Decoding options 181 -> BL.ByteString -- ^ CSV data 182 -> Either String (Header, Records a) 183decodeByNameWith !opts s0 = go (BL.toChunks s0) (I.decodeByNameWith opts) 184 where 185 go ss (DoneH hdr p) = Right (hdr, go2 ss p) 186 go ss (FailH rest err) = Left $ err ++ " at " ++ 187 show (BL8.unpack . BL.fromChunks $ rest : ss) 188 go [] (PartialH k) = go [] (k B.empty) 189 go (s:ss) (PartialH k) = go ss (k s) 190 191 go2 ss (Done xs) = foldr Cons (Nil Nothing (BL.fromChunks ss)) xs 192 go2 ss (Fail rest err) = Nil (Just err) (BL.fromChunks (rest:ss)) 193 go2 [] (Many xs k) = foldr Cons (go2 [] (k B.empty)) xs 194 go2 (s:ss) (Many xs k) = foldr Cons (go2 ss (k s)) xs 195