1{-# LANGUAGE BangPatterns, CPP, DeriveFunctor #-}
2
3-- | This module allows for streaming decoding of CSV data. This is
4-- useful if you need to parse large amounts of input in constant
5-- space. The API also allows you to ignore type conversion errors on
6-- a per-record basis.
7module Data.Csv.Streaming
8    (
9    -- * Usage example
10    -- $example
11
12    -- * Stream representation
13    -- $stream-representation
14      Records(..)
15
16    -- * Decoding records
17    -- $typeconversion
18
19    -- ** Index-based record conversion
20    -- $indexbased
21    , HasHeader(..)
22    , decode
23    , decodeWith
24
25    -- ** Name-based record conversion
26    -- $namebased
27    , decodeByName
28    , decodeByNameWith
29    ) where
30
31import Control.DeepSeq (NFData(rnf))
32import qualified Data.ByteString as B
33import qualified Data.ByteString.Lazy as BL
34import qualified Data.ByteString.Lazy.Char8 as BL8
35import Data.Foldable (Foldable(..))
36import Prelude hiding (foldr)
37
38import Data.Csv.Conversion
39import Data.Csv.Incremental hiding (decode, decodeByName, decodeByNameWith,
40                                    decodeWith)
41import qualified Data.Csv.Incremental as I
42import Data.Csv.Parser
43import Data.Csv.Types
44
45#if !MIN_VERSION_base(4,8,0)
46import Control.Applicative ((<$>), (<*>), pure)
47import Data.Traversable (Traversable(..))
48#endif
49
50#if !MIN_VERSION_bytestring(0,10,0)
51import qualified Data.ByteString.Lazy.Internal as BL  -- for constructors
52#endif
53
54-- $example
55--
56-- A short usage example:
57--
58-- > for_ (decode NoHeader "John,27\r\nJane,28\r\n") $ \ (name, age :: Int) ->
59-- >     putStrLn $ name ++ " is " ++ show age ++ " years old"
60--
61-- N.B. The 'Foldable' instance, which is used above, skips records
62-- that failed to convert. If you don't want this behavior, work
63-- directly with the 'Cons' and 'Nil' constructors.
64
65-- $stream-representation
66--
67-- A stream of records is represented as a (lazy) list that may
68-- contain errors.
69
70-- $typeconversion
71--
72-- Just like in the case of non-streaming decoding, there are two ways
73-- to convert CSV records to and from and user-defined data types:
74-- index-based conversion and name-based conversion.
75
76-- $indexbased
77--
78-- See documentation on index-based conversion in "Data.Csv" for more
79-- information.
80
81-- $namebased
82--
83-- See documentation on name-based conversion in "Data.Csv" for more
84-- information.
85
86-- | A stream of parsed records. If type conversion failed for the
87-- record, the error is returned as @'Left' errMsg@.
88data Records a
89    = -- | A record or an error message, followed by more records.
90      Cons (Either String a) (Records a)
91
92      -- | End of stream, potentially due to a parse error. If a parse
93      -- error occured, the first field contains the error message.
94      -- The second field contains any unconsumed input.
95    | Nil (Maybe String) BL.ByteString
96    deriving (Eq, Functor, Show)
97
98-- | Skips records that failed to convert.
99instance Foldable Records where
100    foldr = foldrRecords
101#if MIN_VERSION_base(4,6,0)
102    foldl' = foldlRecords'
103#endif
104
105foldrRecords :: (a -> b -> b) -> b -> Records a -> b
106foldrRecords f = go
107  where
108    go z (Cons (Right x) rs) = f x (go z rs)
109    go z (Cons (Left _) rs) = go z rs
110    go z _ = z
111{-# INLINE foldrRecords #-}
112
113#if MIN_VERSION_base(4,6,0)
114foldlRecords' :: (a -> b -> a) -> a -> Records b -> a
115foldlRecords' f = go
116  where
117    go z (Cons (Right x) rs) = let z' = f z x in z' `seq` go z' rs
118    go z (Cons (Left _) rs) = go z rs
119    go z _ = z
120{-# INLINE foldlRecords' #-}
121#endif
122
123instance Traversable Records where
124    traverse _ (Nil merr rest) = pure $ Nil merr rest
125    traverse f (Cons x xs)     = Cons <$> traverseElem x <*> traverse f xs
126      where
127        traverseElem (Left err) = pure $ Left err
128        traverseElem (Right y)  = Right <$> f y
129
130instance NFData a => NFData (Records a) where
131    rnf (Cons r rs) = rnf r `seq` rnf rs
132#if MIN_VERSION_bytestring(0,10,0)
133    rnf (Nil errMsg rest) = rnf errMsg `seq` rnf rest
134#else
135    rnf (Nil errMsg rest) = rnf errMsg `seq` rnfLazyByteString rest
136
137rnfLazyByteString :: BL.ByteString -> ()
138rnfLazyByteString BL.Empty       = ()
139rnfLazyByteString (BL.Chunk _ b) = rnfLazyByteString b
140#endif
141
142-- | Efficiently deserialize CSV records in a streaming fashion.
143-- Equivalent to @'decodeWith' 'defaultDecodeOptions'@.
144decode :: FromRecord a
145       => HasHeader      -- ^ Data contains header that should be
146                         -- skipped
147       -> BL.ByteString  -- ^ CSV data
148       -> Records a
149decode = decodeWith defaultDecodeOptions
150
151-- | Like 'decode', but lets you customize how the CSV data is parsed.
152decodeWith :: FromRecord a
153           => DecodeOptions  -- ^ Decoding options
154           -> HasHeader      -- ^ Data contains header that should be
155                             -- skipped
156           -> BL.ByteString  -- ^ CSV data
157           -> Records a
158decodeWith !opts hasHeader s0 =
159    go (BL.toChunks s0) (I.decodeWith opts hasHeader)
160  where
161    go ss (Done xs)       = foldr Cons (Nil Nothing (BL.fromChunks ss)) xs
162    go ss (Fail rest err) = Nil (Just err) (BL.fromChunks (rest:ss))
163    go [] (Many xs k)     = foldr Cons (go [] (k B.empty)) xs
164    go (s:ss) (Many xs k) = foldr Cons (go ss (k s)) xs
165
166-- | Efficiently deserialize CSV in a streaming fashion. The data is
167-- assumed to be preceded by a header. Returns @'Left' errMsg@ if
168-- parsing the header fails. Equivalent to @'decodeByNameWith'
169-- 'defaultDecodeOptions'@.
170decodeByName :: FromNamedRecord a
171             => BL.ByteString  -- ^ CSV data
172             -> Either String (Header, Records a)
173decodeByName = decodeByNameWith defaultDecodeOptions
174
175-- TODO: Include something more in error messages?
176
177-- | Like 'decodeByName', but lets you customize how the CSV data is
178-- parsed.
179decodeByNameWith :: FromNamedRecord a
180                 => DecodeOptions  -- ^ Decoding options
181                 -> BL.ByteString  -- ^ CSV data
182                 -> Either String (Header, Records a)
183decodeByNameWith !opts s0 = go (BL.toChunks s0) (I.decodeByNameWith opts)
184  where
185    go ss (DoneH hdr p)    = Right (hdr, go2 ss p)
186    go ss (FailH rest err) = Left $ err ++ " at " ++
187                             show (BL8.unpack . BL.fromChunks $ rest : ss)
188    go [] (PartialH k)     = go [] (k B.empty)
189    go (s:ss) (PartialH k) = go ss (k s)
190
191    go2 ss (Done xs)       = foldr Cons (Nil Nothing (BL.fromChunks ss)) xs
192    go2 ss (Fail rest err) = Nil (Just err) (BL.fromChunks (rest:ss))
193    go2 [] (Many xs k)     = foldr Cons (go2 [] (k B.empty)) xs
194    go2 (s:ss) (Many xs k) = foldr Cons (go2 ss (k s)) xs
195