1{-# LANGUAGE DeriveDataTypeable #-}
2-- | This is a middle-level wrapper around the zlib C API. It allows you to
3-- work fully with bytestrings and not touch the FFI at all, but is still
4-- low-level enough to allow you to implement high-level abstractions such as
5-- enumerators. Significantly, it does not use lazy IO.
6--
7-- You'll probably need to reference the docs a bit to understand the
8-- WindowBits parameters below, but a basic rule of thumb is 15 is for zlib
9-- compression, and 31 for gzip compression.
10--
11-- A simple streaming compressor in pseudo-code would look like:
12--
13-- > def <- initDeflate ...
14-- > popper <- feedDeflate def rawContent
15-- > pullPopper popper
16-- > ...
17-- > finishDeflate def sendCompressedData
18--
19-- You can see a more complete example is available in the included
20-- file-test.hs.
21module Data.Streaming.Zlib
22    ( -- * Inflate
23      Inflate
24    , initInflate
25    , initInflateWithDictionary
26    , feedInflate
27    , finishInflate
28    , flushInflate
29    , getUnusedInflate
30    , isCompleteInflate
31      -- * Deflate
32    , Deflate
33    , initDeflate
34    , initDeflateWithDictionary
35    , feedDeflate
36    , finishDeflate
37    , flushDeflate
38    , fullFlushDeflate
39      -- * Data types
40    , WindowBits (..)
41    , defaultWindowBits
42    , ZlibException (..)
43    , Popper
44    , PopperRes (..)
45    ) where
46
47import Data.Streaming.Zlib.Lowlevel
48import Foreign.ForeignPtr
49import Foreign.C.Types
50import Data.ByteString.Unsafe
51import Codec.Compression.Zlib (WindowBits(WindowBits), defaultWindowBits)
52import qualified Data.ByteString as S
53import Data.ByteString.Lazy.Internal (defaultChunkSize)
54import Data.Typeable (Typeable)
55import Control.Exception (Exception)
56import Control.Monad (when)
57import Data.IORef
58
59type ZStreamPair = (ForeignPtr ZStreamStruct, ForeignPtr CChar)
60
61-- | The state of an inflation (eg, decompression) process. All allocated
62-- memory is automatically reclaimed by the garbage collector.
63-- Also can contain the inflation dictionary that is used for decompression.
64data Inflate = Inflate
65    ZStreamPair
66    (IORef S.ByteString) -- last ByteString fed in, needed for getUnusedInflate
67    (IORef Bool)         -- set True when zlib indicates that inflation is complete
68    (Maybe S.ByteString) -- dictionary
69
70-- | The state of a deflation (eg, compression) process. All allocated memory
71-- is automatically reclaimed by the garbage collector.
72newtype Deflate = Deflate ZStreamPair
73
74-- | Exception that can be thrown from the FFI code. The parameter is the
75-- numerical error code from the zlib library. Quoting the zlib.h file
76-- directly:
77--
78-- * #define Z_OK            0
79--
80-- * #define Z_STREAM_END    1
81--
82-- * #define Z_NEED_DICT     2
83--
84-- * #define Z_ERRNO        (-1)
85--
86-- * #define Z_STREAM_ERROR (-2)
87--
88-- * #define Z_DATA_ERROR   (-3)
89--
90-- * #define Z_MEM_ERROR    (-4)
91--
92-- * #define Z_BUF_ERROR    (-5)
93--
94-- * #define Z_VERSION_ERROR (-6)
95
96data ZlibException = ZlibException Int
97    deriving (Show, Typeable)
98instance Exception ZlibException
99
100-- | Some constants for the error codes, used internally
101zStreamEnd :: CInt
102zStreamEnd = 1
103
104zNeedDict :: CInt
105zNeedDict = 2
106
107zBufError :: CInt
108zBufError = -5
109
110-- | Initialize an inflation process with the given 'WindowBits'. You will need
111-- to call 'feedInflate' to feed compressed data to this and
112-- 'finishInflate' to extract the final chunk of decompressed data.
113initInflate :: WindowBits -> IO Inflate
114initInflate w = do
115    zstr <- zstreamNew
116    inflateInit2 zstr w
117    fzstr <- newForeignPtr c_free_z_stream_inflate zstr
118    fbuff <- mallocForeignPtrBytes defaultChunkSize
119    withForeignPtr fbuff $ \buff ->
120        c_set_avail_out zstr buff $ fromIntegral defaultChunkSize
121    lastBS <- newIORef S.empty
122    complete <- newIORef False
123    return $ Inflate (fzstr, fbuff) lastBS complete Nothing
124
125-- | Initialize an inflation process with the given 'WindowBits'.
126-- Unlike initInflate a dictionary for inflation is set which must
127-- match the one set during compression.
128initInflateWithDictionary :: WindowBits -> S.ByteString -> IO Inflate
129initInflateWithDictionary w bs = do
130    zstr <- zstreamNew
131    inflateInit2 zstr w
132    fzstr <- newForeignPtr c_free_z_stream_inflate zstr
133    fbuff <- mallocForeignPtrBytes defaultChunkSize
134
135    withForeignPtr fbuff $ \buff ->
136        c_set_avail_out zstr buff $ fromIntegral defaultChunkSize
137    lastBS <- newIORef S.empty
138    complete <- newIORef False
139    return $ Inflate (fzstr, fbuff) lastBS complete (Just bs)
140
141-- | Initialize a deflation process with the given compression level and
142-- 'WindowBits'. You will need to call 'feedDeflate' to feed uncompressed
143-- data to this and 'finishDeflate' to extract the final chunks of compressed
144-- data.
145initDeflate :: Int -- ^ Compression level
146            -> WindowBits -> IO Deflate
147initDeflate level w = do
148    zstr <- zstreamNew
149    deflateInit2 zstr level w 8 StrategyDefault
150    fzstr <- newForeignPtr c_free_z_stream_deflate zstr
151    fbuff <- mallocForeignPtrBytes defaultChunkSize
152    withForeignPtr fbuff $ \buff ->
153        c_set_avail_out zstr buff $ fromIntegral defaultChunkSize
154    return $ Deflate (fzstr, fbuff)
155
156-- | Initialize an deflation process with the given compression level and
157-- 'WindowBits'.
158-- Unlike initDeflate a dictionary for deflation is set.
159initDeflateWithDictionary :: Int -- ^ Compression level
160                          -> S.ByteString -- ^ Deflate dictionary
161                          -> WindowBits -> IO Deflate
162initDeflateWithDictionary level bs w = do
163    zstr <- zstreamNew
164    deflateInit2 zstr level w 8 StrategyDefault
165    fzstr <- newForeignPtr c_free_z_stream_deflate zstr
166    fbuff <- mallocForeignPtrBytes defaultChunkSize
167
168    unsafeUseAsCStringLen bs $ \(cstr, len) -> do
169        c_call_deflate_set_dictionary zstr cstr $ fromIntegral len
170
171    withForeignPtr fbuff $ \buff ->
172        c_set_avail_out zstr buff $ fromIntegral defaultChunkSize
173    return $ Deflate (fzstr, fbuff)
174
175-- | Feed the given 'S.ByteString' to the inflater. Return a 'Popper',
176-- an IO action that returns the decompressed data a chunk at a time.
177-- The 'Popper' must be called to exhaustion before using the 'Inflate'
178-- object again.
179--
180-- Note that this function automatically buffers the output to
181-- 'defaultChunkSize', and therefore you won't get any data from the popper
182-- until that much decompressed data is available. After you have fed all of
183-- the compressed data to this function, you can extract your final chunk of
184-- decompressed data using 'finishInflate'.
185feedInflate
186    :: Inflate
187    -> S.ByteString
188    -> IO Popper
189feedInflate (Inflate (fzstr, fbuff) lastBS complete inflateDictionary) bs = do
190    -- Write the BS to lastBS for use by getUnusedInflate. This is
191    -- theoretically unnecessary, since we could just grab the pointer from the
192    -- fzstr when needed. However, in that case, we wouldn't be holding onto a
193    -- reference to the ForeignPtr, so the GC may decide to collect the
194    -- ByteString in the interim.
195    writeIORef lastBS bs
196
197    withForeignPtr fzstr $ \zstr ->
198        unsafeUseAsCStringLen bs $ \(cstr, len) ->
199            c_set_avail_in zstr cstr $ fromIntegral len
200    return $ drain fbuff fzstr (Just bs) inflate False
201  where
202    inflate zstr = do
203        res <- c_call_inflate_noflush zstr
204        res2 <- if (res == zNeedDict)
205            then maybe (return zNeedDict)
206                       (\dict -> (unsafeUseAsCStringLen dict $ \(cstr, len) -> do
207                                    c_call_inflate_set_dictionary zstr cstr $ fromIntegral len
208                                    c_call_inflate_noflush zstr))
209                       inflateDictionary
210            else return res
211        when (res2 == zStreamEnd) (writeIORef complete True)
212        return res2
213
214-- | An IO action that returns the next chunk of data, returning 'PRDone' when
215-- there is no more data to be popped.
216type Popper = IO PopperRes
217
218data PopperRes = PRDone
219               | PRNext !S.ByteString
220               | PRError !ZlibException
221    deriving (Show, Typeable)
222
223-- | Ensure that the given @ByteString@ is not deallocated.
224keepAlive :: Maybe S.ByteString -> IO a -> IO a
225keepAlive Nothing = id
226keepAlive (Just bs) = unsafeUseAsCStringLen bs . const
227
228drain :: ForeignPtr CChar
229      -> ForeignPtr ZStreamStruct
230      -> Maybe S.ByteString
231      -> (ZStream' -> IO CInt)
232      -> Bool
233      -> Popper
234drain fbuff fzstr mbs func isFinish = withForeignPtr fzstr $ \zstr -> keepAlive mbs $ do
235    res <- func zstr
236    if res < 0 && res /= zBufError
237        then return $ PRError $ ZlibException $ fromIntegral res
238        else do
239            avail <- c_get_avail_out zstr
240            let size = defaultChunkSize - fromIntegral avail
241                toOutput = avail == 0 || (isFinish && size /= 0)
242            if toOutput
243                then withForeignPtr fbuff $ \buff -> do
244                    bs <- S.packCStringLen (buff, size)
245                    c_set_avail_out zstr buff
246                        $ fromIntegral defaultChunkSize
247                    return $ PRNext bs
248                else return PRDone
249
250
251-- | As explained in 'feedInflate', inflation buffers your decompressed
252-- data. After you call 'feedInflate' with your last chunk of compressed
253-- data, you will likely have some data still sitting in the buffer. This
254-- function will return it to you.
255finishInflate :: Inflate -> IO S.ByteString
256finishInflate (Inflate (fzstr, fbuff) _ _ _) =
257    withForeignPtr fzstr $ \zstr ->
258        withForeignPtr fbuff $ \buff -> do
259            avail <- c_get_avail_out zstr
260            let size = defaultChunkSize - fromIntegral avail
261            bs <- S.packCStringLen (buff, size)
262            c_set_avail_out zstr buff $ fromIntegral defaultChunkSize
263            return bs
264
265-- | Flush the inflation buffer. Useful for interactive application.
266--
267-- This is actually a synonym for 'finishInflate'. It is provided for its more
268-- semantic name.
269--
270-- Since 0.0.3
271flushInflate :: Inflate -> IO S.ByteString
272flushInflate = finishInflate
273
274-- | Retrieve any data remaining after inflating. For more information on motivation, see:
275--
276-- <https://github.com/fpco/streaming-commons/issues/20>
277--
278-- Since 0.1.11
279getUnusedInflate :: Inflate -> IO S.ByteString
280getUnusedInflate (Inflate (fzstr, _) ref _ _) = do
281    bs <- readIORef ref
282    len <- withForeignPtr fzstr c_get_avail_in
283    return $ S.drop (S.length bs - fromIntegral len) bs
284
285-- | Returns True if the inflater has reached end-of-stream, or False if
286-- it is still expecting more data.
287--
288-- Since 0.1.18
289isCompleteInflate :: Inflate -> IO Bool
290isCompleteInflate (Inflate _ _ complete _) = readIORef complete
291
292-- | Feed the given 'S.ByteString' to the deflater. Return a 'Popper',
293-- an IO action that returns the compressed data a chunk at a time.
294-- The 'Popper' must be called to exhaustion before using the 'Deflate'
295-- object again.
296--
297-- Note that this function automatically buffers the output to
298-- 'defaultChunkSize', and therefore you won't get any data from the popper
299-- until that much compressed data is available. After you have fed all of the
300-- decompressed data to this function, you can extract your final chunks of
301-- compressed data using 'finishDeflate'.
302feedDeflate :: Deflate -> S.ByteString -> IO Popper
303feedDeflate (Deflate (fzstr, fbuff)) bs = do
304    withForeignPtr fzstr $ \zstr ->
305        unsafeUseAsCStringLen bs $ \(cstr, len) -> do
306            c_set_avail_in zstr cstr $ fromIntegral len
307    return $ drain fbuff fzstr (Just bs) c_call_deflate_noflush False
308
309-- | As explained in 'feedDeflate', deflation buffers your compressed
310-- data. After you call 'feedDeflate' with your last chunk of uncompressed
311-- data, use this to flush the rest of the data and signal end of input.
312finishDeflate :: Deflate -> Popper
313finishDeflate (Deflate (fzstr, fbuff)) =
314    drain fbuff fzstr Nothing c_call_deflate_finish True
315
316-- | Flush the deflation buffer. Useful for interactive application.
317-- Internally this passes Z_SYNC_FLUSH to the zlib library.
318--
319-- Unlike 'finishDeflate', 'flushDeflate' does not signal end of input,
320-- meaning you can feed more uncompressed data afterward.
321--
322-- Since 0.0.3
323flushDeflate :: Deflate -> Popper
324flushDeflate (Deflate (fzstr, fbuff)) =
325    drain fbuff fzstr Nothing c_call_deflate_flush True
326
327-- | Full flush the deflation buffer. Useful for interactive
328-- applications where previously streamed data may not be
329-- available. Using `fullFlushDeflate` too often can seriously degrade
330-- compression. Internally this passes Z_FULL_FLUSH to the zlib
331-- library.
332--
333-- Like 'flushDeflate', 'fullFlushDeflate' does not signal end of input,
334-- meaning you can feed more uncompressed data afterward.
335--
336-- Since 0.1.5
337fullFlushDeflate :: Deflate -> Popper
338fullFlushDeflate (Deflate (fzstr, fbuff)) =
339    drain fbuff fzstr Nothing c_call_deflate_full_flush True
340