1{-# LANGUAGE CPP, BangPatterns #-}
2-- |
3-- Module      : BuilderBufferRange
4-- Copyright   : (c) 2010 Simon Meier
5-- License     : BSD3-style (see LICENSE)
6--
7-- Maintainer  : Leon P Smith <leon@melding-monads.com>
8-- Stability   : experimental
9-- Portability : tested on GHC only
10--
11-- Benchmark the benefit of using a packed representation for the buffer range.
12--
13module BuilderBufferRange where
14
15
16import Foreign
17import Data.Monoid
18import Control.Monad (unless)
19import qualified Data.ByteString      as S
20import qualified Data.ByteString.Lazy as L
21
22#ifdef BYTESTRING_IN_BASE
23import Data.ByteString.Base (inlinePerformIO)
24import qualified Data.ByteString.Base as S
25import qualified Data.ByteString.Lazy.Base as L -- FIXME: is this the right module for access to 'Chunks'?
26#else
27import Data.ByteString.Internal (inlinePerformIO)
28import qualified Data.ByteString.Internal as S
29import qualified Data.ByteString.Lazy.Internal as L
30#endif
31
32import qualified Blaze.ByteString.Builder.Internal as B
33import Blaze.ByteString.Builder.Write
34import Blaze.ByteString.Builder.Word
35
36import Criterion.Main
37
38------------------------------------------------------------------------------
39-- Benchmarks
40------------------------------------------------------------------------------
41
42main :: IO ()
43main = defaultMain $ concat
44    [ benchmark "putBuilder"
45        (putBuilder . mconcat . map fromWord8)
46        (mconcat . map fromWord8)
47        word8s
48    , benchmark "fromWriteSingleton"
49        (mconcat . map  putWord8)
50        (mconcat . map fromWord8)
51        word8s
52    , benchmark "fromWrite"
53        (mconcat . map (putWrite . writeWord8))
54        (mconcat . map (fromWrite . writeWord8))
55        word8s
56    ]
57  where
58    benchmark name putF builderF x =
59        [ bench (name ++ " Put") $
60            whnf (L.length . toLazyByteString . putF) x
61        , bench (name ++ " Builder") $
62            whnf (L.length . B.toLazyByteString . builderF) x
63        ]
64
65word8s :: [Word8]
66word8s = take 100000 $ cycle [0..]
67{-# NOINLINE word8s #-}
68
69
70------------------------------------------------------------------------------
71-- The Builder type
72------------------------------------------------------------------------------
73
74data BufferRange = BR {-# UNPACK #-} !(Ptr Word8)
75                      {-# UNPACK #-} !(Ptr Word8)
76
77newtype Put = Put (PutStep -> PutStep)
78
79data PutSignal =
80    Done {-# UNPACK #-} !(Ptr Word8)
81  | BufferFull
82      {-# UNPACK #-} !Int
83      {-# UNPACK #-} !(Ptr Word8)
84                     !PutStep
85  | ModifyChunks
86      {-# UNPACK #-} !(Ptr Word8)
87                     !(L.ByteString -> L.ByteString)
88                     !PutStep
89
90type PutStep =  BufferRange -> IO PutSignal
91
92instance Monoid Put where
93  mempty = Put id
94  {-# INLINE mempty #-}
95  (Put p1) `mappend` (Put p2) = Put $ p1 . p2
96  {-# INLINE mappend #-}
97  mconcat = foldr mappend mempty
98  {-# INLINE mconcat #-}
99
100putWrite :: Write -> Put
101putWrite (Write size io) =
102    Put step
103  where
104    step k (BR pf pe)
105      | pf `plusPtr` size <= pe = do
106          io pf
107          let !br' = BR (pf `plusPtr` size) pe
108          k br'
109      | otherwise = return $ BufferFull size pf (step k)
110{-# INLINE putWrite #-}
111
112putWriteSingleton :: (a -> Write) -> a -> Put
113putWriteSingleton write =
114    mkPut
115  where
116    mkPut x = Put step
117      where
118        step k (BR pf pe)
119          | pf `plusPtr` size <= pe = do
120              io pf
121              let !br' = BR (pf `plusPtr` size) pe
122              k br'
123          | otherwise               = return $ BufferFull size pf (step k)
124          where
125            Write size io = write x
126{-# INLINE putWriteSingleton #-}
127
128putBuilder :: B.Builder -> Put
129putBuilder (B.Builder b) =
130    Put step
131  where
132    finalStep _ pf = return $ B.Done pf
133
134    step k = go (b finalStep)
135      where
136        go buildStep (BR pf pe) = do
137          signal <- buildStep pf pe
138          case signal of
139            B.Done pf' -> do
140              let !br' = BR pf' pe
141              k br'
142            B.BufferFull minSize pf' nextBuildStep ->
143              return $ BufferFull minSize pf' (go nextBuildStep)
144            B.ModifyChunks _ _ _ ->
145              error "putBuilder: ModifyChunks not implemented"
146
147putWord8 :: Word8 -> Put
148putWord8 = putWriteSingleton writeWord8
149
150{-
151  m >>= f  = GetC $ \done empty pe ->
152      runGetC m (\pr' x -> runGetC (f x) done empty pe pr')
153                (\m' -> empty (m' >>= f))
154                pe
155
156
157newtype GetC r a = GetC {
158    runGetC ::
159      (Ptr Word8 -> a -> IO r) ->   -- done
160      (GetC r a -> IO r     )  ->   -- empty buffer
161      Ptr Word8                ->   -- end of buffer
162      Ptr Word8                ->   -- next byte to read
163      IO r
164  }
165
166instance Functor (GetC r) where
167  fmap f g = GetC $ \done empty ->
168      runGetC g (\pr' x -> done pr' (f x))
169                (\g'    -> empty (fmap f g'))
170
171instance Monad (GetC r) where
172  return x = GetC $ \done _ _ pr -> done pr x
173  m >>= f  = GetC $ \done empty pe ->
174      runGetC m (\pr' x -> runGetC (f x) done empty pe pr')
175                (\m' -> empty (m' >>= f))
176                pe
177
178-}
179
180------------------------------------------------------------------------------
181-- Internal global constants.
182------------------------------------------------------------------------------
183
184-- | Default size (~32kb) for the buffer that becomes a chunk of the output
185-- stream once it is filled.
186--
187defaultBufferSize :: Int
188defaultBufferSize = 32 * 1024 - overhead -- Copied from Data.ByteString.Lazy.
189    where overhead = 2 * sizeOf (undefined :: Int)
190
191-- | The minimal length (~4kb) a buffer must have before filling it and
192-- outputting it as a chunk of the output stream.
193--
194-- This size determines when a buffer is spilled after a 'flush' or a direct
195-- bytestring insertion. It is also the size of the first chunk generated by
196-- 'toLazyByteString'.
197defaultMinimalBufferSize :: Int
198defaultMinimalBufferSize = 4 * 1024 - overhead
199    where overhead = 2 * sizeOf (undefined :: Int)
200
201-- | The default length (64) for the first buffer to be allocated when
202-- converting a 'Builder' to a lazy bytestring.
203--
204-- See 'toLazyByteStringWith' for further explanation.
205defaultFirstBufferSize :: Int
206defaultFirstBufferSize = 64
207
208-- | The maximal number of bytes for that copying is cheaper than direct
209-- insertion into the output stream. This takes into account the fragmentation
210-- that may occur in the output buffer due to the early 'flush' implied by the
211-- direct bytestring insertion.
212--
213-- @'defaultMaximalCopySize' = 2 * 'defaultMinimalBufferSize'@
214--
215defaultMaximalCopySize :: Int
216defaultMaximalCopySize = 2 * defaultMinimalBufferSize
217
218------------------------------------------------------------------------------
219-- Flushing and running a Builder
220------------------------------------------------------------------------------
221
222
223-- | Output all data written in the current buffer and start a new chunk.
224--
225-- The use uf this function depends on how the resulting bytestrings are
226-- consumed. 'flush' is possibly not very useful in non-interactive scenarios.
227-- However, it is kept for compatibility with the builder provided by
228-- Data.Binary.Builder.
229--
230-- When using 'toLazyByteString' to extract a lazy 'L.ByteString' from a
231-- 'Builder', this means that a new chunk will be started in the resulting lazy
232-- 'L.ByteString'. The remaining part of the buffer is spilled, if the
233-- reamining free space is smaller than the minimal desired buffer size.
234--
235{-
236flush :: Builder
237flush = Builder $ \k pf _ -> return $ ModifyChunks pf id k
238-}
239
240-- | Run a 'Builder' with the given buffer sizes.
241--
242-- Use this function for integrating the 'Builder' type with other libraries
243-- that generate lazy bytestrings.
244--
245-- Note that the builders should guarantee that on average the desired chunk
246-- size is attained. Builders may decide to start a new buffer and not
247-- completely fill the existing buffer, if this is faster. However, they should
248-- not spill too much of the buffer, if they cannot compensate for it.
249--
250-- A call @toLazyByteStringWith bufSize minBufSize firstBufSize@ will generate
251-- a lazy bytestring according to the following strategy. First, we allocate
252-- a buffer of size @firstBufSize@ and start filling it. If it overflows, we
253-- allocate a buffer of size @minBufSize@ and copy the first buffer to it in
254-- order to avoid generating a too small chunk. Finally, every next buffer will
255-- be of size @bufSize@. This, slow startup strategy is required to achieve
256-- good speed for short (<200 bytes) resulting bytestrings, as for them the
257-- allocation cost is of a large buffer cannot be compensated. Moreover, this
258-- strategy also allows us to avoid spilling too much memory for short
259-- resulting bytestrings.
260--
261-- Note that setting @firstBufSize >= minBufSize@ implies that the first buffer
262-- is no longer copied but allocated and filled directly. Hence, setting
263-- @firstBufSize = bufSize@ means that all chunks will use an underlying buffer
264-- of size @bufSize@. This is recommended, if you know that you always output
265-- more than @minBufSize@ bytes.
266toLazyByteStringWith
267    :: Int           -- ^ Buffer size (upper-bounds the resulting chunk size).
268    -> Int           -- ^ Minimal free buffer space for continuing filling
269                     -- the same buffer after a 'flush' or a direct bytestring
270                     -- insertion. This corresponds to the minimal desired
271                     -- chunk size.
272    -> Int           -- ^ Size of the first buffer to be used and copied for
273                     -- larger resulting sequences
274    -> Put           -- ^ Builder to run.
275    -> L.ByteString  -- ^ Lazy bytestring to output after the builder is
276                     -- finished.
277    -> L.ByteString  -- ^ Resulting lazy bytestring
278toLazyByteStringWith bufSize minBufSize firstBufSize (Put b) k =
279    inlinePerformIO $ fillFirstBuffer (b finalStep)
280  where
281    finalStep (BR pf _) = return $ Done pf
282    -- fill a first very small buffer, if we need more space then copy it
283    -- to the new buffer of size 'minBufSize'. This way we don't pay the
284    -- allocation cost of the big 'bufSize' buffer, when outputting only
285    -- small sequences.
286    fillFirstBuffer !step0
287      | minBufSize <= firstBufSize = fillNewBuffer firstBufSize step0
288      | otherwise                  = do
289          fpbuf <- S.mallocByteString firstBufSize
290          withForeignPtr fpbuf $ \pf -> do
291              let !br      = BR pf (pf `plusPtr` firstBufSize)
292                  mkbs pf' = S.PS fpbuf 0 (pf' `minusPtr` pf)
293                  {-# INLINE mkbs #-}
294              next <- step0 br
295              case next of
296                  Done pf'
297                    | pf' == pf -> return k
298                    | otherwise -> return $ L.Chunk (mkbs pf') k
299
300                  BufferFull newSize pf' nextStep  -> do
301                      let !l  = pf' `minusPtr` pf
302                      fillNewBuffer (max (l + newSize) minBufSize) $
303                          \(BR pfNew peNew) -> do
304                              copyBytes pfNew pf l
305                              let !brNew = BR (pfNew `plusPtr` l) peNew
306                              nextStep brNew
307
308                  ModifyChunks pf' bsk nextStep
309                      | pf' == pf ->
310                          return $ bsk (inlinePerformIO $ fillNewBuffer bufSize nextStep)
311                      | otherwise ->
312                          return $ L.Chunk (mkbs pf')
313                              (bsk (inlinePerformIO $ fillNewBuffer bufSize nextStep))
314
315    -- allocate and fill a new buffer
316    fillNewBuffer !size !step0 = do
317        fpbuf <- S.mallocByteString size
318        withForeignPtr fpbuf $ fillBuffer fpbuf
319      where
320        fillBuffer fpbuf !pbuf = fill pbuf step0
321          where
322            !pe = pbuf `plusPtr` size
323            fill !pf !step = do
324                let !br = BR pf pe
325                next <- step br
326                let mkbs pf' = S.PS fpbuf (pf `minusPtr` pbuf) (pf' `minusPtr` pf)
327                    {-# INLINE mkbs #-}
328                case next of
329                    Done pf'
330                      | pf' == pf -> return k
331                      | otherwise -> return $ L.Chunk (mkbs pf') k
332
333                    BufferFull newSize pf' nextStep ->
334                        return $ L.Chunk (mkbs pf')
335                            (inlinePerformIO $
336                                fillNewBuffer (max newSize bufSize) nextStep)
337
338                    ModifyChunks  pf' bsk nextStep
339                      | pf' == pf                      ->
340                          return $ bsk (inlinePerformIO $ fill pf' nextStep)
341                      | minBufSize < pe `minusPtr` pf' ->
342                          return $ L.Chunk (mkbs pf')
343                              (bsk (inlinePerformIO $ fill pf' nextStep))
344                      | otherwise                      ->
345                          return $ L.Chunk (mkbs pf')
346                              (bsk (inlinePerformIO $ fillNewBuffer bufSize nextStep))
347
348
349-- | Extract the lazy 'L.ByteString' from the builder by running it with default
350-- buffer sizes. Use this function, if you do not have any special
351-- considerations with respect to buffer sizes.
352--
353-- @ 'toLazyByteString' b = 'toLazyByteStringWith' 'defaultBufferSize' 'defaultMinimalBufferSize' 'defaultFirstBufferSize' b L.empty@
354--
355-- Note that @'toLazyByteString'@ is a 'Monoid' homomorphism.
356--
357-- > toLazyByteString mempty          == mempty
358-- > toLazyByteString (x `mappend` y) == toLazyByteString x `mappend` toLazyByteString y
359--
360-- However, in the second equation, the left-hand-side is generally faster to
361-- execute.
362--
363toLazyByteString :: Put -> L.ByteString
364toLazyByteString b = toLazyByteStringWith
365    defaultBufferSize defaultMinimalBufferSize defaultFirstBufferSize b L.empty
366{-# INLINE toLazyByteString #-}
367
368{-
369-- | Pack the chunks of a lazy bytestring into a single strict bytestring.
370packChunks :: L.ByteString -> S.ByteString
371packChunks lbs = do
372    S.unsafeCreate (fromIntegral $ L.length lbs) (copyChunks lbs)
373  where
374    copyChunks !L.Empty                         !_pf = return ()
375    copyChunks !(L.Chunk (S.PS fpbuf o l) lbs') !pf  = do
376        withForeignPtr fpbuf $ \pbuf ->
377            copyBytes pf (pbuf `plusPtr` o) l
378        copyChunks lbs' (pf `plusPtr` l)
379
380-- | Run the builder to construct a strict bytestring containing the sequence
381-- of bytes denoted by the builder. This is done by first serializing to a lazy bytestring and then packing its
382-- chunks to a appropriately sized strict bytestring.
383--
384-- > toByteString = packChunks . toLazyByteString
385--
386-- Note that @'toByteString'@ is a 'Monoid' homomorphism.
387--
388-- > toByteString mempty          == mempty
389-- > toByteString (x `mappend` y) == toByteString x `mappend` toByteString y
390--
391-- However, in the second equation, the left-hand-side is generally faster to
392-- execute.
393--
394toByteString :: Builder -> S.ByteString
395toByteString = packChunks . toLazyByteString
396
397
398-- | @toByteStringIOWith bufSize io b@ runs the builder @b@ with a buffer of
399-- at least the size @bufSize@ and executes the 'IO' action @io@ whenever the
400-- buffer is full.
401--
402-- Compared to 'toLazyByteStringWith' this function requires less allocation,
403-- as the output buffer is only allocated once at the start of the
404-- serialization and whenever something bigger than the current buffer size has
405-- to be copied into the buffer, which should happen very seldomly for the
406-- default buffer size of 32kb. Hence, the pressure on the garbage collector is
407-- reduced, which can be an advantage when building long sequences of bytes.
408--
409toByteStringIOWith :: Int                      -- ^ Buffer size (upper bounds
410                                               -- the number of bytes forced
411                                               -- per call to the 'IO' action).
412                   -> (S.ByteString -> IO ())  -- ^ 'IO' action to execute per
413                                               -- full buffer, which is
414                                               -- referenced by a strict
415                                               -- 'S.ByteString'.
416                   -> Builder                  -- ^ 'Builder' to run.
417                   -> IO ()                    -- ^ Resulting 'IO' action.
418toByteStringIOWith bufSize io (Builder b) =
419    fillNewBuffer bufSize (b finalStep)
420  where
421    finalStep pf _ = return $ Done pf
422
423    fillNewBuffer !size !step0 = do
424        S.mallocByteString size >>= fillBuffer
425      where
426        fillBuffer fpbuf = fill step0
427          where
428            -- safe because the constructed ByteString references the foreign
429            -- pointer AFTER its buffer was filled.
430            pf = unsafeForeignPtrToPtr fpbuf
431            fill !step = do
432                next <- step pf (pf `plusPtr` size)
433                case next of
434                    Done pf' ->
435                        unless (pf' == pf) (io $  S.PS fpbuf 0 (pf' `minusPtr` pf))
436
437                    BufferFull newSize pf' nextStep  -> do
438                        io $ S.PS fpbuf 0 (pf' `minusPtr` pf)
439                        if bufSize < newSize
440                          then fillNewBuffer newSize nextStep
441                          else fill nextStep
442
443                    ModifyChunks  pf' bsk nextStep  -> do
444                        unless (pf' == pf) (io $  S.PS fpbuf 0 (pf' `minusPtr` pf))
445                        -- was: mapM_ io $ L.toChunks (bsk L.empty)
446                        L.foldrChunks (\bs -> (io bs >>)) (return ()) (bsk L.empty)
447                        fill nextStep
448
449-- | Run the builder with a 'defaultBufferSize'd buffer and execute the given
450-- 'IO' action whenever the buffer is full or gets flushed.
451--
452-- @ 'toByteStringIO' = 'toByteStringIOWith' 'defaultBufferSize'@
453--
454-- This is a 'Monoid' homomorphism in the following sense.
455--
456-- > toByteStringIO io mempty          == return ()
457-- > toByteStringIO io (x `mappend` y) == toByteStringIO io x >> toByteStringIO io y
458--
459toByteStringIO :: (S.ByteString -> IO ()) -> Builder -> IO ()
460toByteStringIO = toByteStringIOWith defaultBufferSize
461{-# INLINE toByteStringIO #-}
462
463-}
464