1{-# LANGUAGE CPP, BangPatterns #-} 2-- | 3-- Module : BuilderBufferRange 4-- Copyright : (c) 2010 Simon Meier 5-- License : BSD3-style (see LICENSE) 6-- 7-- Maintainer : Leon P Smith <leon@melding-monads.com> 8-- Stability : experimental 9-- Portability : tested on GHC only 10-- 11-- Benchmark the benefit of using a packed representation for the buffer range. 12-- 13module BuilderBufferRange where 14 15 16import Foreign 17import Data.Monoid 18import Control.Monad (unless) 19import qualified Data.ByteString as S 20import qualified Data.ByteString.Lazy as L 21 22#ifdef BYTESTRING_IN_BASE 23import Data.ByteString.Base (inlinePerformIO) 24import qualified Data.ByteString.Base as S 25import qualified Data.ByteString.Lazy.Base as L -- FIXME: is this the right module for access to 'Chunks'? 26#else 27import Data.ByteString.Internal (inlinePerformIO) 28import qualified Data.ByteString.Internal as S 29import qualified Data.ByteString.Lazy.Internal as L 30#endif 31 32import qualified Blaze.ByteString.Builder.Internal as B 33import Blaze.ByteString.Builder.Write 34import Blaze.ByteString.Builder.Word 35 36import Criterion.Main 37 38------------------------------------------------------------------------------ 39-- Benchmarks 40------------------------------------------------------------------------------ 41 42main :: IO () 43main = defaultMain $ concat 44 [ benchmark "putBuilder" 45 (putBuilder . mconcat . map fromWord8) 46 (mconcat . map fromWord8) 47 word8s 48 , benchmark "fromWriteSingleton" 49 (mconcat . map putWord8) 50 (mconcat . map fromWord8) 51 word8s 52 , benchmark "fromWrite" 53 (mconcat . map (putWrite . writeWord8)) 54 (mconcat . map (fromWrite . writeWord8)) 55 word8s 56 ] 57 where 58 benchmark name putF builderF x = 59 [ bench (name ++ " Put") $ 60 whnf (L.length . toLazyByteString . putF) x 61 , bench (name ++ " Builder") $ 62 whnf (L.length . B.toLazyByteString . builderF) x 63 ] 64 65word8s :: [Word8] 66word8s = take 100000 $ cycle [0..] 67{-# NOINLINE word8s #-} 68 69 70------------------------------------------------------------------------------ 71-- The Builder type 72------------------------------------------------------------------------------ 73 74data BufferRange = BR {-# UNPACK #-} !(Ptr Word8) 75 {-# UNPACK #-} !(Ptr Word8) 76 77newtype Put = Put (PutStep -> PutStep) 78 79data PutSignal = 80 Done {-# UNPACK #-} !(Ptr Word8) 81 | BufferFull 82 {-# UNPACK #-} !Int 83 {-# UNPACK #-} !(Ptr Word8) 84 !PutStep 85 | ModifyChunks 86 {-# UNPACK #-} !(Ptr Word8) 87 !(L.ByteString -> L.ByteString) 88 !PutStep 89 90type PutStep = BufferRange -> IO PutSignal 91 92instance Monoid Put where 93 mempty = Put id 94 {-# INLINE mempty #-} 95 (Put p1) `mappend` (Put p2) = Put $ p1 . p2 96 {-# INLINE mappend #-} 97 mconcat = foldr mappend mempty 98 {-# INLINE mconcat #-} 99 100putWrite :: Write -> Put 101putWrite (Write size io) = 102 Put step 103 where 104 step k (BR pf pe) 105 | pf `plusPtr` size <= pe = do 106 io pf 107 let !br' = BR (pf `plusPtr` size) pe 108 k br' 109 | otherwise = return $ BufferFull size pf (step k) 110{-# INLINE putWrite #-} 111 112putWriteSingleton :: (a -> Write) -> a -> Put 113putWriteSingleton write = 114 mkPut 115 where 116 mkPut x = Put step 117 where 118 step k (BR pf pe) 119 | pf `plusPtr` size <= pe = do 120 io pf 121 let !br' = BR (pf `plusPtr` size) pe 122 k br' 123 | otherwise = return $ BufferFull size pf (step k) 124 where 125 Write size io = write x 126{-# INLINE putWriteSingleton #-} 127 128putBuilder :: B.Builder -> Put 129putBuilder (B.Builder b) = 130 Put step 131 where 132 finalStep _ pf = return $ B.Done pf 133 134 step k = go (b finalStep) 135 where 136 go buildStep (BR pf pe) = do 137 signal <- buildStep pf pe 138 case signal of 139 B.Done pf' -> do 140 let !br' = BR pf' pe 141 k br' 142 B.BufferFull minSize pf' nextBuildStep -> 143 return $ BufferFull minSize pf' (go nextBuildStep) 144 B.ModifyChunks _ _ _ -> 145 error "putBuilder: ModifyChunks not implemented" 146 147putWord8 :: Word8 -> Put 148putWord8 = putWriteSingleton writeWord8 149 150{- 151 m >>= f = GetC $ \done empty pe -> 152 runGetC m (\pr' x -> runGetC (f x) done empty pe pr') 153 (\m' -> empty (m' >>= f)) 154 pe 155 156 157newtype GetC r a = GetC { 158 runGetC :: 159 (Ptr Word8 -> a -> IO r) -> -- done 160 (GetC r a -> IO r ) -> -- empty buffer 161 Ptr Word8 -> -- end of buffer 162 Ptr Word8 -> -- next byte to read 163 IO r 164 } 165 166instance Functor (GetC r) where 167 fmap f g = GetC $ \done empty -> 168 runGetC g (\pr' x -> done pr' (f x)) 169 (\g' -> empty (fmap f g')) 170 171instance Monad (GetC r) where 172 return x = GetC $ \done _ _ pr -> done pr x 173 m >>= f = GetC $ \done empty pe -> 174 runGetC m (\pr' x -> runGetC (f x) done empty pe pr') 175 (\m' -> empty (m' >>= f)) 176 pe 177 178-} 179 180------------------------------------------------------------------------------ 181-- Internal global constants. 182------------------------------------------------------------------------------ 183 184-- | Default size (~32kb) for the buffer that becomes a chunk of the output 185-- stream once it is filled. 186-- 187defaultBufferSize :: Int 188defaultBufferSize = 32 * 1024 - overhead -- Copied from Data.ByteString.Lazy. 189 where overhead = 2 * sizeOf (undefined :: Int) 190 191-- | The minimal length (~4kb) a buffer must have before filling it and 192-- outputting it as a chunk of the output stream. 193-- 194-- This size determines when a buffer is spilled after a 'flush' or a direct 195-- bytestring insertion. It is also the size of the first chunk generated by 196-- 'toLazyByteString'. 197defaultMinimalBufferSize :: Int 198defaultMinimalBufferSize = 4 * 1024 - overhead 199 where overhead = 2 * sizeOf (undefined :: Int) 200 201-- | The default length (64) for the first buffer to be allocated when 202-- converting a 'Builder' to a lazy bytestring. 203-- 204-- See 'toLazyByteStringWith' for further explanation. 205defaultFirstBufferSize :: Int 206defaultFirstBufferSize = 64 207 208-- | The maximal number of bytes for that copying is cheaper than direct 209-- insertion into the output stream. This takes into account the fragmentation 210-- that may occur in the output buffer due to the early 'flush' implied by the 211-- direct bytestring insertion. 212-- 213-- @'defaultMaximalCopySize' = 2 * 'defaultMinimalBufferSize'@ 214-- 215defaultMaximalCopySize :: Int 216defaultMaximalCopySize = 2 * defaultMinimalBufferSize 217 218------------------------------------------------------------------------------ 219-- Flushing and running a Builder 220------------------------------------------------------------------------------ 221 222 223-- | Output all data written in the current buffer and start a new chunk. 224-- 225-- The use uf this function depends on how the resulting bytestrings are 226-- consumed. 'flush' is possibly not very useful in non-interactive scenarios. 227-- However, it is kept for compatibility with the builder provided by 228-- Data.Binary.Builder. 229-- 230-- When using 'toLazyByteString' to extract a lazy 'L.ByteString' from a 231-- 'Builder', this means that a new chunk will be started in the resulting lazy 232-- 'L.ByteString'. The remaining part of the buffer is spilled, if the 233-- reamining free space is smaller than the minimal desired buffer size. 234-- 235{- 236flush :: Builder 237flush = Builder $ \k pf _ -> return $ ModifyChunks pf id k 238-} 239 240-- | Run a 'Builder' with the given buffer sizes. 241-- 242-- Use this function for integrating the 'Builder' type with other libraries 243-- that generate lazy bytestrings. 244-- 245-- Note that the builders should guarantee that on average the desired chunk 246-- size is attained. Builders may decide to start a new buffer and not 247-- completely fill the existing buffer, if this is faster. However, they should 248-- not spill too much of the buffer, if they cannot compensate for it. 249-- 250-- A call @toLazyByteStringWith bufSize minBufSize firstBufSize@ will generate 251-- a lazy bytestring according to the following strategy. First, we allocate 252-- a buffer of size @firstBufSize@ and start filling it. If it overflows, we 253-- allocate a buffer of size @minBufSize@ and copy the first buffer to it in 254-- order to avoid generating a too small chunk. Finally, every next buffer will 255-- be of size @bufSize@. This, slow startup strategy is required to achieve 256-- good speed for short (<200 bytes) resulting bytestrings, as for them the 257-- allocation cost is of a large buffer cannot be compensated. Moreover, this 258-- strategy also allows us to avoid spilling too much memory for short 259-- resulting bytestrings. 260-- 261-- Note that setting @firstBufSize >= minBufSize@ implies that the first buffer 262-- is no longer copied but allocated and filled directly. Hence, setting 263-- @firstBufSize = bufSize@ means that all chunks will use an underlying buffer 264-- of size @bufSize@. This is recommended, if you know that you always output 265-- more than @minBufSize@ bytes. 266toLazyByteStringWith 267 :: Int -- ^ Buffer size (upper-bounds the resulting chunk size). 268 -> Int -- ^ Minimal free buffer space for continuing filling 269 -- the same buffer after a 'flush' or a direct bytestring 270 -- insertion. This corresponds to the minimal desired 271 -- chunk size. 272 -> Int -- ^ Size of the first buffer to be used and copied for 273 -- larger resulting sequences 274 -> Put -- ^ Builder to run. 275 -> L.ByteString -- ^ Lazy bytestring to output after the builder is 276 -- finished. 277 -> L.ByteString -- ^ Resulting lazy bytestring 278toLazyByteStringWith bufSize minBufSize firstBufSize (Put b) k = 279 inlinePerformIO $ fillFirstBuffer (b finalStep) 280 where 281 finalStep (BR pf _) = return $ Done pf 282 -- fill a first very small buffer, if we need more space then copy it 283 -- to the new buffer of size 'minBufSize'. This way we don't pay the 284 -- allocation cost of the big 'bufSize' buffer, when outputting only 285 -- small sequences. 286 fillFirstBuffer !step0 287 | minBufSize <= firstBufSize = fillNewBuffer firstBufSize step0 288 | otherwise = do 289 fpbuf <- S.mallocByteString firstBufSize 290 withForeignPtr fpbuf $ \pf -> do 291 let !br = BR pf (pf `plusPtr` firstBufSize) 292 mkbs pf' = S.PS fpbuf 0 (pf' `minusPtr` pf) 293 {-# INLINE mkbs #-} 294 next <- step0 br 295 case next of 296 Done pf' 297 | pf' == pf -> return k 298 | otherwise -> return $ L.Chunk (mkbs pf') k 299 300 BufferFull newSize pf' nextStep -> do 301 let !l = pf' `minusPtr` pf 302 fillNewBuffer (max (l + newSize) minBufSize) $ 303 \(BR pfNew peNew) -> do 304 copyBytes pfNew pf l 305 let !brNew = BR (pfNew `plusPtr` l) peNew 306 nextStep brNew 307 308 ModifyChunks pf' bsk nextStep 309 | pf' == pf -> 310 return $ bsk (inlinePerformIO $ fillNewBuffer bufSize nextStep) 311 | otherwise -> 312 return $ L.Chunk (mkbs pf') 313 (bsk (inlinePerformIO $ fillNewBuffer bufSize nextStep)) 314 315 -- allocate and fill a new buffer 316 fillNewBuffer !size !step0 = do 317 fpbuf <- S.mallocByteString size 318 withForeignPtr fpbuf $ fillBuffer fpbuf 319 where 320 fillBuffer fpbuf !pbuf = fill pbuf step0 321 where 322 !pe = pbuf `plusPtr` size 323 fill !pf !step = do 324 let !br = BR pf pe 325 next <- step br 326 let mkbs pf' = S.PS fpbuf (pf `minusPtr` pbuf) (pf' `minusPtr` pf) 327 {-# INLINE mkbs #-} 328 case next of 329 Done pf' 330 | pf' == pf -> return k 331 | otherwise -> return $ L.Chunk (mkbs pf') k 332 333 BufferFull newSize pf' nextStep -> 334 return $ L.Chunk (mkbs pf') 335 (inlinePerformIO $ 336 fillNewBuffer (max newSize bufSize) nextStep) 337 338 ModifyChunks pf' bsk nextStep 339 | pf' == pf -> 340 return $ bsk (inlinePerformIO $ fill pf' nextStep) 341 | minBufSize < pe `minusPtr` pf' -> 342 return $ L.Chunk (mkbs pf') 343 (bsk (inlinePerformIO $ fill pf' nextStep)) 344 | otherwise -> 345 return $ L.Chunk (mkbs pf') 346 (bsk (inlinePerformIO $ fillNewBuffer bufSize nextStep)) 347 348 349-- | Extract the lazy 'L.ByteString' from the builder by running it with default 350-- buffer sizes. Use this function, if you do not have any special 351-- considerations with respect to buffer sizes. 352-- 353-- @ 'toLazyByteString' b = 'toLazyByteStringWith' 'defaultBufferSize' 'defaultMinimalBufferSize' 'defaultFirstBufferSize' b L.empty@ 354-- 355-- Note that @'toLazyByteString'@ is a 'Monoid' homomorphism. 356-- 357-- > toLazyByteString mempty == mempty 358-- > toLazyByteString (x `mappend` y) == toLazyByteString x `mappend` toLazyByteString y 359-- 360-- However, in the second equation, the left-hand-side is generally faster to 361-- execute. 362-- 363toLazyByteString :: Put -> L.ByteString 364toLazyByteString b = toLazyByteStringWith 365 defaultBufferSize defaultMinimalBufferSize defaultFirstBufferSize b L.empty 366{-# INLINE toLazyByteString #-} 367 368{- 369-- | Pack the chunks of a lazy bytestring into a single strict bytestring. 370packChunks :: L.ByteString -> S.ByteString 371packChunks lbs = do 372 S.unsafeCreate (fromIntegral $ L.length lbs) (copyChunks lbs) 373 where 374 copyChunks !L.Empty !_pf = return () 375 copyChunks !(L.Chunk (S.PS fpbuf o l) lbs') !pf = do 376 withForeignPtr fpbuf $ \pbuf -> 377 copyBytes pf (pbuf `plusPtr` o) l 378 copyChunks lbs' (pf `plusPtr` l) 379 380-- | Run the builder to construct a strict bytestring containing the sequence 381-- of bytes denoted by the builder. This is done by first serializing to a lazy bytestring and then packing its 382-- chunks to a appropriately sized strict bytestring. 383-- 384-- > toByteString = packChunks . toLazyByteString 385-- 386-- Note that @'toByteString'@ is a 'Monoid' homomorphism. 387-- 388-- > toByteString mempty == mempty 389-- > toByteString (x `mappend` y) == toByteString x `mappend` toByteString y 390-- 391-- However, in the second equation, the left-hand-side is generally faster to 392-- execute. 393-- 394toByteString :: Builder -> S.ByteString 395toByteString = packChunks . toLazyByteString 396 397 398-- | @toByteStringIOWith bufSize io b@ runs the builder @b@ with a buffer of 399-- at least the size @bufSize@ and executes the 'IO' action @io@ whenever the 400-- buffer is full. 401-- 402-- Compared to 'toLazyByteStringWith' this function requires less allocation, 403-- as the output buffer is only allocated once at the start of the 404-- serialization and whenever something bigger than the current buffer size has 405-- to be copied into the buffer, which should happen very seldomly for the 406-- default buffer size of 32kb. Hence, the pressure on the garbage collector is 407-- reduced, which can be an advantage when building long sequences of bytes. 408-- 409toByteStringIOWith :: Int -- ^ Buffer size (upper bounds 410 -- the number of bytes forced 411 -- per call to the 'IO' action). 412 -> (S.ByteString -> IO ()) -- ^ 'IO' action to execute per 413 -- full buffer, which is 414 -- referenced by a strict 415 -- 'S.ByteString'. 416 -> Builder -- ^ 'Builder' to run. 417 -> IO () -- ^ Resulting 'IO' action. 418toByteStringIOWith bufSize io (Builder b) = 419 fillNewBuffer bufSize (b finalStep) 420 where 421 finalStep pf _ = return $ Done pf 422 423 fillNewBuffer !size !step0 = do 424 S.mallocByteString size >>= fillBuffer 425 where 426 fillBuffer fpbuf = fill step0 427 where 428 -- safe because the constructed ByteString references the foreign 429 -- pointer AFTER its buffer was filled. 430 pf = unsafeForeignPtrToPtr fpbuf 431 fill !step = do 432 next <- step pf (pf `plusPtr` size) 433 case next of 434 Done pf' -> 435 unless (pf' == pf) (io $ S.PS fpbuf 0 (pf' `minusPtr` pf)) 436 437 BufferFull newSize pf' nextStep -> do 438 io $ S.PS fpbuf 0 (pf' `minusPtr` pf) 439 if bufSize < newSize 440 then fillNewBuffer newSize nextStep 441 else fill nextStep 442 443 ModifyChunks pf' bsk nextStep -> do 444 unless (pf' == pf) (io $ S.PS fpbuf 0 (pf' `minusPtr` pf)) 445 -- was: mapM_ io $ L.toChunks (bsk L.empty) 446 L.foldrChunks (\bs -> (io bs >>)) (return ()) (bsk L.empty) 447 fill nextStep 448 449-- | Run the builder with a 'defaultBufferSize'd buffer and execute the given 450-- 'IO' action whenever the buffer is full or gets flushed. 451-- 452-- @ 'toByteStringIO' = 'toByteStringIOWith' 'defaultBufferSize'@ 453-- 454-- This is a 'Monoid' homomorphism in the following sense. 455-- 456-- > toByteStringIO io mempty == return () 457-- > toByteStringIO io (x `mappend` y) == toByteStringIO io x >> toByteStringIO io y 458-- 459toByteStringIO :: (S.ByteString -> IO ()) -> Builder -> IO () 460toByteStringIO = toByteStringIOWith defaultBufferSize 461{-# INLINE toByteStringIO #-} 462 463-} 464