1{-# LANGUAGE CPP, BangPatterns #-} 2{-# LANGUAGE GeneralizedNewtypeDeriving #-} 3 4------------------------------------------------------------------------------ 5-- | 6-- Module: Blaze.ByteString.Builder 7-- Copyright: (c) 2013 Leon P Smith 8-- License: BSD3 9-- Maintainer: Leon P Smith <leon@melding-monads.com> 10-- Stability: experimental 11-- 12-- "Blaze.ByteString.Builder" is the main module, which you should import as a user 13-- of the @blaze-builder@ library. 14-- 15-- > import Blaze.ByteString.Builder 16-- 17-- It provides you with a type 'Builder' that allows to efficiently construct 18-- lazy bytestrings with a large average chunk size. 19-- 20-- Intuitively, a 'Builder' denotes the construction of a part of a lazy 21-- bytestring. Builders can either be created using one of the primitive 22-- combinators in "Blaze.ByteString.Builder.Write" or by using one of the predefined 23-- combinators for standard Haskell values (see the exposed modules of this 24-- package). Concatenation of builders is done using 'mappend' from the 25-- 'Monoid' typeclass. 26-- 27-- Here is a small example that serializes a list of strings using the UTF-8 28-- encoding. 29-- 30-- @ import "Blaze.ByteString.Builder.Char.Utf8"@ 31-- 32-- > strings :: [String] 33-- > strings = replicate 10000 "Hello there!" 34-- 35-- The function @'fromString'@ creates a 'Builder' denoting the UTF-8 encoded 36-- argument. Hence, UTF-8 encoding and concatenating all @strings@ can be done 37-- follows. 38-- 39-- > concatenation :: Builder 40-- > concatenation = mconcat $ map fromString strings 41-- 42-- The function 'toLazyByteString' can be used to execute a 'Builder' and 43-- obtain the resulting lazy bytestring. 44-- 45-- > result :: L.ByteString 46-- > result = toLazyByteString concatenation 47-- 48-- The @result@ is a lazy bytestring containing 10000 repetitions of the string 49-- @\"Hello there!\"@ encoded using UTF-8. The corresponding 120000 bytes are 50-- distributed among three chunks of 32kb and a last chunk of 6kb. 51-- 52-- /A note on history./ This serialization library was inspired by the 53-- @Data.Binary.Builder@ module provided by the @binary@ package. It was 54-- originally developed with the specific needs of the @blaze-html@ package in 55-- mind. Since then it has been restructured to serve as a drop-in replacement 56-- for @Data.Binary.Builder@, which it improves upon both in speed as well as 57-- expressivity. 58-- 59------------------------------------------------------------------------------ 60 61module Blaze.ByteString.Builder 62 ( 63 -- * The 'Builder' type 64 B.Builder 65 66 -- * Creating builders 67 , module Blaze.ByteString.Builder.Int 68 , module Blaze.ByteString.Builder.Word 69 , module Blaze.ByteString.Builder.ByteString 70 , B.flush 71 72 -- * Executing builders 73 , B.toLazyByteString 74 , toLazyByteStringWith 75 , toByteString 76 , toByteStringIO 77 , toByteStringIOWith 78 79 -- * 'Write's 80 , W.Write 81 , W.fromWrite 82 , W.fromWriteSingleton 83 , W.fromWriteList 84 , writeToByteString 85 86 -- ** Writing 'Storable's 87 , W.writeStorable 88 , W.fromStorable 89 , W.fromStorables 90 91 ) where 92 93import Control.Monad(unless) 94 95#if __GLASGOW_HASKELL__ >= 702 96import Foreign 97import qualified Foreign.ForeignPtr.Unsafe as Unsafe 98#else 99import Foreign as Unsafe 100#endif 101 102import qualified Blaze.ByteString.Builder.Internal.Write as W 103import Blaze.ByteString.Builder.ByteString 104import Blaze.ByteString.Builder.Word 105import Blaze.ByteString.Builder.Int 106 107import Data.ByteString.Builder ( Builder ) 108import qualified Data.ByteString.Builder as B 109import qualified Data.ByteString.Builder.Extra as B 110 111import qualified Data.ByteString as S 112import qualified Data.ByteString.Internal as S 113import qualified Data.ByteString.Lazy as L 114import qualified Data.ByteString.Lazy.Internal as L 115 116#if __GLASGOW_HASKELL__ >= 702 117import System.IO.Unsafe (unsafeDupablePerformIO) 118#else 119unsafeDupablePerformIO :: IO a -> a 120unsafeDupablePerformIO = unsafePerformIO 121#endif 122 123withBS :: S.ByteString -> (ForeignPtr Word8 -> Int -> Int -> a) -> a 124#if MIN_VERSION_bytestring(0,11,0) 125withBS (S.BS fptr len) f = f fptr 0 len 126#else 127withBS (S.PS fptr offset len) f = f fptr offset len 128#endif 129 130mkBS :: ForeignPtr Word8 -> Int -> S.ByteString 131#if MIN_VERSION_bytestring(0,11,0) 132mkBS fptr len = S.BS fptr len 133#else 134mkBS fptr len = S.PS fptr 0 len 135#endif 136 137-- | Pack the chunks of a lazy bytestring into a single strict bytestring. 138packChunks :: L.ByteString -> S.ByteString 139packChunks lbs = do 140 S.unsafeCreate (fromIntegral $ L.length lbs) (copyChunks lbs) 141 where 142 copyChunks !L.Empty !_pf = return () 143 copyChunks !(L.Chunk bs lbs') !pf = withBS bs $ \fpbuf o l -> do 144 withForeignPtr fpbuf $ \pbuf -> 145 copyBytes pf (pbuf `plusPtr` o) l 146 copyChunks lbs' (pf `plusPtr` l) 147 148-- | Run the builder to construct a strict bytestring containing the sequence 149-- of bytes denoted by the builder. This is done by first serializing to a lazy bytestring and then packing its 150-- chunks to a appropriately sized strict bytestring. 151-- 152-- > toByteString = packChunks . toLazyByteString 153-- 154-- Note that @'toByteString'@ is a 'Monoid' homomorphism. 155-- 156-- > toByteString mempty == mempty 157-- > toByteString (x `mappend` y) == toByteString x `mappend` toByteString y 158-- 159-- However, in the second equation, the left-hand-side is generally faster to 160-- execute. 161-- 162toByteString :: Builder -> S.ByteString 163toByteString = packChunks . B.toLazyByteString 164 165-- | Default size (~32kb) for the buffer that becomes a chunk of the output 166-- stream once it is filled. 167-- 168defaultBufferSize :: Int 169defaultBufferSize = 32 * 1024 - overhead -- Copied from Data.ByteString.Lazy. 170 where overhead = 2 * sizeOf (undefined :: Int) 171 172 173-- | @toByteStringIOWith bufSize io b@ runs the builder @b@ with a buffer of 174-- at least the size @bufSize@ and executes the 'IO' action @io@ whenever the 175-- buffer is full. 176-- 177-- Compared to 'toLazyByteStringWith' this function requires less allocation, 178-- as the output buffer is only allocated once at the start of the 179-- serialization and whenever something bigger than the current buffer size has 180-- to be copied into the buffer, which should happen very seldomly for the 181-- default buffer size of 32kb. Hence, the pressure on the garbage collector is 182-- reduced, which can be an advantage when building long sequences of bytes. 183-- 184toByteStringIO :: (S.ByteString -> IO ()) -> Builder -> IO () 185toByteStringIO = toByteStringIOWith defaultBufferSize 186 187toByteStringIOWith :: Int -- ^ Buffer size (upper bounds 188 -- the number of bytes forced 189 -- per call to the 'IO' action). 190 -> (S.ByteString -> IO ()) -- ^ 'IO' action to execute per 191 -- full buffer, which is 192 -- referenced by a strict 193 -- 'S.ByteString'. 194 -> Builder -- ^ 'Builder' to run. 195 -> IO () -- ^ Resulting 'IO' action. 196toByteStringIOWith !bufSize io builder = do 197 S.mallocByteString bufSize >>= getBuffer (B.runBuilder builder) bufSize 198 where 199 getBuffer writer !size fp = do 200 let !ptr = Unsafe.unsafeForeignPtrToPtr fp 201 (bytes, next) <- writer ptr size 202 case next of 203 B.Done -> io $! mkBS fp bytes 204 B.More req writer' -> do 205 io $! mkBS fp bytes 206 let !size' = max bufSize req 207 S.mallocByteString size' >>= getBuffer writer' size' 208 B.Chunk bs' writer' -> do 209 if bytes > 0 210 then do 211 io $! mkBS fp bytes 212 unless (S.null bs') (io bs') 213 S.mallocByteString bufSize >>= getBuffer writer' bufSize 214 else do 215 unless (S.null bs') (io bs') 216 getBuffer writer' size fp 217 218 219-- | Run a 'Builder' with the given buffer sizes. 220-- 221-- Use this function for integrating the 'Builder' type with other libraries 222-- that generate lazy bytestrings. 223-- 224-- Note that the builders should guarantee that on average the desired chunk 225-- size is attained. Builders may decide to start a new buffer and not 226-- completely fill the existing buffer, if this is faster. However, they should 227-- not spill too much of the buffer, if they cannot compensate for it. 228-- 229-- FIXME: Note that the following paragraphs are not entirely correct as of 230-- blaze-builder-0.4: 231-- 232-- A call @toLazyByteStringWith bufSize minBufSize firstBufSize@ will generate 233-- a lazy bytestring according to the following strategy. First, we allocate 234-- a buffer of size @firstBufSize@ and start filling it. If it overflows, we 235-- allocate a buffer of size @minBufSize@ and copy the first buffer to it in 236-- order to avoid generating a too small chunk. Finally, every next buffer will 237-- be of size @bufSize@. This, slow startup strategy is required to achieve 238-- good speed for short (<200 bytes) resulting bytestrings, as for them the 239-- allocation cost is of a large buffer cannot be compensated. Moreover, this 240-- strategy also allows us to avoid spilling too much memory for short 241-- resulting bytestrings. 242-- 243-- Note that setting @firstBufSize >= minBufSize@ implies that the first buffer 244-- is no longer copied but allocated and filled directly. Hence, setting 245-- @firstBufSize = bufSize@ means that all chunks will use an underlying buffer 246-- of size @bufSize@. This is recommended, if you know that you always output 247-- more than @minBufSize@ bytes. 248toLazyByteStringWith 249 :: Int -- ^ Buffer size (upper-bounds the resulting chunk size). 250 -> Int -- ^ This parameter is ignored as of blaze-builder-0.4 251 -> Int -- ^ Size of the first buffer to be used and copied for 252 -- larger resulting sequences 253 -> Builder -- ^ Builder to run. 254 -> L.ByteString -- ^ Lazy bytestring to output after the builder is 255 -- finished. 256 -> L.ByteString -- ^ Resulting lazy bytestring 257toLazyByteStringWith bufSize _minBufSize firstBufSize builder k = 258 B.toLazyByteStringWith (B.safeStrategy firstBufSize bufSize) k builder 259 260-- | Run a 'Write' to produce a strict 'S.ByteString'. 261-- This is equivalent to @('toByteString' . 'fromWrite')@, but is more 262-- efficient because it uses just one appropriately-sized buffer. 263writeToByteString :: W.Write -> S.ByteString 264writeToByteString !w = unsafeDupablePerformIO $ do 265 fptr <- S.mallocByteString (W.getBound w) 266 len <- withForeignPtr fptr $ \ptr -> do 267 end <- W.runWrite w ptr 268 return $! end `minusPtr` ptr 269 return $! S.fromForeignPtr fptr 0 len 270{-# INLINE writeToByteString #-} 271