1{-# LANGUAGE CPP, BangPatterns          #-}
2{-# LANGUAGE GeneralizedNewtypeDeriving #-}
3
4------------------------------------------------------------------------------
5-- |
6-- Module:      Blaze.ByteString.Builder
7-- Copyright:   (c) 2013 Leon P Smith
8-- License:     BSD3
9-- Maintainer:  Leon P Smith <leon@melding-monads.com>
10-- Stability:   experimental
11--
12-- "Blaze.ByteString.Builder" is the main module, which you should import as a user
13-- of the @blaze-builder@ library.
14--
15-- > import Blaze.ByteString.Builder
16--
17-- It provides you with a type 'Builder' that allows to efficiently construct
18-- lazy bytestrings with a large average chunk size.
19--
20-- Intuitively, a 'Builder' denotes the construction of a part of a lazy
21-- bytestring. Builders can either be created using one of the primitive
22-- combinators in "Blaze.ByteString.Builder.Write" or by using one of the predefined
23-- combinators for standard Haskell values (see the exposed modules of this
24-- package).  Concatenation of builders is done using 'mappend' from the
25-- 'Monoid' typeclass.
26--
27-- Here is a small example that serializes a list of strings using the UTF-8
28-- encoding.
29--
30-- @ import "Blaze.ByteString.Builder.Char.Utf8"@
31--
32-- > strings :: [String]
33-- > strings = replicate 10000 "Hello there!"
34--
35-- The function @'fromString'@ creates a 'Builder' denoting the UTF-8 encoded
36-- argument. Hence, UTF-8 encoding and concatenating all @strings@ can be done
37-- follows.
38--
39-- > concatenation :: Builder
40-- > concatenation = mconcat $ map fromString strings
41--
42-- The function 'toLazyByteString'  can be used to execute a 'Builder' and
43-- obtain the resulting lazy bytestring.
44--
45-- > result :: L.ByteString
46-- > result = toLazyByteString concatenation
47--
48-- The @result@ is a lazy bytestring containing 10000 repetitions of the string
49-- @\"Hello there!\"@ encoded using UTF-8. The corresponding 120000 bytes are
50-- distributed among three chunks of 32kb and a last chunk of 6kb.
51--
52-- /A note on history./ This serialization library was inspired by the
53-- @Data.Binary.Builder@ module provided by the @binary@ package. It was
54-- originally developed with the specific needs of the @blaze-html@ package in
55-- mind. Since then it has been restructured to serve as a drop-in replacement
56-- for @Data.Binary.Builder@, which it improves upon both in speed as well as
57-- expressivity.
58--
59------------------------------------------------------------------------------
60
61module Blaze.ByteString.Builder
62    (
63      -- * The 'Builder' type
64      B.Builder
65
66      -- * Creating builders
67    , module Blaze.ByteString.Builder.Int
68    , module Blaze.ByteString.Builder.Word
69    , module Blaze.ByteString.Builder.ByteString
70    , B.flush
71
72      -- * Executing builders
73    , B.toLazyByteString
74    , toLazyByteStringWith
75    , toByteString
76    , toByteStringIO
77    , toByteStringIOWith
78
79    -- * 'Write's
80    , W.Write
81    , W.fromWrite
82    , W.fromWriteSingleton
83    , W.fromWriteList
84    , writeToByteString
85
86    -- ** Writing 'Storable's
87    , W.writeStorable
88    , W.fromStorable
89    , W.fromStorables
90
91    ) where
92
93import Control.Monad(unless)
94
95#if __GLASGOW_HASKELL__ >= 702
96import Foreign
97import qualified Foreign.ForeignPtr.Unsafe as Unsafe
98#else
99import Foreign as Unsafe
100#endif
101
102import qualified Blaze.ByteString.Builder.Internal.Write as W
103import           Blaze.ByteString.Builder.ByteString
104import           Blaze.ByteString.Builder.Word
105import           Blaze.ByteString.Builder.Int
106
107import           Data.ByteString.Builder ( Builder )
108import qualified Data.ByteString.Builder       as B
109import qualified Data.ByteString.Builder.Extra as B
110
111import qualified Data.ByteString               as S
112import qualified Data.ByteString.Internal      as S
113import qualified Data.ByteString.Lazy          as L
114import qualified Data.ByteString.Lazy.Internal as L
115
116#if __GLASGOW_HASKELL__ >= 702
117import System.IO.Unsafe (unsafeDupablePerformIO)
118#else
119unsafeDupablePerformIO :: IO a -> a
120unsafeDupablePerformIO = unsafePerformIO
121#endif
122
123withBS :: S.ByteString -> (ForeignPtr Word8 -> Int -> Int -> a) -> a
124#if MIN_VERSION_bytestring(0,11,0)
125withBS (S.BS fptr len) f = f fptr 0 len
126#else
127withBS (S.PS fptr offset len) f = f fptr offset len
128#endif
129
130mkBS :: ForeignPtr Word8 -> Int -> S.ByteString
131#if MIN_VERSION_bytestring(0,11,0)
132mkBS fptr len = S.BS fptr len
133#else
134mkBS fptr len = S.PS fptr 0 len
135#endif
136
137-- | Pack the chunks of a lazy bytestring into a single strict bytestring.
138packChunks :: L.ByteString -> S.ByteString
139packChunks lbs = do
140    S.unsafeCreate (fromIntegral $ L.length lbs) (copyChunks lbs)
141  where
142    copyChunks !L.Empty           !_pf = return ()
143    copyChunks !(L.Chunk bs lbs') !pf  = withBS bs $ \fpbuf o l -> do
144        withForeignPtr fpbuf $ \pbuf ->
145            copyBytes pf (pbuf `plusPtr` o) l
146        copyChunks lbs' (pf `plusPtr` l)
147
148-- | Run the builder to construct a strict bytestring containing the sequence
149-- of bytes denoted by the builder. This is done by first serializing to a lazy bytestring and then packing its
150-- chunks to a appropriately sized strict bytestring.
151--
152-- > toByteString = packChunks . toLazyByteString
153--
154-- Note that @'toByteString'@ is a 'Monoid' homomorphism.
155--
156-- > toByteString mempty          == mempty
157-- > toByteString (x `mappend` y) == toByteString x `mappend` toByteString y
158--
159-- However, in the second equation, the left-hand-side is generally faster to
160-- execute.
161--
162toByteString :: Builder -> S.ByteString
163toByteString = packChunks . B.toLazyByteString
164
165-- | Default size (~32kb) for the buffer that becomes a chunk of the output
166-- stream once it is filled.
167--
168defaultBufferSize :: Int
169defaultBufferSize = 32 * 1024 - overhead -- Copied from Data.ByteString.Lazy.
170    where overhead = 2 * sizeOf (undefined :: Int)
171
172
173-- | @toByteStringIOWith bufSize io b@ runs the builder @b@ with a buffer of
174-- at least the size @bufSize@ and executes the 'IO' action @io@ whenever the
175-- buffer is full.
176--
177-- Compared to 'toLazyByteStringWith' this function requires less allocation,
178-- as the output buffer is only allocated once at the start of the
179-- serialization and whenever something bigger than the current buffer size has
180-- to be copied into the buffer, which should happen very seldomly for the
181-- default buffer size of 32kb. Hence, the pressure on the garbage collector is
182-- reduced, which can be an advantage when building long sequences of bytes.
183--
184toByteStringIO :: (S.ByteString -> IO ()) -> Builder -> IO ()
185toByteStringIO = toByteStringIOWith defaultBufferSize
186
187toByteStringIOWith :: Int                      -- ^ Buffer size (upper bounds
188                                               -- the number of bytes forced
189                                               -- per call to the 'IO' action).
190                   -> (S.ByteString -> IO ())  -- ^ 'IO' action to execute per
191                                               -- full buffer, which is
192                                               -- referenced by a strict
193                                               -- 'S.ByteString'.
194                   -> Builder                -- ^ 'Builder' to run.
195                   -> IO ()                    -- ^ Resulting 'IO' action.
196toByteStringIOWith !bufSize io builder = do
197    S.mallocByteString bufSize >>= getBuffer (B.runBuilder builder) bufSize
198  where
199    getBuffer writer !size fp = do
200      let !ptr = Unsafe.unsafeForeignPtrToPtr fp
201      (bytes, next) <- writer ptr size
202      case next of
203        B.Done -> io $! mkBS fp bytes
204        B.More req writer' -> do
205           io $! mkBS fp bytes
206           let !size' = max bufSize req
207           S.mallocByteString size' >>= getBuffer writer' size'
208        B.Chunk bs' writer' -> do
209           if bytes > 0
210             then do
211               io $! mkBS fp bytes
212               unless (S.null bs') (io bs')
213               S.mallocByteString bufSize >>= getBuffer writer' bufSize
214             else do
215               unless (S.null bs') (io bs')
216               getBuffer writer' size fp
217
218
219-- | Run a 'Builder' with the given buffer sizes.
220--
221-- Use this function for integrating the 'Builder' type with other libraries
222-- that generate lazy bytestrings.
223--
224-- Note that the builders should guarantee that on average the desired chunk
225-- size is attained. Builders may decide to start a new buffer and not
226-- completely fill the existing buffer, if this is faster. However, they should
227-- not spill too much of the buffer, if they cannot compensate for it.
228--
229-- FIXME: Note that the following paragraphs are not entirely correct as of
230-- blaze-builder-0.4:
231--
232-- A call @toLazyByteStringWith bufSize minBufSize firstBufSize@ will generate
233-- a lazy bytestring according to the following strategy. First, we allocate
234-- a buffer of size @firstBufSize@ and start filling it. If it overflows, we
235-- allocate a buffer of size @minBufSize@ and copy the first buffer to it in
236-- order to avoid generating a too small chunk. Finally, every next buffer will
237-- be of size @bufSize@. This, slow startup strategy is required to achieve
238-- good speed for short (<200 bytes) resulting bytestrings, as for them the
239-- allocation cost is of a large buffer cannot be compensated. Moreover, this
240-- strategy also allows us to avoid spilling too much memory for short
241-- resulting bytestrings.
242--
243-- Note that setting @firstBufSize >= minBufSize@ implies that the first buffer
244-- is no longer copied but allocated and filled directly. Hence, setting
245-- @firstBufSize = bufSize@ means that all chunks will use an underlying buffer
246-- of size @bufSize@. This is recommended, if you know that you always output
247-- more than @minBufSize@ bytes.
248toLazyByteStringWith
249    :: Int           -- ^ Buffer size (upper-bounds the resulting chunk size).
250    -> Int           -- ^ This parameter is ignored as of blaze-builder-0.4
251    -> Int           -- ^ Size of the first buffer to be used and copied for
252                     -- larger resulting sequences
253    -> Builder       -- ^ Builder to run.
254    -> L.ByteString  -- ^ Lazy bytestring to output after the builder is
255                     -- finished.
256    -> L.ByteString  -- ^ Resulting lazy bytestring
257toLazyByteStringWith bufSize _minBufSize firstBufSize builder k =
258    B.toLazyByteStringWith (B.safeStrategy firstBufSize bufSize) k builder
259
260-- | Run a 'Write' to produce a strict 'S.ByteString'.
261-- This is equivalent to @('toByteString' . 'fromWrite')@, but is more
262-- efficient because it uses just one appropriately-sized buffer.
263writeToByteString :: W.Write -> S.ByteString
264writeToByteString !w = unsafeDupablePerformIO $ do
265    fptr <- S.mallocByteString (W.getBound w)
266    len <- withForeignPtr fptr $ \ptr -> do
267        end <- W.runWrite w ptr
268        return $! end `minusPtr` ptr
269    return $! S.fromForeignPtr fptr 0 len
270{-# INLINE writeToByteString #-}
271