1{-# LANGUAGE BangPatterns #-}
2-- |
3-- Module      :  Data.Attoparsec.ByteString.Buffer
4-- Copyright   :  Bryan O'Sullivan 2007-2015
5-- License     :  BSD3
6--
7-- Maintainer  :  bos@serpentine.com
8-- Stability   :  experimental
9-- Portability :  GHC
10--
11-- An "immutable" buffer that supports cheap appends.
12--
13-- A Buffer is divided into an immutable read-only zone, followed by a
14-- mutable area that we've preallocated, but not yet written to.
15--
16-- We overallocate at the end of a Buffer so that we can cheaply
17-- append.  Since a user of an existing Buffer cannot see past the end
18-- of its immutable zone into the data that will change during an
19-- append, this is safe.
20--
21-- Once we run out of space at the end of a Buffer, we do the usual
22-- doubling of the buffer size.
23--
24-- The fact of having a mutable buffer really helps with performance,
25-- but it does have a consequence: if someone misuses the Partial API
26-- that attoparsec uses by calling the same continuation repeatedly
27-- (which never makes sense in practice), they could overwrite data.
28--
29-- Since the API *looks* pure, it should *act* pure, too, so we use
30-- two generation counters (one mutable, one immutable) to track the
31-- number of appends to a mutable buffer. If the counters ever get out
32-- of sync, someone is appending twice to a mutable buffer, so we
33-- duplicate the entire buffer in order to preserve the immutability
34-- of its older self.
35--
36-- While we could go a step further and gain protection against API
37-- abuse on a multicore system, by use of an atomic increment
38-- instruction to bump the mutable generation counter, that would be
39-- very expensive, and feels like it would also be in the realm of the
40-- ridiculous.  Clients should never call a continuation more than
41-- once; we lack a linear type system that could enforce this; and
42-- there's only so far we should go to accommodate broken uses.
43
44module Data.Attoparsec.ByteString.Buffer
45    (
46      Buffer
47    , buffer
48    , unbuffer
49    , pappend
50    , length
51    , unsafeIndex
52    , substring
53    , unsafeDrop
54    ) where
55
56import Control.Exception (assert)
57import Data.ByteString.Internal (ByteString(..), memcpy, nullForeignPtr)
58import Data.Attoparsec.Internal.Fhthagn (inlinePerformIO)
59import Data.Attoparsec.Internal.Compat
60import Data.List (foldl1')
61import Data.Monoid as Mon (Monoid(..))
62import Data.Semigroup (Semigroup(..))
63import Data.Word (Word8)
64import Foreign.ForeignPtr (ForeignPtr, withForeignPtr)
65import Foreign.Ptr (castPtr, plusPtr)
66import Foreign.Storable (peek, peekByteOff, poke, sizeOf)
67import GHC.ForeignPtr (mallocPlainForeignPtrBytes)
68import Prelude hiding (length)
69
70-- If _cap is zero, this buffer is empty.
71data Buffer = Buf {
72      _fp  :: {-# UNPACK #-} !(ForeignPtr Word8)
73    , _off :: {-# UNPACK #-} !Int
74    , _len :: {-# UNPACK #-} !Int
75    , _cap :: {-# UNPACK #-} !Int
76    , _gen :: {-# UNPACK #-} !Int
77    }
78
79instance Show Buffer where
80    showsPrec p = showsPrec p . unbuffer
81
82-- | The initial 'Buffer' has no mutable zone, so we can avoid all
83-- copies in the (hopefully) common case of no further input being fed
84-- to us.
85buffer :: ByteString -> Buffer
86buffer bs = withPS bs $ \fp off len -> Buf fp off len len 0
87
88unbuffer :: Buffer -> ByteString
89unbuffer (Buf fp off len _ _) = mkPS fp off len
90
91instance Semigroup Buffer where
92    (Buf _ _ _ 0 _) <> b                    = b
93    a               <> (Buf _ _ _ 0 _)      = a
94    buf             <> (Buf fp off len _ _) = append buf fp off len
95
96instance Monoid Buffer where
97    mempty = Buf nullForeignPtr 0 0 0 0
98
99    mappend = (<>)
100
101    mconcat [] = Mon.mempty
102    mconcat xs = foldl1' mappend xs
103
104pappend :: Buffer -> ByteString -> Buffer
105pappend (Buf _ _ _ 0 _) bs  = buffer bs
106pappend buf             bs  = withPS bs $ \fp off len -> append buf fp off len
107
108append :: Buffer -> ForeignPtr a -> Int -> Int -> Buffer
109append (Buf fp0 off0 len0 cap0 gen0) !fp1 !off1 !len1 =
110  inlinePerformIO . withForeignPtr fp0 $ \ptr0 ->
111    withForeignPtr fp1 $ \ptr1 -> do
112      let genSize = sizeOf (0::Int)
113          newlen  = len0 + len1
114      gen <- if gen0 == 0
115             then return 0
116             else peek (castPtr ptr0)
117      if gen == gen0 && newlen <= cap0
118        then do
119          let newgen = gen + 1
120          poke (castPtr ptr0) newgen
121          memcpy (ptr0 `plusPtr` (off0+len0))
122                 (ptr1 `plusPtr` off1)
123                 (fromIntegral len1)
124          return (Buf fp0 off0 newlen cap0 newgen)
125        else do
126          let newcap = newlen * 2
127          fp <- mallocPlainForeignPtrBytes (newcap + genSize)
128          withForeignPtr fp $ \ptr_ -> do
129            let ptr    = ptr_ `plusPtr` genSize
130                newgen = 1
131            poke (castPtr ptr_) newgen
132            memcpy ptr (ptr0 `plusPtr` off0) (fromIntegral len0)
133            memcpy (ptr `plusPtr` len0) (ptr1 `plusPtr` off1)
134                   (fromIntegral len1)
135            return (Buf fp genSize newlen newcap newgen)
136
137length :: Buffer -> Int
138length (Buf _ _ len _ _) = len
139{-# INLINE length #-}
140
141unsafeIndex :: Buffer -> Int -> Word8
142unsafeIndex (Buf fp off len _ _) i = assert (i >= 0 && i < len) .
143    inlinePerformIO . withForeignPtr fp $ flip peekByteOff (off+i)
144{-# INLINE unsafeIndex #-}
145
146substring :: Int -> Int -> Buffer -> ByteString
147substring s l (Buf fp off len _ _) =
148  assert (s >= 0 && s <= len) .
149  assert (l >= 0 && l <= len-s) $
150  mkPS fp (off+s) l
151{-# INLINE substring #-}
152
153unsafeDrop :: Int -> Buffer -> ByteString
154unsafeDrop s (Buf fp off len _ _) =
155  assert (s >= 0 && s <= len) $
156  mkPS fp (off+s) (len-s)
157{-# INLINE unsafeDrop #-}
158