1{-# LANGUAGE BangPatterns #-} 2-- | 3-- Module : Data.Attoparsec.ByteString.Buffer 4-- Copyright : Bryan O'Sullivan 2007-2015 5-- License : BSD3 6-- 7-- Maintainer : bos@serpentine.com 8-- Stability : experimental 9-- Portability : GHC 10-- 11-- An "immutable" buffer that supports cheap appends. 12-- 13-- A Buffer is divided into an immutable read-only zone, followed by a 14-- mutable area that we've preallocated, but not yet written to. 15-- 16-- We overallocate at the end of a Buffer so that we can cheaply 17-- append. Since a user of an existing Buffer cannot see past the end 18-- of its immutable zone into the data that will change during an 19-- append, this is safe. 20-- 21-- Once we run out of space at the end of a Buffer, we do the usual 22-- doubling of the buffer size. 23-- 24-- The fact of having a mutable buffer really helps with performance, 25-- but it does have a consequence: if someone misuses the Partial API 26-- that attoparsec uses by calling the same continuation repeatedly 27-- (which never makes sense in practice), they could overwrite data. 28-- 29-- Since the API *looks* pure, it should *act* pure, too, so we use 30-- two generation counters (one mutable, one immutable) to track the 31-- number of appends to a mutable buffer. If the counters ever get out 32-- of sync, someone is appending twice to a mutable buffer, so we 33-- duplicate the entire buffer in order to preserve the immutability 34-- of its older self. 35-- 36-- While we could go a step further and gain protection against API 37-- abuse on a multicore system, by use of an atomic increment 38-- instruction to bump the mutable generation counter, that would be 39-- very expensive, and feels like it would also be in the realm of the 40-- ridiculous. Clients should never call a continuation more than 41-- once; we lack a linear type system that could enforce this; and 42-- there's only so far we should go to accommodate broken uses. 43 44module Data.Attoparsec.ByteString.Buffer 45 ( 46 Buffer 47 , buffer 48 , unbuffer 49 , pappend 50 , length 51 , unsafeIndex 52 , substring 53 , unsafeDrop 54 ) where 55 56import Control.Exception (assert) 57import Data.ByteString.Internal (ByteString(..), memcpy, nullForeignPtr) 58import Data.Attoparsec.Internal.Fhthagn (inlinePerformIO) 59import Data.Attoparsec.Internal.Compat 60import Data.List (foldl1') 61import Data.Monoid as Mon (Monoid(..)) 62import Data.Semigroup (Semigroup(..)) 63import Data.Word (Word8) 64import Foreign.ForeignPtr (ForeignPtr, withForeignPtr) 65import Foreign.Ptr (castPtr, plusPtr) 66import Foreign.Storable (peek, peekByteOff, poke, sizeOf) 67import GHC.ForeignPtr (mallocPlainForeignPtrBytes) 68import Prelude hiding (length) 69 70-- If _cap is zero, this buffer is empty. 71data Buffer = Buf { 72 _fp :: {-# UNPACK #-} !(ForeignPtr Word8) 73 , _off :: {-# UNPACK #-} !Int 74 , _len :: {-# UNPACK #-} !Int 75 , _cap :: {-# UNPACK #-} !Int 76 , _gen :: {-# UNPACK #-} !Int 77 } 78 79instance Show Buffer where 80 showsPrec p = showsPrec p . unbuffer 81 82-- | The initial 'Buffer' has no mutable zone, so we can avoid all 83-- copies in the (hopefully) common case of no further input being fed 84-- to us. 85buffer :: ByteString -> Buffer 86buffer bs = withPS bs $ \fp off len -> Buf fp off len len 0 87 88unbuffer :: Buffer -> ByteString 89unbuffer (Buf fp off len _ _) = mkPS fp off len 90 91instance Semigroup Buffer where 92 (Buf _ _ _ 0 _) <> b = b 93 a <> (Buf _ _ _ 0 _) = a 94 buf <> (Buf fp off len _ _) = append buf fp off len 95 96instance Monoid Buffer where 97 mempty = Buf nullForeignPtr 0 0 0 0 98 99 mappend = (<>) 100 101 mconcat [] = Mon.mempty 102 mconcat xs = foldl1' mappend xs 103 104pappend :: Buffer -> ByteString -> Buffer 105pappend (Buf _ _ _ 0 _) bs = buffer bs 106pappend buf bs = withPS bs $ \fp off len -> append buf fp off len 107 108append :: Buffer -> ForeignPtr a -> Int -> Int -> Buffer 109append (Buf fp0 off0 len0 cap0 gen0) !fp1 !off1 !len1 = 110 inlinePerformIO . withForeignPtr fp0 $ \ptr0 -> 111 withForeignPtr fp1 $ \ptr1 -> do 112 let genSize = sizeOf (0::Int) 113 newlen = len0 + len1 114 gen <- if gen0 == 0 115 then return 0 116 else peek (castPtr ptr0) 117 if gen == gen0 && newlen <= cap0 118 then do 119 let newgen = gen + 1 120 poke (castPtr ptr0) newgen 121 memcpy (ptr0 `plusPtr` (off0+len0)) 122 (ptr1 `plusPtr` off1) 123 (fromIntegral len1) 124 return (Buf fp0 off0 newlen cap0 newgen) 125 else do 126 let newcap = newlen * 2 127 fp <- mallocPlainForeignPtrBytes (newcap + genSize) 128 withForeignPtr fp $ \ptr_ -> do 129 let ptr = ptr_ `plusPtr` genSize 130 newgen = 1 131 poke (castPtr ptr_) newgen 132 memcpy ptr (ptr0 `plusPtr` off0) (fromIntegral len0) 133 memcpy (ptr `plusPtr` len0) (ptr1 `plusPtr` off1) 134 (fromIntegral len1) 135 return (Buf fp genSize newlen newcap newgen) 136 137length :: Buffer -> Int 138length (Buf _ _ len _ _) = len 139{-# INLINE length #-} 140 141unsafeIndex :: Buffer -> Int -> Word8 142unsafeIndex (Buf fp off len _ _) i = assert (i >= 0 && i < len) . 143 inlinePerformIO . withForeignPtr fp $ flip peekByteOff (off+i) 144{-# INLINE unsafeIndex #-} 145 146substring :: Int -> Int -> Buffer -> ByteString 147substring s l (Buf fp off len _ _) = 148 assert (s >= 0 && s <= len) . 149 assert (l >= 0 && l <= len-s) $ 150 mkPS fp (off+s) l 151{-# INLINE substring #-} 152 153unsafeDrop :: Int -> Buffer -> ByteString 154unsafeDrop s (Buf fp off len _ _) = 155 assert (s >= 0 && s <= len) $ 156 mkPS fp (off+s) (len-s) 157{-# INLINE unsafeDrop #-} 158