1{-# LANGUAGE Trustworthy, BangPatterns #-} 2{-# LANGUAGE CPP, NoImplicitPrelude #-} 3{-# OPTIONS_GHC -funbox-strict-fields #-} 4 5----------------------------------------------------------------------------- 6-- | 7-- Module : GHC.IO.Buffer 8-- Copyright : (c) The University of Glasgow 2008 9-- License : see libraries/base/LICENSE 10-- 11-- Maintainer : cvs-ghc@haskell.org 12-- Stability : internal 13-- Portability : non-portable (GHC Extensions) 14-- 15-- Buffers used in the IO system 16-- 17----------------------------------------------------------------------------- 18 19module GHC.IO.Buffer ( 20 -- * Buffers of any element 21 Buffer(..), BufferState(..), CharBuffer, CharBufElem, 22 23 -- ** Creation 24 newByteBuffer, 25 newCharBuffer, 26 newBuffer, 27 emptyBuffer, 28 29 -- ** Insertion/removal 30 bufferRemove, 31 bufferAdd, 32 slideContents, 33 bufferAdjustL, 34 35 -- ** Inspecting 36 isEmptyBuffer, 37 isFullBuffer, 38 isFullCharBuffer, 39 isWriteBuffer, 40 bufferElems, 41 bufferAvailable, 42 summaryBuffer, 43 44 -- ** Operating on the raw buffer as a Ptr 45 withBuffer, 46 withRawBuffer, 47 48 -- ** Assertions 49 checkBuffer, 50 51 -- * Raw buffers 52 RawBuffer, 53 readWord8Buf, 54 writeWord8Buf, 55 RawCharBuffer, 56 peekCharBuf, 57 readCharBuf, 58 writeCharBuf, 59 readCharBufPtr, 60 writeCharBufPtr, 61 charSize, 62 ) where 63 64import GHC.Base 65-- import GHC.IO 66import GHC.Num 67import GHC.Ptr 68import GHC.Word 69import GHC.Show 70import GHC.Real 71import Foreign.C.Types 72import Foreign.ForeignPtr 73import Foreign.Storable 74 75-- Char buffers use either UTF-16 or UTF-32, with the endianness matching 76-- the endianness of the host. 77-- 78-- Invariants: 79-- * a Char buffer consists of *valid* UTF-16 or UTF-32 80-- * only whole characters: no partial surrogate pairs 81 82#define CHARBUF_UTF32 83 84-- #define CHARBUF_UTF16 85-- 86-- NB. it won't work to just change this to CHARBUF_UTF16. Some of 87-- the code to make this work is there, and it has been tested with 88-- the Iconv codec, but there are some pieces that are known to be 89-- broken. In particular, the built-in codecs 90-- e.g. GHC.IO.Encoding.UTF{8,16,32} need to use isFullCharBuffer or 91-- similar in place of the ow >= os comparisons. 92 93-- --------------------------------------------------------------------------- 94-- Raw blocks of data 95 96type RawBuffer e = ForeignPtr e 97 98readWord8Buf :: RawBuffer Word8 -> Int -> IO Word8 99readWord8Buf arr ix = withForeignPtr arr $ \p -> peekByteOff p ix 100 101writeWord8Buf :: RawBuffer Word8 -> Int -> Word8 -> IO () 102writeWord8Buf arr ix w = withForeignPtr arr $ \p -> pokeByteOff p ix w 103 104#if defined(CHARBUF_UTF16) 105type CharBufElem = Word16 106#else 107type CharBufElem = Char 108#endif 109 110type RawCharBuffer = RawBuffer CharBufElem 111 112peekCharBuf :: RawCharBuffer -> Int -> IO Char 113peekCharBuf arr ix = withForeignPtr arr $ \p -> do 114 (c,_) <- readCharBufPtr p ix 115 return c 116 117{-# INLINE readCharBuf #-} 118readCharBuf :: RawCharBuffer -> Int -> IO (Char, Int) 119readCharBuf arr ix = withForeignPtr arr $ \p -> readCharBufPtr p ix 120 121{-# INLINE writeCharBuf #-} 122writeCharBuf :: RawCharBuffer -> Int -> Char -> IO Int 123writeCharBuf arr ix c = withForeignPtr arr $ \p -> writeCharBufPtr p ix c 124 125{-# INLINE readCharBufPtr #-} 126readCharBufPtr :: Ptr CharBufElem -> Int -> IO (Char, Int) 127#if defined(CHARBUF_UTF16) 128readCharBufPtr p ix = do 129 c1 <- peekElemOff p ix 130 if (c1 < 0xd800 || c1 > 0xdbff) 131 then return (chr (fromIntegral c1), ix+1) 132 else do c2 <- peekElemOff p (ix+1) 133 return (unsafeChr ((fromIntegral c1 - 0xd800)*0x400 + 134 (fromIntegral c2 - 0xdc00) + 0x10000), ix+2) 135#else 136readCharBufPtr p ix = do c <- peekElemOff (castPtr p) ix; return (c, ix+1) 137#endif 138 139{-# INLINE writeCharBufPtr #-} 140writeCharBufPtr :: Ptr CharBufElem -> Int -> Char -> IO Int 141#if defined(CHARBUF_UTF16) 142writeCharBufPtr p ix ch 143 | c < 0x10000 = do pokeElemOff p ix (fromIntegral c) 144 return (ix+1) 145 | otherwise = do let c' = c - 0x10000 146 pokeElemOff p ix (fromIntegral (c' `div` 0x400 + 0xd800)) 147 pokeElemOff p (ix+1) (fromIntegral (c' `mod` 0x400 + 0xdc00)) 148 return (ix+2) 149 where 150 c = ord ch 151#else 152writeCharBufPtr p ix ch = do pokeElemOff (castPtr p) ix ch; return (ix+1) 153#endif 154 155charSize :: Int 156#if defined(CHARBUF_UTF16) 157charSize = 2 158#else 159charSize = 4 160#endif 161 162-- --------------------------------------------------------------------------- 163-- Buffers 164 165-- | A mutable array of bytes that can be passed to foreign functions. 166-- 167-- The buffer is represented by a record, where the record contains 168-- the raw buffer and the start/end points of the filled portion. The 169-- buffer contents itself is mutable, but the rest of the record is 170-- immutable. This is a slightly odd mix, but it turns out to be 171-- quite practical: by making all the buffer metadata immutable, we 172-- can have operations on buffer metadata outside of the IO monad. 173-- 174-- The "live" elements of the buffer are those between the 'bufL' and 175-- 'bufR' offsets. In an empty buffer, 'bufL' is equal to 'bufR', but 176-- they might not be zero: for example, the buffer might correspond to 177-- a memory-mapped file and in which case 'bufL' will point to the 178-- next location to be written, which is not necessarily the beginning 179-- of the file. 180data Buffer e 181 = Buffer { 182 bufRaw :: !(RawBuffer e), 183 bufState :: BufferState, 184 bufSize :: !Int, -- in elements, not bytes 185 bufL :: !Int, -- offset of first item in the buffer 186 bufR :: !Int -- offset of last item + 1 187 } 188 189#if defined(CHARBUF_UTF16) 190type CharBuffer = Buffer Word16 191#else 192type CharBuffer = Buffer Char 193#endif 194 195data BufferState = ReadBuffer | WriteBuffer 196 deriving Eq -- ^ @since 4.2.0.0 197 198withBuffer :: Buffer e -> (Ptr e -> IO a) -> IO a 199withBuffer Buffer{ bufRaw=raw } f = withForeignPtr (castForeignPtr raw) f 200 201withRawBuffer :: RawBuffer e -> (Ptr e -> IO a) -> IO a 202withRawBuffer raw f = withForeignPtr (castForeignPtr raw) f 203 204isEmptyBuffer :: Buffer e -> Bool 205isEmptyBuffer Buffer{ bufL=l, bufR=r } = l == r 206 207isFullBuffer :: Buffer e -> Bool 208isFullBuffer Buffer{ bufR=w, bufSize=s } = s == w 209 210-- if a Char buffer does not have room for a surrogate pair, it is "full" 211isFullCharBuffer :: Buffer e -> Bool 212#if defined(CHARBUF_UTF16) 213isFullCharBuffer buf = bufferAvailable buf < 2 214#else 215isFullCharBuffer = isFullBuffer 216#endif 217 218isWriteBuffer :: Buffer e -> Bool 219isWriteBuffer buf = case bufState buf of 220 WriteBuffer -> True 221 ReadBuffer -> False 222 223bufferElems :: Buffer e -> Int 224bufferElems Buffer{ bufR=w, bufL=r } = w - r 225 226bufferAvailable :: Buffer e -> Int 227bufferAvailable Buffer{ bufR=w, bufSize=s } = s - w 228 229bufferRemove :: Int -> Buffer e -> Buffer e 230bufferRemove i buf@Buffer{ bufL=r } = bufferAdjustL (r+i) buf 231 232bufferAdjustL :: Int -> Buffer e -> Buffer e 233bufferAdjustL l buf@Buffer{ bufR=w } 234 | l == w = buf{ bufL=0, bufR=0 } 235 | otherwise = buf{ bufL=l, bufR=w } 236 237bufferAdd :: Int -> Buffer e -> Buffer e 238bufferAdd i buf@Buffer{ bufR=w } = buf{ bufR=w+i } 239 240emptyBuffer :: RawBuffer e -> Int -> BufferState -> Buffer e 241emptyBuffer raw sz state = 242 Buffer{ bufRaw=raw, bufState=state, bufR=0, bufL=0, bufSize=sz } 243 244newByteBuffer :: Int -> BufferState -> IO (Buffer Word8) 245newByteBuffer c st = newBuffer c c st 246 247newCharBuffer :: Int -> BufferState -> IO CharBuffer 248newCharBuffer c st = newBuffer (c * charSize) c st 249 250newBuffer :: Int -> Int -> BufferState -> IO (Buffer e) 251newBuffer bytes sz state = do 252 fp <- mallocForeignPtrBytes bytes 253 return (emptyBuffer fp sz state) 254 255-- | slides the contents of the buffer to the beginning 256slideContents :: Buffer Word8 -> IO (Buffer Word8) 257slideContents buf@Buffer{ bufL=l, bufR=r, bufRaw=raw } = do 258 let elems = r - l 259 withRawBuffer raw $ \p -> 260 do _ <- memmove p (p `plusPtr` l) (fromIntegral elems) 261 return () 262 return buf{ bufL=0, bufR=elems } 263 264foreign import ccall unsafe "memmove" 265 memmove :: Ptr a -> Ptr a -> CSize -> IO (Ptr a) 266 267summaryBuffer :: Buffer a -> String 268summaryBuffer !buf -- Strict => slightly better code 269 = "buf" ++ show (bufSize buf) ++ "(" ++ show (bufL buf) ++ "-" ++ show (bufR buf) ++ ")" 270 271-- INVARIANTS on Buffers: 272-- * r <= w 273-- * if r == w, and the buffer is for reading, then r == 0 && w == 0 274-- * a write buffer is never full. If an operation 275-- fills up the buffer, it will always flush it before 276-- returning. 277-- * a read buffer may be full as a result of hLookAhead. In normal 278-- operation, a read buffer always has at least one character of space. 279 280checkBuffer :: Buffer a -> IO () 281checkBuffer buf@Buffer{ bufState = state, bufL=r, bufR=w, bufSize=size } = do 282 check buf ( 283 size > 0 284 && r <= w 285 && w <= size 286 && ( r /= w || state == WriteBuffer || (r == 0 && w == 0) ) 287 && ( state /= WriteBuffer || w < size ) -- write buffer is never full 288 ) 289 290check :: Buffer a -> Bool -> IO () 291check _ True = return () 292check buf False = errorWithoutStackTrace ("buffer invariant violation: " ++ summaryBuffer buf) 293 294