1{-# LANGUAGE Trustworthy, BangPatterns #-}
2{-# LANGUAGE CPP, NoImplicitPrelude #-}
3{-# OPTIONS_GHC -funbox-strict-fields #-}
4
5-----------------------------------------------------------------------------
6-- |
7-- Module      :  GHC.IO.Buffer
8-- Copyright   :  (c) The University of Glasgow 2008
9-- License     :  see libraries/base/LICENSE
10--
11-- Maintainer  :  cvs-ghc@haskell.org
12-- Stability   :  internal
13-- Portability :  non-portable (GHC Extensions)
14--
15-- Buffers used in the IO system
16--
17-----------------------------------------------------------------------------
18
19module GHC.IO.Buffer (
20    -- * Buffers of any element
21    Buffer(..), BufferState(..), CharBuffer, CharBufElem,
22
23    -- ** Creation
24    newByteBuffer,
25    newCharBuffer,
26    newBuffer,
27    emptyBuffer,
28
29    -- ** Insertion/removal
30    bufferRemove,
31    bufferAdd,
32    slideContents,
33    bufferAdjustL,
34
35    -- ** Inspecting
36    isEmptyBuffer,
37    isFullBuffer,
38    isFullCharBuffer,
39    isWriteBuffer,
40    bufferElems,
41    bufferAvailable,
42    summaryBuffer,
43
44    -- ** Operating on the raw buffer as a Ptr
45    withBuffer,
46    withRawBuffer,
47
48    -- ** Assertions
49    checkBuffer,
50
51    -- * Raw buffers
52    RawBuffer,
53    readWord8Buf,
54    writeWord8Buf,
55    RawCharBuffer,
56    peekCharBuf,
57    readCharBuf,
58    writeCharBuf,
59    readCharBufPtr,
60    writeCharBufPtr,
61    charSize,
62 ) where
63
64import GHC.Base
65-- import GHC.IO
66import GHC.Num
67import GHC.Ptr
68import GHC.Word
69import GHC.Show
70import GHC.Real
71import Foreign.C.Types
72import Foreign.ForeignPtr
73import Foreign.Storable
74
75-- Char buffers use either UTF-16 or UTF-32, with the endianness matching
76-- the endianness of the host.
77--
78-- Invariants:
79--   * a Char buffer consists of *valid* UTF-16 or UTF-32
80--   * only whole characters: no partial surrogate pairs
81
82#define CHARBUF_UTF32
83
84-- #define CHARBUF_UTF16
85--
86-- NB. it won't work to just change this to CHARBUF_UTF16.  Some of
87-- the code to make this work is there, and it has been tested with
88-- the Iconv codec, but there are some pieces that are known to be
89-- broken.  In particular, the built-in codecs
90-- e.g. GHC.IO.Encoding.UTF{8,16,32} need to use isFullCharBuffer or
91-- similar in place of the ow >= os comparisons.
92
93-- ---------------------------------------------------------------------------
94-- Raw blocks of data
95
96type RawBuffer e = ForeignPtr e
97
98readWord8Buf :: RawBuffer Word8 -> Int -> IO Word8
99readWord8Buf arr ix = withForeignPtr arr $ \p -> peekByteOff p ix
100
101writeWord8Buf :: RawBuffer Word8 -> Int -> Word8 -> IO ()
102writeWord8Buf arr ix w = withForeignPtr arr $ \p -> pokeByteOff p ix w
103
104#if defined(CHARBUF_UTF16)
105type CharBufElem = Word16
106#else
107type CharBufElem = Char
108#endif
109
110type RawCharBuffer = RawBuffer CharBufElem
111
112peekCharBuf :: RawCharBuffer -> Int -> IO Char
113peekCharBuf arr ix = withForeignPtr arr $ \p -> do
114                        (c,_) <- readCharBufPtr p ix
115                        return c
116
117{-# INLINE readCharBuf #-}
118readCharBuf :: RawCharBuffer -> Int -> IO (Char, Int)
119readCharBuf arr ix = withForeignPtr arr $ \p -> readCharBufPtr p ix
120
121{-# INLINE writeCharBuf #-}
122writeCharBuf :: RawCharBuffer -> Int -> Char -> IO Int
123writeCharBuf arr ix c = withForeignPtr arr $ \p -> writeCharBufPtr p ix c
124
125{-# INLINE readCharBufPtr #-}
126readCharBufPtr :: Ptr CharBufElem -> Int -> IO (Char, Int)
127#if defined(CHARBUF_UTF16)
128readCharBufPtr p ix = do
129  c1 <- peekElemOff p ix
130  if (c1 < 0xd800 || c1 > 0xdbff)
131     then return (chr (fromIntegral c1), ix+1)
132     else do c2 <- peekElemOff p (ix+1)
133             return (unsafeChr ((fromIntegral c1 - 0xd800)*0x400 +
134                                (fromIntegral c2 - 0xdc00) + 0x10000), ix+2)
135#else
136readCharBufPtr p ix = do c <- peekElemOff (castPtr p) ix; return (c, ix+1)
137#endif
138
139{-# INLINE writeCharBufPtr #-}
140writeCharBufPtr :: Ptr CharBufElem -> Int -> Char -> IO Int
141#if defined(CHARBUF_UTF16)
142writeCharBufPtr p ix ch
143  | c < 0x10000 = do pokeElemOff p ix (fromIntegral c)
144                     return (ix+1)
145  | otherwise   = do let c' = c - 0x10000
146                     pokeElemOff p ix (fromIntegral (c' `div` 0x400 + 0xd800))
147                     pokeElemOff p (ix+1) (fromIntegral (c' `mod` 0x400 + 0xdc00))
148                     return (ix+2)
149  where
150    c = ord ch
151#else
152writeCharBufPtr p ix ch = do pokeElemOff (castPtr p) ix ch; return (ix+1)
153#endif
154
155charSize :: Int
156#if defined(CHARBUF_UTF16)
157charSize = 2
158#else
159charSize = 4
160#endif
161
162-- ---------------------------------------------------------------------------
163-- Buffers
164
165-- | A mutable array of bytes that can be passed to foreign functions.
166--
167-- The buffer is represented by a record, where the record contains
168-- the raw buffer and the start/end points of the filled portion.  The
169-- buffer contents itself is mutable, but the rest of the record is
170-- immutable.  This is a slightly odd mix, but it turns out to be
171-- quite practical: by making all the buffer metadata immutable, we
172-- can have operations on buffer metadata outside of the IO monad.
173--
174-- The "live" elements of the buffer are those between the 'bufL' and
175-- 'bufR' offsets.  In an empty buffer, 'bufL' is equal to 'bufR', but
176-- they might not be zero: for example, the buffer might correspond to
177-- a memory-mapped file and in which case 'bufL' will point to the
178-- next location to be written, which is not necessarily the beginning
179-- of the file.
180data Buffer e
181  = Buffer {
182        bufRaw   :: !(RawBuffer e),
183        bufState :: BufferState,
184        bufSize  :: !Int,          -- in elements, not bytes
185        bufL     :: !Int,          -- offset of first item in the buffer
186        bufR     :: !Int           -- offset of last item + 1
187  }
188
189#if defined(CHARBUF_UTF16)
190type CharBuffer = Buffer Word16
191#else
192type CharBuffer = Buffer Char
193#endif
194
195data BufferState = ReadBuffer | WriteBuffer
196  deriving Eq -- ^ @since 4.2.0.0
197
198withBuffer :: Buffer e -> (Ptr e -> IO a) -> IO a
199withBuffer Buffer{ bufRaw=raw } f = withForeignPtr (castForeignPtr raw) f
200
201withRawBuffer :: RawBuffer e -> (Ptr e -> IO a) -> IO a
202withRawBuffer raw f = withForeignPtr (castForeignPtr raw) f
203
204isEmptyBuffer :: Buffer e -> Bool
205isEmptyBuffer Buffer{ bufL=l, bufR=r } = l == r
206
207isFullBuffer :: Buffer e -> Bool
208isFullBuffer Buffer{ bufR=w, bufSize=s } = s == w
209
210-- if a Char buffer does not have room for a surrogate pair, it is "full"
211isFullCharBuffer :: Buffer e -> Bool
212#if defined(CHARBUF_UTF16)
213isFullCharBuffer buf = bufferAvailable buf < 2
214#else
215isFullCharBuffer = isFullBuffer
216#endif
217
218isWriteBuffer :: Buffer e -> Bool
219isWriteBuffer buf = case bufState buf of
220                        WriteBuffer -> True
221                        ReadBuffer  -> False
222
223bufferElems :: Buffer e -> Int
224bufferElems Buffer{ bufR=w, bufL=r } = w - r
225
226bufferAvailable :: Buffer e -> Int
227bufferAvailable Buffer{ bufR=w, bufSize=s } = s - w
228
229bufferRemove :: Int -> Buffer e -> Buffer e
230bufferRemove i buf@Buffer{ bufL=r } = bufferAdjustL (r+i) buf
231
232bufferAdjustL :: Int -> Buffer e -> Buffer e
233bufferAdjustL l buf@Buffer{ bufR=w }
234  | l == w    = buf{ bufL=0, bufR=0 }
235  | otherwise = buf{ bufL=l, bufR=w }
236
237bufferAdd :: Int -> Buffer e -> Buffer e
238bufferAdd i buf@Buffer{ bufR=w } = buf{ bufR=w+i }
239
240emptyBuffer :: RawBuffer e -> Int -> BufferState -> Buffer e
241emptyBuffer raw sz state =
242  Buffer{ bufRaw=raw, bufState=state, bufR=0, bufL=0, bufSize=sz }
243
244newByteBuffer :: Int -> BufferState -> IO (Buffer Word8)
245newByteBuffer c st = newBuffer c c st
246
247newCharBuffer :: Int -> BufferState -> IO CharBuffer
248newCharBuffer c st = newBuffer (c * charSize) c st
249
250newBuffer :: Int -> Int -> BufferState -> IO (Buffer e)
251newBuffer bytes sz state = do
252  fp <- mallocForeignPtrBytes bytes
253  return (emptyBuffer fp sz state)
254
255-- | slides the contents of the buffer to the beginning
256slideContents :: Buffer Word8 -> IO (Buffer Word8)
257slideContents buf@Buffer{ bufL=l, bufR=r, bufRaw=raw } = do
258  let elems = r - l
259  withRawBuffer raw $ \p ->
260      do _ <- memmove p (p `plusPtr` l) (fromIntegral elems)
261         return ()
262  return buf{ bufL=0, bufR=elems }
263
264foreign import ccall unsafe "memmove"
265   memmove :: Ptr a -> Ptr a -> CSize -> IO (Ptr a)
266
267summaryBuffer :: Buffer a -> String
268summaryBuffer !buf  -- Strict => slightly better code
269   = "buf" ++ show (bufSize buf) ++ "(" ++ show (bufL buf) ++ "-" ++ show (bufR buf) ++ ")"
270
271-- INVARIANTS on Buffers:
272--   * r <= w
273--   * if r == w, and the buffer is for reading, then r == 0 && w == 0
274--   * a write buffer is never full.  If an operation
275--     fills up the buffer, it will always flush it before
276--     returning.
277--   * a read buffer may be full as a result of hLookAhead.  In normal
278--     operation, a read buffer always has at least one character of space.
279
280checkBuffer :: Buffer a -> IO ()
281checkBuffer buf@Buffer{ bufState = state, bufL=r, bufR=w, bufSize=size } = do
282     check buf (
283        size > 0
284        && r <= w
285        && w <= size
286        && ( r /= w || state == WriteBuffer || (r == 0 && w == 0) )
287        && ( state /= WriteBuffer || w < size ) -- write buffer is never full
288      )
289
290check :: Buffer a -> Bool -> IO ()
291check _   True  = return ()
292check buf False = errorWithoutStackTrace ("buffer invariant violation: " ++ summaryBuffer buf)
293
294