1{-# LANGUAGE Trustworthy #-}
2{-# LANGUAGE CPP
3           , NoImplicitPrelude
4           , ExistentialQuantification
5  #-}
6{-# OPTIONS_GHC -funbox-strict-fields #-}
7{-# OPTIONS_HADDOCK not-home #-}
8
9-----------------------------------------------------------------------------
10-- |
11-- Module      :  GHC.IO.Handle.Types
12-- Copyright   :  (c) The University of Glasgow, 1994-2009
13-- License     :  see libraries/base/LICENSE
14--
15-- Maintainer  :  libraries@haskell.org
16-- Stability   :  internal
17-- Portability :  non-portable
18--
19-- Basic types for the implementation of IO Handles.
20--
21-----------------------------------------------------------------------------
22
23module GHC.IO.Handle.Types (
24      Handle(..), Handle__(..), showHandle,
25      checkHandleInvariants,
26      BufferList(..),
27      HandleType(..),
28      isReadableHandleType, isWritableHandleType, isReadWriteHandleType,
29      BufferMode(..),
30      BufferCodec(..),
31      NewlineMode(..), Newline(..), nativeNewline,
32      universalNewlineMode, noNewlineTranslation, nativeNewlineMode
33  ) where
34
35#undef DEBUG
36
37import GHC.Base
38import GHC.MVar
39import GHC.IO
40import GHC.IO.Buffer
41import GHC.IO.BufferedIO
42import GHC.IO.Encoding.Types
43import GHC.IORef
44import GHC.Show
45import GHC.Read
46import GHC.Word
47import GHC.IO.Device
48import Data.Typeable
49#if defined(DEBUG)
50import Control.Monad
51#endif
52
53-- ---------------------------------------------------------------------------
54-- Handle type
55
56--  A Handle is represented by (a reference to) a record
57--  containing the state of the I/O port/device. We record
58--  the following pieces of info:
59
60--    * type (read,write,closed etc.)
61--    * the underlying file descriptor
62--    * buffering mode
63--    * buffer, and spare buffers
64--    * user-friendly name (usually the
65--      FilePath used when IO.openFile was called)
66
67-- Note: when a Handle is garbage collected, we want to flush its buffer
68-- and close the OS file handle, so as to free up a (precious) resource.
69
70-- | Haskell defines operations to read and write characters from and to files,
71-- represented by values of type @Handle@.  Each value of this type is a
72-- /handle/: a record used by the Haskell run-time system to /manage/ I\/O
73-- with file system objects.  A handle has at least the following properties:
74--
75--  * whether it manages input or output or both;
76--
77--  * whether it is /open/, /closed/ or /semi-closed/;
78--
79--  * whether the object is seekable;
80--
81--  * whether buffering is disabled, or enabled on a line or block basis;
82--
83--  * a buffer (whose length may be zero).
84--
85-- Most handles will also have a current I\/O position indicating where the next
86-- input or output operation will occur.  A handle is /readable/ if it
87-- manages only input or both input and output; likewise, it is /writable/ if
88-- it manages only output or both input and output.  A handle is /open/ when
89-- first allocated.
90-- Once it is closed it can no longer be used for either input or output,
91-- though an implementation cannot re-use its storage while references
92-- remain to it.  Handles are in the 'Show' and 'Eq' classes.  The string
93-- produced by showing a handle is system dependent; it should include
94-- enough information to identify the handle for debugging.  A handle is
95-- equal according to '==' only to itself; no attempt
96-- is made to compare the internal state of different handles for equality.
97
98data Handle
99  = FileHandle                          -- A normal handle to a file
100        FilePath                        -- the file (used for error messages
101                                        -- only)
102        !(MVar Handle__)
103
104  | DuplexHandle                        -- A handle to a read/write stream
105        FilePath                        -- file for a FIFO, otherwise some
106                                        --   descriptive string (used for error
107                                        --   messages only)
108        !(MVar Handle__)                -- The read side
109        !(MVar Handle__)                -- The write side
110
111-- NOTES:
112--    * A 'FileHandle' is seekable.  A 'DuplexHandle' may or may not be
113--      seekable.
114
115-- | @since 4.1.0.0
116instance Eq Handle where
117 (FileHandle _ h1)     == (FileHandle _ h2)     = h1 == h2
118 (DuplexHandle _ h1 _) == (DuplexHandle _ h2 _) = h1 == h2
119 _ == _ = False
120
121data Handle__
122  = forall dev enc_state dec_state . (IODevice dev, BufferedIO dev, Typeable dev) =>
123    Handle__ {
124      haDevice      :: !dev,
125      haType        :: HandleType,           -- type (read/write/append etc.)
126      haByteBuffer  :: !(IORef (Buffer Word8)), -- See [note Buffering Implementation]
127      haBufferMode  :: BufferMode,
128      haLastDecode  :: !(IORef (dec_state, Buffer Word8)),
129      haCharBuffer  :: !(IORef (Buffer CharBufElem)), -- See [note Buffering Implementation]
130      haBuffers     :: !(IORef (BufferList CharBufElem)),  -- spare buffers
131      haEncoder     :: Maybe (TextEncoder enc_state),
132      haDecoder     :: Maybe (TextDecoder dec_state),
133      haCodec       :: Maybe TextEncoding,
134      haInputNL     :: Newline,
135      haOutputNL    :: Newline,
136      haOtherSide   :: Maybe (MVar Handle__) -- ptr to the write side of a
137                                             -- duplex handle.
138    }
139
140-- we keep a few spare buffers around in a handle to avoid allocating
141-- a new one for each hPutStr.  These buffers are *guaranteed* to be the
142-- same size as the main buffer.
143data BufferList e
144  = BufferListNil
145  | BufferListCons (RawBuffer e) (BufferList e)
146
147--  Internally, we classify handles as being one
148--  of the following:
149
150data HandleType
151 = ClosedHandle
152 | SemiClosedHandle
153 | ReadHandle
154 | WriteHandle
155 | AppendHandle
156 | ReadWriteHandle
157
158isReadableHandleType :: HandleType -> Bool
159isReadableHandleType ReadHandle         = True
160isReadableHandleType ReadWriteHandle    = True
161isReadableHandleType _                  = False
162
163isWritableHandleType :: HandleType -> Bool
164isWritableHandleType AppendHandle    = True
165isWritableHandleType WriteHandle     = True
166isWritableHandleType ReadWriteHandle = True
167isWritableHandleType _               = False
168
169isReadWriteHandleType :: HandleType -> Bool
170isReadWriteHandleType ReadWriteHandle{} = True
171isReadWriteHandleType _                 = False
172
173-- INVARIANTS on Handles:
174--
175--   * A handle *always* has a buffer, even if it is only 1 character long
176--     (an unbuffered handle needs a 1 character buffer in order to support
177--      hLookAhead and hIsEOF).
178--   * In a read Handle, the byte buffer is always empty (we decode when reading)
179--   * In a wriite Handle, the Char buffer is always empty (we encode when writing)
180--
181checkHandleInvariants :: Handle__ -> IO ()
182#if defined(DEBUG)
183checkHandleInvariants h_ = do
184 bbuf <- readIORef (haByteBuffer h_)
185 checkBuffer bbuf
186 cbuf <- readIORef (haCharBuffer h_)
187 checkBuffer cbuf
188 when (isWriteBuffer cbuf && not (isEmptyBuffer cbuf)) $
189   errorWithoutStackTrace ("checkHandleInvariants: char write buffer non-empty: " ++
190          summaryBuffer bbuf ++ ", " ++ summaryBuffer cbuf)
191 when (isWriteBuffer bbuf /= isWriteBuffer cbuf) $
192   errorWithoutStackTrace ("checkHandleInvariants: buffer modes differ: " ++
193          summaryBuffer bbuf ++ ", " ++ summaryBuffer cbuf)
194
195#else
196checkHandleInvariants _ = return ()
197#endif
198
199-- ---------------------------------------------------------------------------
200-- Buffering modes
201
202-- | Three kinds of buffering are supported: line-buffering,
203-- block-buffering or no-buffering.  These modes have the following
204-- effects. For output, items are written out, or /flushed/,
205-- from the internal buffer according to the buffer mode:
206--
207--  * /line-buffering/: the entire output buffer is flushed
208--    whenever a newline is output, the buffer overflows,
209--    a 'System.IO.hFlush' is issued, or the handle is closed.
210--
211--  * /block-buffering/: the entire buffer is written out whenever it
212--    overflows, a 'System.IO.hFlush' is issued, or the handle is closed.
213--
214--  * /no-buffering/: output is written immediately, and never stored
215--    in the buffer.
216--
217-- An implementation is free to flush the buffer more frequently,
218-- but not less frequently, than specified above.
219-- The output buffer is emptied as soon as it has been written out.
220--
221-- Similarly, input occurs according to the buffer mode for the handle:
222--
223--  * /line-buffering/: when the buffer for the handle is not empty,
224--    the next item is obtained from the buffer; otherwise, when the
225--    buffer is empty, characters up to and including the next newline
226--    character are read into the buffer.  No characters are available
227--    until the newline character is available or the buffer is full.
228--
229--  * /block-buffering/: when the buffer for the handle becomes empty,
230--    the next block of data is read into the buffer.
231--
232--  * /no-buffering/: the next input item is read and returned.
233--    The 'System.IO.hLookAhead' operation implies that even a no-buffered
234--    handle may require a one-character buffer.
235--
236-- The default buffering mode when a handle is opened is
237-- implementation-dependent and may depend on the file system object
238-- which is attached to that handle.
239-- For most implementations, physical files will normally be block-buffered
240-- and terminals will normally be line-buffered.
241
242data BufferMode
243 = NoBuffering  -- ^ buffering is disabled if possible.
244 | LineBuffering
245                -- ^ line-buffering should be enabled if possible.
246 | BlockBuffering (Maybe Int)
247                -- ^ block-buffering should be enabled if possible.
248                -- The size of the buffer is @n@ items if the argument
249                -- is 'Just' @n@ and is otherwise implementation-dependent.
250   deriving ( Eq   -- ^ @since 4.2.0.0
251            , Ord  -- ^ @since 4.2.0.0
252            , Read -- ^ @since 4.2.0.0
253            , Show -- ^ @since 4.2.0.0
254            )
255
256{-
257[note Buffering Implementation]
258
259Each Handle has two buffers: a byte buffer (haByteBuffer) and a Char
260buffer (haCharBuffer).
261
262[note Buffered Reading]
263
264For read Handles, bytes are read into the byte buffer, and immediately
265decoded into the Char buffer (see
266GHC.IO.Handle.Internals.readTextDevice).  The only way there might be
267some data left in the byte buffer is if there is a partial multi-byte
268character sequence that cannot be decoded into a full character.
269
270Note that the buffering mode (haBufferMode) makes no difference when
271reading data into a Handle.  When reading, we can always just read all
272the data there is available without blocking, decode it into the Char
273buffer, and then provide it immediately to the caller.
274
275[note Buffered Writing]
276
277Characters are written into the Char buffer by e.g. hPutStr.  At the
278end of the operation, or when the char buffer is full, the buffer is
279decoded to the byte buffer (see writeCharBuffer).  This is so that we
280can detect encoding errors at the right point.
281
282Hence, the Char buffer is always empty between Handle operations.
283
284[note Buffer Sizing]
285
286The char buffer is always a default size (dEFAULT_CHAR_BUFFER_SIZE).
287The byte buffer size is chosen by the underlying device (via its
288IODevice.newBuffer).  Hence the size of these buffers is not under
289user control.
290
291There are certain minimum sizes for these buffers imposed by the
292library (but not checked):
293
294 - we must be able to buffer at least one character, so that
295   hLookAhead can work
296
297 - the byte buffer must be able to store at least one encoded
298   character in the current encoding (6 bytes?)
299
300 - when reading, the char buffer must have room for two characters, so
301   that we can spot the \r\n sequence.
302
303How do we implement hSetBuffering?
304
305For reading, we have never used the user-supplied buffer size, because
306there's no point: we always pass all available data to the reader
307immediately.  Buffering would imply waiting until a certain amount of
308data is available, which has no advantages.  So hSetBuffering is
309essentially a no-op for read handles, except that it turns on/off raw
310mode for the underlying device if necessary.
311
312For writing, the buffering mode is handled by the write operations
313themselves (hPutChar and hPutStr).  Every write ends with
314writeCharBuffer, which checks whether the buffer should be flushed
315according to the current buffering mode.  Additionally, we look for
316newlines and flush if the mode is LineBuffering.
317
318[note Buffer Flushing]
319
320** Flushing the Char buffer
321
322We must be able to flush the Char buffer, in order to implement
323hSetEncoding, and things like hGetBuf which want to read raw bytes.
324
325Flushing the Char buffer on a write Handle is easy: it is always empty.
326
327Flushing the Char buffer on a read Handle involves rewinding the byte
328buffer to the point representing the next Char in the Char buffer.
329This is done by
330
331 - remembering the state of the byte buffer *before* the last decode
332
333 - re-decoding the bytes that represent the chars already read from the
334   Char buffer.  This gives us the point in the byte buffer that
335   represents the *next* Char to be read.
336
337In order for this to work, after readTextHandle we must NOT MODIFY THE
338CONTENTS OF THE BYTE OR CHAR BUFFERS, except to remove characters from
339the Char buffer.
340
341** Flushing the byte buffer
342
343The byte buffer can be flushed if the Char buffer has already been
344flushed (see above).  For a read Handle, flushing the byte buffer
345means seeking the device back by the number of bytes in the buffer,
346and hence it is only possible on a seekable Handle.
347
348-}
349
350-- ---------------------------------------------------------------------------
351-- Newline translation
352
353-- | The representation of a newline in the external file or stream.
354data Newline = LF    -- ^ @\'\\n\'@
355             | CRLF  -- ^ @\'\\r\\n\'@
356             deriving ( Eq   -- ^ @since 4.2.0.0
357                      , Ord  -- ^ @since 4.3.0.0
358                      , Read -- ^ @since 4.3.0.0
359                      , Show -- ^ @since 4.3.0.0
360                      )
361
362-- | Specifies the translation, if any, of newline characters between
363-- internal Strings and the external file or stream.  Haskell Strings
364-- are assumed to represent newlines with the @\'\\n\'@ character; the
365-- newline mode specifies how to translate @\'\\n\'@ on output, and what to
366-- translate into @\'\\n\'@ on input.
367data NewlineMode
368  = NewlineMode { inputNL :: Newline,
369                    -- ^ the representation of newlines on input
370                  outputNL :: Newline
371                    -- ^ the representation of newlines on output
372                 }
373             deriving ( Eq   -- ^ @since 4.2.0.0
374                      , Ord  -- ^ @since 4.3.0.0
375                      , Read -- ^ @since 4.3.0.0
376                      , Show -- ^ @since 4.3.0.0
377                      )
378
379-- | The native newline representation for the current platform: 'LF'
380-- on Unix systems, 'CRLF' on Windows.
381nativeNewline :: Newline
382#if defined(mingw32_HOST_OS)
383nativeNewline = CRLF
384#else
385nativeNewline = LF
386#endif
387
388-- | Map @\'\\r\\n\'@ into @\'\\n\'@ on input, and @\'\\n\'@ to the native newline
389-- represetnation on output.  This mode can be used on any platform, and
390-- works with text files using any newline convention.  The downside is
391-- that @readFile >>= writeFile@ might yield a different file.
392--
393-- > universalNewlineMode  = NewlineMode { inputNL  = CRLF,
394-- >                                       outputNL = nativeNewline }
395--
396universalNewlineMode :: NewlineMode
397universalNewlineMode  = NewlineMode { inputNL  = CRLF,
398                                      outputNL = nativeNewline }
399
400-- | Use the native newline representation on both input and output
401--
402-- > nativeNewlineMode  = NewlineMode { inputNL  = nativeNewline
403-- >                                    outputNL = nativeNewline }
404--
405nativeNewlineMode    :: NewlineMode
406nativeNewlineMode     = NewlineMode { inputNL  = nativeNewline,
407                                      outputNL = nativeNewline }
408
409-- | Do no newline translation at all.
410--
411-- > noNewlineTranslation  = NewlineMode { inputNL  = LF, outputNL = LF }
412--
413noNewlineTranslation :: NewlineMode
414noNewlineTranslation  = NewlineMode { inputNL  = LF, outputNL = LF }
415
416-- ---------------------------------------------------------------------------
417-- Show instance for Handles
418
419-- handle types are 'show'n when printing error msgs, so
420-- we provide a more user-friendly Show instance for it
421-- than the derived one.
422
423-- | @since 4.1.0.0
424instance Show HandleType where
425  showsPrec _ t =
426    case t of
427      ClosedHandle      -> showString "closed"
428      SemiClosedHandle  -> showString "semi-closed"
429      ReadHandle        -> showString "readable"
430      WriteHandle       -> showString "writable"
431      AppendHandle      -> showString "writable (append)"
432      ReadWriteHandle   -> showString "read-writable"
433
434-- | @since 4.1.0.0
435instance Show Handle where
436  showsPrec _ (FileHandle   file _)   = showHandle file
437  showsPrec _ (DuplexHandle file _ _) = showHandle file
438
439showHandle :: FilePath -> String -> String
440showHandle file = showString "{handle: " . showString file . showString "}"
441
442