1{-# LANGUAGE BangPatterns, CPP, RecordWildCards, ScopedTypeVariables #-}
2#if __GLASGOW_HASKELL__ >= 702
3{-# LANGUAGE Trustworthy #-}
4#endif
5-- |
6-- Module      : Data.Text.IO
7-- Copyright   : (c) 2009, 2010 Bryan O'Sullivan,
8--               (c) 2009 Simon Marlow
9-- License     : BSD-style
10-- Maintainer  : bos@serpentine.com
11-- Portability : GHC
12--
13-- Efficient locale-sensitive support for text I\/O.
14--
15-- Skip past the synopsis for some important notes on performance and
16-- portability across different versions of GHC.
17
18module Data.Text.IO
19    (
20    -- * Performance
21    -- $performance
22
23    -- * Locale support
24    -- $locale
25    -- * File-at-a-time operations
26      readFile
27    , writeFile
28    , appendFile
29    -- * Operations on handles
30    , hGetContents
31    , hGetChunk
32    , hGetLine
33    , hPutStr
34    , hPutStrLn
35    -- * Special cases for standard input and output
36    , interact
37    , getContents
38    , getLine
39    , putStr
40    , putStrLn
41    ) where
42
43import Data.Text (Text)
44import Prelude hiding (appendFile, getContents, getLine, interact,
45                       putStr, putStrLn, readFile, writeFile)
46import System.IO (Handle, IOMode(..), hPutChar, openFile, stdin, stdout,
47                  withFile)
48import qualified Control.Exception as E
49import Control.Monad (liftM2, when)
50import Data.IORef (readIORef, writeIORef)
51import qualified Data.Text as T
52import Data.Text.Internal.Fusion (stream)
53import Data.Text.Internal.Fusion.Types (Step(..), Stream(..))
54import Data.Text.Internal.IO (hGetLineWith, readChunk)
55import GHC.IO.Buffer (Buffer(..), BufferState(..), CharBufElem, CharBuffer,
56                      RawCharBuffer, emptyBuffer, isEmptyBuffer, newCharBuffer,
57                      writeCharBuf)
58import GHC.IO.Exception (IOException(ioe_type), IOErrorType(InappropriateType))
59import GHC.IO.Handle.Internals (augmentIOError, hClose_help, wantReadableHandle,
60                                wantWritableHandle)
61import GHC.IO.Handle.Text (commitBuffer')
62import GHC.IO.Handle.Types (BufferList(..), BufferMode(..), Handle__(..),
63                            HandleType(..), Newline(..))
64import System.IO (hGetBuffering, hFileSize, hSetBuffering, hTell)
65import System.IO.Error (isEOFError)
66
67-- $performance
68-- #performance#
69--
70-- The functions in this module obey the runtime system's locale,
71-- character set encoding, and line ending conversion settings.
72--
73-- If you know in advance that you will be working with data that has
74-- a specific encoding (e.g. UTF-8), and your application is highly
75-- performance sensitive, you may find that it is faster to perform
76-- I\/O with bytestrings and to encode and decode yourself than to use
77-- the functions in this module.
78--
79-- Whether this will hold depends on the version of GHC you are using,
80-- the platform you are working on, the data you are working with, and
81-- the encodings you are using, so be sure to test for yourself.
82
83-- | The 'readFile' function reads a file and returns the contents of
84-- the file as a string.  The entire file is read strictly, as with
85-- 'getContents'.
86readFile :: FilePath -> IO Text
87readFile name = openFile name ReadMode >>= hGetContents
88
89-- | Write a string to a file.  The file is truncated to zero length
90-- before writing begins.
91writeFile :: FilePath -> Text -> IO ()
92writeFile p = withFile p WriteMode . flip hPutStr
93
94-- | Write a string the end of a file.
95appendFile :: FilePath -> Text -> IO ()
96appendFile p = withFile p AppendMode . flip hPutStr
97
98catchError :: String -> Handle -> Handle__ -> IOError -> IO (Text, Bool)
99catchError caller h Handle__{..} err
100    | isEOFError err = do
101        buf <- readIORef haCharBuffer
102        return $ if isEmptyBuffer buf
103                 then (T.empty, True)
104                 else (T.singleton '\r', True)
105    | otherwise = E.throwIO (augmentIOError err caller h)
106
107-- | Wrap readChunk and return a value indicating if we're reached the EOF.
108-- This is needed because unpack_nl is unable to discern the difference
109-- between a buffer with just \r due to EOF or because not enough data was left
110-- for decoding. e.g. the final character decoded from the byte buffer was \r.
111readChunkEof :: Handle__ -> CharBuffer -> IO (Text, Bool)
112readChunkEof hh buf = do t <- readChunk hh buf
113                         return (t, False)
114
115-- | /Experimental./ Read a single chunk of strict text from a
116-- 'Handle'. The size of the chunk depends on the amount of input
117-- currently buffered.
118--
119-- This function blocks only if there is no data available, and EOF
120-- has not yet been reached. Once EOF is reached, this function
121-- returns an empty string instead of throwing an exception.
122hGetChunk :: Handle -> IO Text
123hGetChunk h = wantReadableHandle "hGetChunk" h readSingleChunk
124 where
125  readSingleChunk hh@Handle__{..} = do
126    buf <- readIORef haCharBuffer
127    (t, _) <- readChunkEof hh buf `E.catch` catchError "hGetChunk" h hh
128    return (hh, t)
129
130-- | Read the remaining contents of a 'Handle' as a string.  The
131-- 'Handle' is closed once the contents have been read, or if an
132-- exception is thrown.
133--
134-- Internally, this function reads a chunk at a time from the
135-- lower-level buffering abstraction, and concatenates the chunks into
136-- a single string once the entire file has been read.
137--
138-- As a result, it requires approximately twice as much memory as its
139-- result to construct its result.  For files more than a half of
140-- available RAM in size, this may result in memory exhaustion.
141hGetContents :: Handle -> IO Text
142hGetContents h = do
143  chooseGoodBuffering h
144  wantReadableHandle "hGetContents" h readAll
145 where
146  readAll hh@Handle__{..} = do
147    let readChunks = do
148          buf <- readIORef haCharBuffer
149          (t, eof) <- readChunkEof hh buf
150                         `E.catch` catchError "hGetContents" h hh
151          if eof
152            then return [t]
153            else (t:) `fmap` readChunks
154    ts <- readChunks
155    (hh', _) <- hClose_help hh
156    return (hh'{haType=ClosedHandle}, T.concat ts)
157
158-- | Use a more efficient buffer size if we're reading in
159-- block-buffered mode with the default buffer size.  When we can
160-- determine the size of the handle we're reading, set the buffer size
161-- to that, so that we can read the entire file in one chunk.
162-- Otherwise, use a buffer size of at least 16KB.
163chooseGoodBuffering :: Handle -> IO ()
164chooseGoodBuffering h = do
165  bufMode <- hGetBuffering h
166  case bufMode of
167    BlockBuffering Nothing -> do
168      d <- E.catch (liftM2 (-) (hFileSize h) (hTell h)) $ \(e::IOException) ->
169           if ioe_type e == InappropriateType
170           then return 16384 -- faster than the 2KB default
171           else E.throwIO e
172      when (d > 0) . hSetBuffering h . BlockBuffering . Just . fromIntegral $ d
173    _ -> return ()
174
175-- | Read a single line from a handle.
176hGetLine :: Handle -> IO Text
177hGetLine = hGetLineWith T.concat
178
179-- | Write a string to a handle.
180hPutStr :: Handle -> Text -> IO ()
181-- This function is lifted almost verbatim from GHC.IO.Handle.Text.
182hPutStr h t = do
183  (buffer_mode, nl) <-
184       wantWritableHandle "hPutStr" h $ \h_ -> do
185                     bmode <- getSpareBuffer h_
186                     return (bmode, haOutputNL h_)
187  let str = stream t
188  case buffer_mode of
189     (NoBuffering, _)        -> hPutChars h str
190     (LineBuffering, buf)    -> writeLines h nl buf str
191     (BlockBuffering _, buf)
192         | nl == CRLF        -> writeBlocksCRLF h buf str
193         | otherwise         -> writeBlocksRaw h buf str
194
195hPutChars :: Handle -> Stream Char -> IO ()
196hPutChars h (Stream next0 s0 _len) = loop s0
197  where
198    loop !s = case next0 s of
199                Done       -> return ()
200                Skip s'    -> loop s'
201                Yield x s' -> hPutChar h x >> loop s'
202
203-- The following functions are largely lifted from GHC.IO.Handle.Text,
204-- but adapted to a coinductive stream of data instead of an inductive
205-- list.
206--
207-- We have several variations of more or less the same code for
208-- performance reasons.  Splitting the original buffered write
209-- function into line- and block-oriented versions gave us a 2.1x
210-- performance improvement.  Lifting out the raw/cooked newline
211-- handling gave a few more percent on top.
212
213writeLines :: Handle -> Newline -> Buffer CharBufElem -> Stream Char -> IO ()
214writeLines h nl buf0 (Stream next0 s0 _len) = outer s0 buf0
215 where
216  outer s1 Buffer{bufRaw=raw, bufSize=len} = inner s1 (0::Int)
217   where
218    inner !s !n =
219      case next0 s of
220        Done -> commit n False{-no flush-} True{-release-} >> return ()
221        Skip s' -> inner s' n
222        Yield x s'
223          | n + 1 >= len -> commit n True{-needs flush-} False >>= outer s
224          | x == '\n'    -> do
225                   n' <- if nl == CRLF
226                         then do n1 <- writeCharBuf raw n '\r'
227                                 writeCharBuf raw n1 '\n'
228                         else writeCharBuf raw n x
229                   commit n' True{-needs flush-} False >>= outer s'
230          | otherwise    -> writeCharBuf raw n x >>= inner s'
231    commit = commitBuffer h raw len
232
233writeBlocksCRLF :: Handle -> Buffer CharBufElem -> Stream Char -> IO ()
234writeBlocksCRLF h buf0 (Stream next0 s0 _len) = outer s0 buf0
235 where
236  outer s1 Buffer{bufRaw=raw, bufSize=len} = inner s1 (0::Int)
237   where
238    inner !s !n =
239      case next0 s of
240        Done -> commit n False{-no flush-} True{-release-} >> return ()
241        Skip s' -> inner s' n
242        Yield x s'
243          | n + 1 >= len -> commit n True{-needs flush-} False >>= outer s
244          | x == '\n'    -> do n1 <- writeCharBuf raw n '\r'
245                               writeCharBuf raw n1 '\n' >>= inner s'
246          | otherwise    -> writeCharBuf raw n x >>= inner s'
247    commit = commitBuffer h raw len
248
249writeBlocksRaw :: Handle -> Buffer CharBufElem -> Stream Char -> IO ()
250writeBlocksRaw h buf0 (Stream next0 s0 _len) = outer s0 buf0
251 where
252  outer s1 Buffer{bufRaw=raw, bufSize=len} = inner s1 (0::Int)
253   where
254    inner !s !n =
255      case next0 s of
256        Done -> commit n False{-no flush-} True{-release-} >> return ()
257        Skip s' -> inner s' n
258        Yield x s'
259          | n + 1 >= len -> commit n True{-needs flush-} False >>= outer s
260          | otherwise    -> writeCharBuf raw n x >>= inner s'
261    commit = commitBuffer h raw len
262
263-- This function is completely lifted from GHC.IO.Handle.Text.
264getSpareBuffer :: Handle__ -> IO (BufferMode, CharBuffer)
265getSpareBuffer Handle__{haCharBuffer=ref,
266                        haBuffers=spare_ref,
267                        haBufferMode=mode}
268 = do
269   case mode of
270     NoBuffering -> return (mode, error "no buffer!")
271     _ -> do
272          bufs <- readIORef spare_ref
273          buf  <- readIORef ref
274          case bufs of
275            BufferListCons b rest -> do
276                writeIORef spare_ref rest
277                return ( mode, emptyBuffer b (bufSize buf) WriteBuffer)
278            BufferListNil -> do
279                new_buf <- newCharBuffer (bufSize buf) WriteBuffer
280                return (mode, new_buf)
281
282
283-- This function is completely lifted from GHC.IO.Handle.Text.
284commitBuffer :: Handle -> RawCharBuffer -> Int -> Int -> Bool -> Bool
285             -> IO CharBuffer
286commitBuffer hdl !raw !sz !count flush release =
287  wantWritableHandle "commitAndReleaseBuffer" hdl $
288     commitBuffer' raw sz count flush release
289{-# INLINE commitBuffer #-}
290
291-- | Write a string to a handle, followed by a newline.
292hPutStrLn :: Handle -> Text -> IO ()
293hPutStrLn h t = hPutStr h t >> hPutChar h '\n'
294
295-- | The 'interact' function takes a function of type @Text -> Text@
296-- as its argument. The entire input from the standard input device is
297-- passed to this function as its argument, and the resulting string
298-- is output on the standard output device.
299interact :: (Text -> Text) -> IO ()
300interact f = putStr . f =<< getContents
301
302-- | Read all user input on 'stdin' as a single string.
303getContents :: IO Text
304getContents = hGetContents stdin
305
306-- | Read a single line of user input from 'stdin'.
307getLine :: IO Text
308getLine = hGetLine stdin
309
310-- | Write a string to 'stdout'.
311putStr :: Text -> IO ()
312putStr = hPutStr stdout
313
314-- | Write a string to 'stdout', followed by a newline.
315putStrLn :: Text -> IO ()
316putStrLn = hPutStrLn stdout
317
318-- $locale
319--
320-- /Note/: The behaviour of functions in this module depends on the
321-- version of GHC you are using.
322--
323-- Beginning with GHC 6.12, text I\/O is performed using the system or
324-- handle's current locale and line ending conventions.
325--
326-- Under GHC 6.10 and earlier, the system I\/O libraries do not
327-- support locale-sensitive I\/O or line ending conversion.  On these
328-- versions of GHC, functions in this library all use UTF-8.  What
329-- does this mean in practice?
330--
331-- * All data that is read will be decoded as UTF-8.
332--
333-- * Before data is written, it is first encoded as UTF-8.
334--
335-- * On both reading and writing, the platform's native newline
336--   conversion is performed.
337--
338-- If you must use a non-UTF-8 locale on an older version of GHC, you
339-- will have to perform the transcoding yourself, e.g. as follows:
340--
341-- > import qualified Data.ByteString as B
342-- > import Data.Text (Text)
343-- > import Data.Text.Encoding (encodeUtf16)
344-- >
345-- > putStr_Utf16LE :: Text -> IO ()
346-- > putStr_Utf16LE t = B.putStr (encodeUtf16LE t)
347--
348-- On transcoding errors, an 'IOError' exception is thrown. You can
349-- use the API in "Data.Text.Encoding" if you need more control over
350-- error handling or transcoding.
351