1{-# LANGUAGE BangPatterns, CPP, RecordWildCards, ScopedTypeVariables #-} 2#if __GLASGOW_HASKELL__ >= 702 3{-# LANGUAGE Trustworthy #-} 4#endif 5-- | 6-- Module : Data.Text.IO 7-- Copyright : (c) 2009, 2010 Bryan O'Sullivan, 8-- (c) 2009 Simon Marlow 9-- License : BSD-style 10-- Maintainer : bos@serpentine.com 11-- Portability : GHC 12-- 13-- Efficient locale-sensitive support for text I\/O. 14-- 15-- Skip past the synopsis for some important notes on performance and 16-- portability across different versions of GHC. 17 18module Data.Text.IO 19 ( 20 -- * Performance 21 -- $performance 22 23 -- * Locale support 24 -- $locale 25 -- * File-at-a-time operations 26 readFile 27 , writeFile 28 , appendFile 29 -- * Operations on handles 30 , hGetContents 31 , hGetChunk 32 , hGetLine 33 , hPutStr 34 , hPutStrLn 35 -- * Special cases for standard input and output 36 , interact 37 , getContents 38 , getLine 39 , putStr 40 , putStrLn 41 ) where 42 43import Data.Text (Text) 44import Prelude hiding (appendFile, getContents, getLine, interact, 45 putStr, putStrLn, readFile, writeFile) 46import System.IO (Handle, IOMode(..), hPutChar, openFile, stdin, stdout, 47 withFile) 48import qualified Control.Exception as E 49import Control.Monad (liftM2, when) 50import Data.IORef (readIORef, writeIORef) 51import qualified Data.Text as T 52import Data.Text.Internal.Fusion (stream) 53import Data.Text.Internal.Fusion.Types (Step(..), Stream(..)) 54import Data.Text.Internal.IO (hGetLineWith, readChunk) 55import GHC.IO.Buffer (Buffer(..), BufferState(..), CharBufElem, CharBuffer, 56 RawCharBuffer, emptyBuffer, isEmptyBuffer, newCharBuffer, 57 writeCharBuf) 58import GHC.IO.Exception (IOException(ioe_type), IOErrorType(InappropriateType)) 59import GHC.IO.Handle.Internals (augmentIOError, hClose_help, wantReadableHandle, 60 wantWritableHandle) 61import GHC.IO.Handle.Text (commitBuffer') 62import GHC.IO.Handle.Types (BufferList(..), BufferMode(..), Handle__(..), 63 HandleType(..), Newline(..)) 64import System.IO (hGetBuffering, hFileSize, hSetBuffering, hTell) 65import System.IO.Error (isEOFError) 66 67-- $performance 68-- #performance# 69-- 70-- The functions in this module obey the runtime system's locale, 71-- character set encoding, and line ending conversion settings. 72-- 73-- If you know in advance that you will be working with data that has 74-- a specific encoding (e.g. UTF-8), and your application is highly 75-- performance sensitive, you may find that it is faster to perform 76-- I\/O with bytestrings and to encode and decode yourself than to use 77-- the functions in this module. 78-- 79-- Whether this will hold depends on the version of GHC you are using, 80-- the platform you are working on, the data you are working with, and 81-- the encodings you are using, so be sure to test for yourself. 82 83-- | The 'readFile' function reads a file and returns the contents of 84-- the file as a string. The entire file is read strictly, as with 85-- 'getContents'. 86readFile :: FilePath -> IO Text 87readFile name = openFile name ReadMode >>= hGetContents 88 89-- | Write a string to a file. The file is truncated to zero length 90-- before writing begins. 91writeFile :: FilePath -> Text -> IO () 92writeFile p = withFile p WriteMode . flip hPutStr 93 94-- | Write a string the end of a file. 95appendFile :: FilePath -> Text -> IO () 96appendFile p = withFile p AppendMode . flip hPutStr 97 98catchError :: String -> Handle -> Handle__ -> IOError -> IO (Text, Bool) 99catchError caller h Handle__{..} err 100 | isEOFError err = do 101 buf <- readIORef haCharBuffer 102 return $ if isEmptyBuffer buf 103 then (T.empty, True) 104 else (T.singleton '\r', True) 105 | otherwise = E.throwIO (augmentIOError err caller h) 106 107-- | Wrap readChunk and return a value indicating if we're reached the EOF. 108-- This is needed because unpack_nl is unable to discern the difference 109-- between a buffer with just \r due to EOF or because not enough data was left 110-- for decoding. e.g. the final character decoded from the byte buffer was \r. 111readChunkEof :: Handle__ -> CharBuffer -> IO (Text, Bool) 112readChunkEof hh buf = do t <- readChunk hh buf 113 return (t, False) 114 115-- | /Experimental./ Read a single chunk of strict text from a 116-- 'Handle'. The size of the chunk depends on the amount of input 117-- currently buffered. 118-- 119-- This function blocks only if there is no data available, and EOF 120-- has not yet been reached. Once EOF is reached, this function 121-- returns an empty string instead of throwing an exception. 122hGetChunk :: Handle -> IO Text 123hGetChunk h = wantReadableHandle "hGetChunk" h readSingleChunk 124 where 125 readSingleChunk hh@Handle__{..} = do 126 buf <- readIORef haCharBuffer 127 (t, _) <- readChunkEof hh buf `E.catch` catchError "hGetChunk" h hh 128 return (hh, t) 129 130-- | Read the remaining contents of a 'Handle' as a string. The 131-- 'Handle' is closed once the contents have been read, or if an 132-- exception is thrown. 133-- 134-- Internally, this function reads a chunk at a time from the 135-- lower-level buffering abstraction, and concatenates the chunks into 136-- a single string once the entire file has been read. 137-- 138-- As a result, it requires approximately twice as much memory as its 139-- result to construct its result. For files more than a half of 140-- available RAM in size, this may result in memory exhaustion. 141hGetContents :: Handle -> IO Text 142hGetContents h = do 143 chooseGoodBuffering h 144 wantReadableHandle "hGetContents" h readAll 145 where 146 readAll hh@Handle__{..} = do 147 let readChunks = do 148 buf <- readIORef haCharBuffer 149 (t, eof) <- readChunkEof hh buf 150 `E.catch` catchError "hGetContents" h hh 151 if eof 152 then return [t] 153 else (t:) `fmap` readChunks 154 ts <- readChunks 155 (hh', _) <- hClose_help hh 156 return (hh'{haType=ClosedHandle}, T.concat ts) 157 158-- | Use a more efficient buffer size if we're reading in 159-- block-buffered mode with the default buffer size. When we can 160-- determine the size of the handle we're reading, set the buffer size 161-- to that, so that we can read the entire file in one chunk. 162-- Otherwise, use a buffer size of at least 16KB. 163chooseGoodBuffering :: Handle -> IO () 164chooseGoodBuffering h = do 165 bufMode <- hGetBuffering h 166 case bufMode of 167 BlockBuffering Nothing -> do 168 d <- E.catch (liftM2 (-) (hFileSize h) (hTell h)) $ \(e::IOException) -> 169 if ioe_type e == InappropriateType 170 then return 16384 -- faster than the 2KB default 171 else E.throwIO e 172 when (d > 0) . hSetBuffering h . BlockBuffering . Just . fromIntegral $ d 173 _ -> return () 174 175-- | Read a single line from a handle. 176hGetLine :: Handle -> IO Text 177hGetLine = hGetLineWith T.concat 178 179-- | Write a string to a handle. 180hPutStr :: Handle -> Text -> IO () 181-- This function is lifted almost verbatim from GHC.IO.Handle.Text. 182hPutStr h t = do 183 (buffer_mode, nl) <- 184 wantWritableHandle "hPutStr" h $ \h_ -> do 185 bmode <- getSpareBuffer h_ 186 return (bmode, haOutputNL h_) 187 let str = stream t 188 case buffer_mode of 189 (NoBuffering, _) -> hPutChars h str 190 (LineBuffering, buf) -> writeLines h nl buf str 191 (BlockBuffering _, buf) 192 | nl == CRLF -> writeBlocksCRLF h buf str 193 | otherwise -> writeBlocksRaw h buf str 194 195hPutChars :: Handle -> Stream Char -> IO () 196hPutChars h (Stream next0 s0 _len) = loop s0 197 where 198 loop !s = case next0 s of 199 Done -> return () 200 Skip s' -> loop s' 201 Yield x s' -> hPutChar h x >> loop s' 202 203-- The following functions are largely lifted from GHC.IO.Handle.Text, 204-- but adapted to a coinductive stream of data instead of an inductive 205-- list. 206-- 207-- We have several variations of more or less the same code for 208-- performance reasons. Splitting the original buffered write 209-- function into line- and block-oriented versions gave us a 2.1x 210-- performance improvement. Lifting out the raw/cooked newline 211-- handling gave a few more percent on top. 212 213writeLines :: Handle -> Newline -> Buffer CharBufElem -> Stream Char -> IO () 214writeLines h nl buf0 (Stream next0 s0 _len) = outer s0 buf0 215 where 216 outer s1 Buffer{bufRaw=raw, bufSize=len} = inner s1 (0::Int) 217 where 218 inner !s !n = 219 case next0 s of 220 Done -> commit n False{-no flush-} True{-release-} >> return () 221 Skip s' -> inner s' n 222 Yield x s' 223 | n + 1 >= len -> commit n True{-needs flush-} False >>= outer s 224 | x == '\n' -> do 225 n' <- if nl == CRLF 226 then do n1 <- writeCharBuf raw n '\r' 227 writeCharBuf raw n1 '\n' 228 else writeCharBuf raw n x 229 commit n' True{-needs flush-} False >>= outer s' 230 | otherwise -> writeCharBuf raw n x >>= inner s' 231 commit = commitBuffer h raw len 232 233writeBlocksCRLF :: Handle -> Buffer CharBufElem -> Stream Char -> IO () 234writeBlocksCRLF h buf0 (Stream next0 s0 _len) = outer s0 buf0 235 where 236 outer s1 Buffer{bufRaw=raw, bufSize=len} = inner s1 (0::Int) 237 where 238 inner !s !n = 239 case next0 s of 240 Done -> commit n False{-no flush-} True{-release-} >> return () 241 Skip s' -> inner s' n 242 Yield x s' 243 | n + 1 >= len -> commit n True{-needs flush-} False >>= outer s 244 | x == '\n' -> do n1 <- writeCharBuf raw n '\r' 245 writeCharBuf raw n1 '\n' >>= inner s' 246 | otherwise -> writeCharBuf raw n x >>= inner s' 247 commit = commitBuffer h raw len 248 249writeBlocksRaw :: Handle -> Buffer CharBufElem -> Stream Char -> IO () 250writeBlocksRaw h buf0 (Stream next0 s0 _len) = outer s0 buf0 251 where 252 outer s1 Buffer{bufRaw=raw, bufSize=len} = inner s1 (0::Int) 253 where 254 inner !s !n = 255 case next0 s of 256 Done -> commit n False{-no flush-} True{-release-} >> return () 257 Skip s' -> inner s' n 258 Yield x s' 259 | n + 1 >= len -> commit n True{-needs flush-} False >>= outer s 260 | otherwise -> writeCharBuf raw n x >>= inner s' 261 commit = commitBuffer h raw len 262 263-- This function is completely lifted from GHC.IO.Handle.Text. 264getSpareBuffer :: Handle__ -> IO (BufferMode, CharBuffer) 265getSpareBuffer Handle__{haCharBuffer=ref, 266 haBuffers=spare_ref, 267 haBufferMode=mode} 268 = do 269 case mode of 270 NoBuffering -> return (mode, error "no buffer!") 271 _ -> do 272 bufs <- readIORef spare_ref 273 buf <- readIORef ref 274 case bufs of 275 BufferListCons b rest -> do 276 writeIORef spare_ref rest 277 return ( mode, emptyBuffer b (bufSize buf) WriteBuffer) 278 BufferListNil -> do 279 new_buf <- newCharBuffer (bufSize buf) WriteBuffer 280 return (mode, new_buf) 281 282 283-- This function is completely lifted from GHC.IO.Handle.Text. 284commitBuffer :: Handle -> RawCharBuffer -> Int -> Int -> Bool -> Bool 285 -> IO CharBuffer 286commitBuffer hdl !raw !sz !count flush release = 287 wantWritableHandle "commitAndReleaseBuffer" hdl $ 288 commitBuffer' raw sz count flush release 289{-# INLINE commitBuffer #-} 290 291-- | Write a string to a handle, followed by a newline. 292hPutStrLn :: Handle -> Text -> IO () 293hPutStrLn h t = hPutStr h t >> hPutChar h '\n' 294 295-- | The 'interact' function takes a function of type @Text -> Text@ 296-- as its argument. The entire input from the standard input device is 297-- passed to this function as its argument, and the resulting string 298-- is output on the standard output device. 299interact :: (Text -> Text) -> IO () 300interact f = putStr . f =<< getContents 301 302-- | Read all user input on 'stdin' as a single string. 303getContents :: IO Text 304getContents = hGetContents stdin 305 306-- | Read a single line of user input from 'stdin'. 307getLine :: IO Text 308getLine = hGetLine stdin 309 310-- | Write a string to 'stdout'. 311putStr :: Text -> IO () 312putStr = hPutStr stdout 313 314-- | Write a string to 'stdout', followed by a newline. 315putStrLn :: Text -> IO () 316putStrLn = hPutStrLn stdout 317 318-- $locale 319-- 320-- /Note/: The behaviour of functions in this module depends on the 321-- version of GHC you are using. 322-- 323-- Beginning with GHC 6.12, text I\/O is performed using the system or 324-- handle's current locale and line ending conventions. 325-- 326-- Under GHC 6.10 and earlier, the system I\/O libraries do not 327-- support locale-sensitive I\/O or line ending conversion. On these 328-- versions of GHC, functions in this library all use UTF-8. What 329-- does this mean in practice? 330-- 331-- * All data that is read will be decoded as UTF-8. 332-- 333-- * Before data is written, it is first encoded as UTF-8. 334-- 335-- * On both reading and writing, the platform's native newline 336-- conversion is performed. 337-- 338-- If you must use a non-UTF-8 locale on an older version of GHC, you 339-- will have to perform the transcoding yourself, e.g. as follows: 340-- 341-- > import qualified Data.ByteString as B 342-- > import Data.Text (Text) 343-- > import Data.Text.Encoding (encodeUtf16) 344-- > 345-- > putStr_Utf16LE :: Text -> IO () 346-- > putStr_Utf16LE t = B.putStr (encodeUtf16LE t) 347-- 348-- On transcoding errors, an 'IOError' exception is thrown. You can 349-- use the API in "Data.Text.Encoding" if you need more control over 350-- error handling or transcoding. 351