1-- | Text IO using the UTF8 character encoding.
2
3module Agda.Utils.IO.UTF8
4  ( readTextFile
5  , Agda.Utils.IO.UTF8.writeFile
6  , writeTextToFile
7  ) where
8
9import Data.Text.Lazy (Text)
10import qualified Data.Text.Lazy as T
11import qualified Data.Text.Lazy.IO as T
12import qualified System.IO as IO
13
-- | Normalises line and paragraph separators to '\n'.
--
-- Each of the following characters is replaced by a single '\n':
-- carriage return (U+000D), form feed (U+000C), NEXT LINE (U+0085),
-- LINE SEPARATOR (U+2028) and PARAGRAPH SEPARATOR (U+2029). Every
-- other character, including '\n' itself, is left unchanged.
--
-- The replacement is done one character at a time, so '\r\n' is
-- assumed to have been converted to '\n' already.

convertLineEndings :: Text -> Text
convertLineEndings = T.map normalise
  where
  normalise c
    | isSeparator c = '\n'
    | otherwise     = c

  isSeparator = (`elem` separators)

  separators :: [Char]
  separators =
    [ '\x000D'  -- CR  (Carriage return)
    , '\x000C'  -- FF  (Form feed)
    , '\x0085'  -- NEXT LINE
    , '\x2028'  -- LINE SEPARATOR
    , '\x2029'  -- PARAGRAPH SEPARATOR
    ]
31
-- | Reads the contents of a UTF8-encoded text file.
--
-- While reading, '\r\n' is translated to '\n' by the handle's newline
-- mode; the other characters which may be interpreted as line or
-- paragraph separators are then converted into '\n' by
-- 'convertLineEndings'.
--
-- The handle is not closed explicitly here; that is left to
-- 'T.hGetContents'.

readTextFile :: FilePath -> IO Text
readTextFile file = do
  h <- IO.openFile file IO.ReadMode
  -- Collapse CRLF on input first: 'convertLineEndings' assumes that
  -- this has already happened.
  IO.hSetNewlineMode h crlfOnInput
  IO.hSetEncoding h IO.utf8
  convertLineEndings <$> T.hGetContents h
  where
  crlfOnInput =
    IO.NewlineMode { IO.inputNL = IO.CRLF, IO.outputNL = IO.LF }
43
-- | Writes a 'String' to the given file using the UTF8 encoding. The
-- file is opened in 'IO.WriteMode', and the native convention for
-- line endings is used.

writeFile :: FilePath -> String -> IO ()
writeFile file s = IO.withFile file IO.WriteMode putUtf8
  where
  -- The encoding is selected before any output is written.
  putUtf8 h = IO.hSetEncoding h IO.utf8 >> IO.hPutStr h s
51
-- | Writes a lazy 'Text' to the given file using the UTF8 encoding.
-- The file is opened in 'IO.WriteMode', and the native convention for
-- line endings is used.

writeTextToFile :: FilePath -> Text -> IO ()
writeTextToFile file s =
  IO.withFile file IO.WriteMode $ \h ->
    -- The encoding is selected before any output is written.
    IO.hSetEncoding h IO.utf8 *> T.hPutStr h s
59