1{-# LANGUAGE OverloadedStrings #-}
2-- |
3-- Copyright   : (c) 2011 Simon Meier
4-- License     : BSD3-style (see LICENSE)
5--
6-- Maintainer  : Leon P Smith <leon@melding-monads.com>
7-- Stability   : experimental
8-- Portability : tested on GHC only
9--
10-- Benchmarking IO output speed of writing a string in Utf8 encoding to a file.
11module Utf8IO (main)  where
12
13import           Control.Monad
14import           Control.Exception (evaluate)
15
16import qualified Codec.Binary.UTF8.Light as Utf8Light
17
18import           Data.Char (chr)
19import           Data.Time.Clock
20import qualified Data.ByteString.Lazy as L
21import qualified Data.ByteString.Lazy.UTF8 as Utf8String
22import qualified Data.Text.Lazy          as TL
23import qualified Data.Text.Lazy.Encoding as TL
24
25import           System.IO
26import           System.Environment
27
28import           Blaze.ByteString.Builder
29import           Blaze.ByteString.Builder.Internal (defaultBufferSize)
30import qualified Blaze.ByteString.Builder.Char.Utf8 as Blaze
31
32
33-- | Write using the standard text utf8 encoding function built into 'base'.
34writeUtf8_base :: String -> FilePath -> IO ()
35writeUtf8_base cs file =
36    withFile file WriteMode $ \h -> do
37        hSetEncoding h utf8
38        hPutStr h cs
39
40-- | Write using utf8 encoding as provided by the 'blaze-builder' library.
41writeUtf8_blaze :: String -> FilePath -> IO ()
42writeUtf8_blaze cs file = L.writeFile file $ toLazyByteString $ Blaze.fromString cs
43
44-- | Write using utf8 encoding as provided by the 'text' library.
45writeUtf8_text :: TL.Text -> FilePath -> IO ()
46writeUtf8_text tx file = L.writeFile file $ TL.encodeUtf8 tx
47
48-- | Write using utf8 encoding as provided by the 'utf8-string' library.
49writeUtf8_string :: String -> FilePath -> IO ()
50writeUtf8_string cs file = L.writeFile file $ Utf8String.fromString cs
51
52-- | Write using utf8 encoding as provided by the 'utf8-light' library. Note
53-- that this library only allows encoding the whole string as a strict
54-- bytestring. That might make it unusable in some circumstances.
55{-# NOINLINE writeUtf8_light #-}
56writeUtf8_light :: String -> FilePath -> IO ()
57writeUtf8_light cs file = Utf8Light.writeUTF8File file cs
58
59
60main :: IO ()
61main = do
62    [how, len, file] <- getArgs
63    let blocksize = 32000
64        block     = map chr [0..blocksize]
65        n         = read len
66        cs        = take n $ cycle $ block
67        tx        = TL.pack cs
68    writer <- case how of
69        "base"        -> return $ writeUtf8_base cs
70        "blaze"       -> return $ writeUtf8_blaze cs
71        "utf8-string" -> return $ writeUtf8_string cs
72
73        -- utf8-light is missing support for lazy bytestrings => test 100 times
74        -- writing a 100 times smaller string to avoid out-of-memory errors.
75        "utf8-light"  -> return $ \f -> sequence_ $ replicate 100 $
76                                        writeUtf8_light (take (n `div` 100) cs) f
77
78        "via-text"    -> do return $ writeUtf8_text tx
79
80        -- Here, we ensure that the text tx is already packed before timing.
81        "text"        -> do _ <- evaluate (TL.length tx)
82                            return $ writeUtf8_text tx
83        _             -> error $ "unknown writer '" ++ how ++ "'"
84    t <- timed_ $ writer file
85    putStrLn $ how ++ ": " ++ show t
86
87------------------------------------------------------------------------------
88-- Timing
89------------------------------------------------------------------------------
90
91-- | Execute an IO action and return its result plus the time it took to execute it.
92timed :: IO a -> IO (a, NominalDiffTime)
93timed io = do
94  t0 <- getCurrentTime
95  x <- io
96  t1 <- getCurrentTime
97  return (x, diffUTCTime t1 t0)
98
99-- | Execute an IO action and return the time it took to execute it.
100timed_ :: IO a -> IO NominalDiffTime
101timed_ = (snd `liftM`) . timed
102
103