1{-# LANGUAGE OverloadedStrings #-}
2-- |
3-- Module      : StringAndText
4-- Copyright   : (c) 2010 Simon Meier
5-- License     : BSD3-style (see LICENSE)
6--
7-- Maintainer  : Leon P Smith <leon@melding-monads.com>
8-- Stability   : experimental
9-- Portability : tested on GHC only
10--
11-- Benchmarking of String and Text serialization.
12module StringAndText (main)  where
13
14import Data.Char (ord)
15import Data.Monoid
16
17import Criterion.Main
18
19import Foreign (plusPtr)
20import qualified Data.ByteString      as S
21import qualified Data.ByteString.Lazy as L
22import qualified Data.Text               as TS
23import qualified Data.Text.Encoding      as TS
24import qualified Data.Text.Lazy          as TL
25import qualified Data.Text.Lazy.Encoding as TL
26
27import qualified Blaze.ByteString.Builder           as Blaze
28import qualified Data.ByteString.Builder.Internal   as Blaze
29import qualified Blaze.ByteString.Builder.Char.Utf8 as Blaze
30import qualified Blaze.ByteString.Builder.Html.Utf8 as Blaze
31
-- | Run the 'String' and 'Text' serialization benchmarks.
--
-- The three shared inputs are lazily evaluated top-level values, so they are
-- forced before 'defaultMain' starts measuring.  Otherwise the first
-- benchmark to touch an input would also be charged for building it.
main :: IO ()
main = forceInputs `seq` defaultMain
    -- Text -> String conversions; 'nf' forces the complete resulting list.
    [ bench "TL.unpack :: LazyText -> String" $ nf
        TL.unpack benchLazyText

    , bench "TL.foldr  :: LazyText -> String" $ nf
        (TL.foldr (:) []) benchLazyText

    -- UTF-8 encoding.  'L.length' has to visit every chunk, so taking it to
    -- WHNF forces the whole lazy 'L.ByteString' produced by the builder.
    , bench "fromString :: String --[Utf8 encoding]--> L.ByteString" $ whnf
        (L.length . Blaze.toLazyByteString . Blaze.fromString) benchString

    , bench "fromStrictTextUnpacked :: StrictText --[Utf8 encoding]--> L.ByteString" $ whnf
        (L.length . Blaze.toLazyByteString . Blaze.fromText) benchStrictText

    -- , bench "fromStrictTextFolded :: StrictText --[Utf8 encoding]--> L.ByteString" $ whnf
        -- (L.length . Blaze.toLazyByteString . fromStrictTextFolded) benchStrictText

    -- The result here is a strict 'S.ByteString', so 'whnf' alone is enough.
    , bench "TS.encodeUtf8 :: StrictText --[Utf8 encoding]--> S.ByteString" $ whnf
        (TS.encodeUtf8) benchStrictText

    , bench "fromLazyTextUnpacked :: LazyText --[Utf8 encoding]--> L.ByteString" $ whnf
        (L.length . Blaze.toLazyByteString . Blaze.fromLazyText) benchLazyText

    -- , bench "fromLazyTextFolded :: LazyText --[Utf8 encoding]--> L.ByteString" $ whnf
        -- (L.length . Blaze.toLazyByteString . fromLazyTextFolded) benchLazyText

    , bench "TL.encodeUtf8 :: LazyText --[Utf8 encoding]--> L.ByteString" $ whnf
        (L.length . TL.encodeUtf8) benchLazyText

    -- HTML escaping combined with UTF-8 encoding.
    , bench "fromHtmlEscapedString :: String --[Html esc. & Utf8 encoding]--> L.ByteString" $ whnf
        (L.length . Blaze.toLazyByteString . Blaze.fromHtmlEscapedString) benchString

    , bench "fromHtmlEscapedStrictTextUnpacked :: StrictText --[HTML esc. & Utf8 encoding]--> L.ByteString" $ whnf
        (L.length . Blaze.toLazyByteString . Blaze.fromHtmlEscapedText) benchStrictText

    , bench "fromHtmlEscapedLazyTextUnpacked :: LazyText --[HTML esc. & Utf8 encoding]--> L.ByteString" $ whnf
        (L.length . Blaze.toLazyByteString . Blaze.fromHtmlEscapedLazyText) benchLazyText

    ]
  where
    -- Packing a text has to read every character of 'benchString', and
    -- taking the length of the lazy text walks all of its chunks.  The final
    -- 'length' then only re-walks the already evaluated list.
    forceInputs =
        TS.length benchStrictText `seq`
        TL.length benchLazyText `seq`
        length benchString
71
-- | Number of characters kept for the benchmark input string.
n :: Int
n = 100 * 1000
74
-- | The 'String' input: the first 'n' characters of the decimal renderings of
-- 1, 2, 3, ... written back to back (@"1234567891011..."@).
--
-- Marked NOINLINE so GHC does not copy the definition into its use sites.
{-# NOINLINE benchString #-}
benchString :: String
benchString = take n [digit | i <- [(1 :: Int) ..], digit <- show i]
78
-- | The strict 'TS.Text' input, packed from 'benchString'.
--
-- Marked NOINLINE so GHC does not copy the definition into its use sites.
{-# NOINLINE benchStrictText #-}
benchStrictText :: TS.Text
benchStrictText = TS.pack benchString
82
-- | The lazy 'TL.Text' input, packed from 'benchString'.
--
-- Marked NOINLINE so GHC does not copy the definition into its use sites.
{-# NOINLINE benchLazyText #-}
benchLazyText :: TL.Text
benchLazyText = TL.pack benchString
86
87{-
88
89-- | Encode the 'TS.Text' as UTF-8 by folding it and filling the raw buffer
90-- directly.
91fromStrictTextFolded :: TS.Text -> Blaze.Builder
92fromStrictTextFolded t = Blaze.fromBuildStepCont $ \k -> TS.foldr step k t
93  where
94    step c k pf pe
95      | pf' <= pe = do
96          io pf
          k pf' pe  -- It would be nice not to have to pass pe around here;
                    -- that requires a more powerful fold for StrictText.
99      | otherwise =
100          return $ Blaze.bufferFull size pf $ \(Blaze.BufRange pfNew peNew) -> do
101            let !br' = Blaze.BufRange (pfNew `plusPtr` size) peNew
102            io pfNew
103            k br'
104      where
105        pf' = pf `plusPtr` size
106        Blaze.Write size io = Blaze.writeChar c
107{-# INLINE fromStrictTextFolded #-}
108
109-- | Encode the 'TL.Text' as UTF-8 by folding it and filling the raw buffer
110-- directly.
111fromLazyTextFolded :: TL.Text -> Blaze.Builder
112fromLazyTextFolded t = Blaze.fromBuildStepContBuilder $ \k -> TL.foldr step k t
113  where
114    step c k pf pe
115      | pf' <= pe = do
116          io pf
          k pf' pe  -- It would be nice not to have to pass pe around here;
                    -- that requires a more powerful fold for LazyText.
119      | otherwise =
120          return $ Blaze.bufferFull size pf $ \(Blaze.BufRange pfNew peNew) -> do
121            let !br' = Blaze.BufRange (pfNew `plusPtr` size) peNew
122            io pfNew
123            k br'
124      where
125        pf' = pf `plusPtr` size
126        Blaze.Write size io = Blaze.writeChar c
127{-# INLINE fromLazyTextFolded #-}
128-}
129