1{-# LANGUAGE CPP, BangPatterns #-} 2-- | 3-- Module : BoundedWrite 4-- Copyright : (c) 2010 Simon Meier 5-- License : BSD3-style (see LICENSE) 6-- 7-- Maintainer : Leon P Smith <leon@melding-monads.com> 8-- Stability : experimental 9-- Portability : tested on GHC only 10-- 11-- A more general/efficient write type. 12-- 13module BoundedWrite (main) where 14 15import Foreign 16import Data.Monoid 17import Data.Char 18 19import Foreign.UPtr 20 21import qualified Data.ByteString.Internal as S 22import qualified Data.ByteString.Lazy as L 23 24import Blaze.ByteString.Builder.Internal 25import Blaze.ByteString.Builder.Write 26import Blaze.ByteString.Builder.Word 27 28import Criterion.Main 29 30------------------------------------------------------------------------------ 31-- Benchmarks 32------------------------------------------------------------------------------ 33 34main :: IO () 35main = defaultMain $ concat 36 {- 37 [ benchmark "mconcat . map (fromWriteSingleton writeChar)" 38 bfrom3Chars 39 from3Chars 40 chars3 41 ] 42 -} 43 [ benchmark "mconcat . map fromWord8" 44 (mconcat . map bfromWord8) 45 (mconcat . map fromWord8) 46 word8s 47 ] 48 where 49 benchmark name boundedF staticF x = 50 [ bench (name ++ " <- bounded write") $ 51 whnf (L.length . toLazyByteString . boundedF) x 52 , bench (name ++ " <- static write") $ 53 whnf (L.length . toLazyByteString . staticF) x 54 ] 55 56word8s :: [Word8] 57word8s = take 100000 $ cycle [0..] 58{-# NOINLINE word8s #-} 59 60chars :: [Char] 61chars = take 100000 $ ['\0'..] 62{-# NOINLINE chars #-} 63 64chars2 :: [(Char,Char)] 65chars2 = zip chars chars 66{-# NOINLINE chars2 #-} 67 68chars3 :: [(Char, Char, Char)] 69chars3 = zip3 chars (reverse chars) (reverse chars) 70{-# NOINLINE chars3 #-} 71 72bfromChars = (mconcat . map (fromBWriteSingleton bwriteChar)) 73{-# NOINLINE bfromChars #-} 74 75fromChars = (mconcat . map (fromWriteSingleton writeChar)) 76{-# NOINLINE fromChars #-} 77 78bfrom2Chars = (mconcat . map (fromBWriteSingleton (\(c1, c2) -> bwriteChar c1 `mappend` bwriteChar c2))) 79{-# NOINLINE bfrom2Chars #-} 80 81from2Chars = (mconcat . map (fromWriteSingleton (\(c1, c2) -> writeChar c1 `mappend` writeChar c2))) 82{-# NOINLINE from2Chars #-} 83 84bfrom3Chars = (mconcat . map (fromBWriteSingleton (\(c1, c2, c3) -> bwriteChar c1 `mappend` bwriteChar c2 `mappend` bwriteChar c3))) 85{-# NOINLINE bfrom3Chars #-} 86 87from3Chars = (mconcat . map (fromWriteSingleton (\(c1, c2, c3) -> writeChar c1 `mappend` writeChar c2 `mappend` writeChar c3))) 88{-# NOINLINE from3Chars #-} 89 90------------------------------------------------------------------------------ 91-- The Bounded Write Type 92------------------------------------------------------------------------------ 93 94-- * GRRR* GHC is too 'clever'... code where we branch and each branch should 95-- execute a few IO actions and then return a value cannot be taught to GHC. 96-- At least not such that it returns the value of the branches unpacked. 97-- 98-- Hmm.. at least he behaves much better for the Monoid instance of BWrite 99-- than the one for Write. Serializing UTF-8 chars gets a slowdown of a 100-- factor 2 when 2 chars are composed. Perhaps I should try out the writeList 101-- instances also, as they may be more sensitive to to much work per Char. 102-- 103data BWrite = BWrite {-# UNPACK #-} !Int (UPtr -> UPtr) 104 105newtype UWrite = UWrite { runUWrite :: UPtr -> UPtr } 106 107instance Monoid UWrite where 108 mempty = UWrite $ \x -> x 109 {-# INLINE mempty #-} 110 (UWrite uw1) `mappend` (UWrite uw2) = UWrite (\up -> uw2 (uw1 up)) 111 {-# INLINE mappend #-} 112 113instance Monoid BWrite where 114 mempty = BWrite 0 (\x -> x) 115 {-# INLINE mempty #-} 116 (BWrite b1 io1) `mappend` (BWrite b2 io2) = 117 BWrite (b1 + b2) (\op -> io2 (io1 op)) 118 {-# INLINE mappend #-} 119 120execWrite :: IO () -> UPtr -> UPtr 121execWrite io op' = S.inlinePerformIO io `seq` op' 122{-# INLINE execWrite #-} 123 124execWriteSize :: (Ptr Word8 -> IO ()) -> Int -> UPtr -> UPtr 125execWriteSize io size op = execWrite (io (uptrToPtr op)) (op `plusUPtr` size) 126{-# INLINE execWriteSize #-} 127 128staticBWrite :: Int -> (Ptr Word8 -> IO ()) -> BWrite 129staticBWrite size io = BWrite size (execWriteSize io size) 130{-# INLINE staticBWrite #-} 131 132bwriteWord8 :: Word8 -> BWrite 133bwriteWord8 x = staticBWrite 1 (`poke` x) 134{-# INLINE bwriteWord8 #-} 135 136fromBWrite :: BWrite -> Builder 137fromBWrite (BWrite size io) = 138 Builder step 139 where 140 step k !pf !pe 141 | pf `plusPtr` size <= pe = do 142 let !pf' = io (ptrToUPtr pf) 143 k (uptrToPtr pf') pe 144 | otherwise = return $ BufferFull size pf (step k) 145{-# INLINE fromBWrite #-} 146 147fromBWriteSingleton :: (a -> BWrite) -> a -> Builder 148fromBWriteSingleton write = 149 mkPut 150 where 151 mkPut x = Builder step 152 where 153 step k !pf !pe 154 | pf `plusPtr` size <= pe = do 155 let !pf' = io (ptrToUPtr pf) 156 k (uptrToPtr pf') pe 157 | otherwise = return $ BufferFull size pf (step k) 158 where 159 BWrite size io = write x 160{-# INLINE fromBWriteSingleton #-} 161 162bfromWord8 :: Word8 -> Builder 163bfromWord8 = fromBWriteSingleton bwriteWord8 164 165-- Utf-8 encoding 166----------------- 167 168bwriteChar :: Char -> BWrite 169bwriteChar c = BWrite 4 (encodeCharUtf8 f1 f2 f3 f4 c) 170 where 171 f1 x = \uptr -> execWrite (do let !ptr = uptrToPtr uptr 172 poke ptr x ) 173 (uptr `plusUPtr` 1) 174 175 f2 x1 x2 = \uptr -> execWrite (do let !ptr = uptrToPtr uptr 176 poke ptr x1 177 poke (ptr `plusPtr` 1) x2 ) 178 (uptr `plusUPtr` 2) 179 180 f3 x1 x2 x3 = \uptr -> execWrite (do let !ptr = uptrToPtr uptr 181 poke ptr x1 182 poke (ptr `plusPtr` 1) x2 183 poke (ptr `plusPtr` 2) x3 ) 184 (uptr `plusUPtr` 3) 185 186 f4 x1 x2 x3 x4 = \uptr -> execWrite (do let !ptr = uptrToPtr uptr 187 poke ptr x1 188 poke (ptr `plusPtr` 1) x2 189 poke (ptr `plusPtr` 2) x3 190 poke (ptr `plusPtr` 3) x4 ) 191 (uptr `plusUPtr` 4) 192{-# INLINE bwriteChar #-} 193 194writeChar :: Char -> Write 195writeChar = encodeCharUtf8 f1 f2 f3 f4 196 where 197 f1 x = Write 1 $ \ptr -> poke ptr x 198 199 f2 x1 x2 = Write 2 $ \ptr -> do poke ptr x1 200 poke (ptr `plusPtr` 1) x2 201 202 f3 x1 x2 x3 = Write 3 $ \ptr -> do poke ptr x1 203 poke (ptr `plusPtr` 1) x2 204 poke (ptr `plusPtr` 2) x3 205 206 f4 x1 x2 x3 x4 = Write 4 $ \ptr -> do poke ptr x1 207 poke (ptr `plusPtr` 1) x2 208 poke (ptr `plusPtr` 2) x3 209 poke (ptr `plusPtr` 3) x4 210{-# INLINE writeChar #-} 211 212-- | Encode a Unicode character to another datatype, using UTF-8. This function 213-- acts as an abstract way of encoding characters, as it is unaware of what 214-- needs to happen with the resulting bytes: you have to specify functions to 215-- deal with those. 216-- 217encodeCharUtf8 :: (Word8 -> a) -- ^ 1-byte UTF-8 218 -> (Word8 -> Word8 -> a) -- ^ 2-byte UTF-8 219 -> (Word8 -> Word8 -> Word8 -> a) -- ^ 3-byte UTF-8 220 -> (Word8 -> Word8 -> Word8 -> Word8 -> a) -- ^ 4-byte UTF-8 221 -> Char -- ^ Input 'Char' 222 -> a -- ^ Result 223encodeCharUtf8 f1 f2 f3 f4 c = case ord c of 224 x | x <= 0x7F -> f1 $ fromIntegral x 225 | x <= 0x07FF -> 226 let x1 = fromIntegral $ (x `shiftR` 6) + 0xC0 227 x2 = fromIntegral $ (x .&. 0x3F) + 0x80 228 in f2 x1 x2 229 | x <= 0xFFFF -> 230 let x1 = fromIntegral $ (x `shiftR` 12) + 0xE0 231 x2 = fromIntegral $ ((x `shiftR` 6) .&. 0x3F) + 0x80 232 x3 = fromIntegral $ (x .&. 0x3F) + 0x80 233 in f3 x1 x2 x3 234 | otherwise -> 235 let x1 = fromIntegral $ (x `shiftR` 18) + 0xF0 236 x2 = fromIntegral $ ((x `shiftR` 12) .&. 0x3F) + 0x80 237 x3 = fromIntegral $ ((x `shiftR` 6) .&. 0x3F) + 0x80 238 x4 = fromIntegral $ (x .&. 0x3F) + 0x80 239 in f4 x1 x2 x3 x4 240{-# INLINE encodeCharUtf8 #-} 241 242