1{-# LANGUAGE BangPatterns, CPP, MagicHash, RankNTypes, RecordWildCards, 2 UnboxedTuples #-} 3 4-- | 5-- Module : Data.Attoparsec.Text.Buffer 6-- Copyright : Bryan O'Sullivan 2007-2015 7-- License : BSD3 8-- 9-- Maintainer : bos@serpentine.com 10-- Stability : experimental 11-- Portability : GHC 12-- 13-- An immutable buffer that supports cheap appends. 14 15-- A Buffer is divided into an immutable read-only zone, followed by a 16-- mutable area that we've preallocated, but not yet written to. 17-- 18-- We overallocate at the end of a Buffer so that we can cheaply 19-- append. Since a user of an existing Buffer cannot see past the end 20-- of its immutable zone into the data that will change during an 21-- append, this is safe. 22-- 23-- Once we run out of space at the end of a Buffer, we do the usual 24-- doubling of the buffer size. 25 26module Data.Attoparsec.Text.Buffer 27 ( 28 Buffer 29 , buffer 30 , unbuffer 31 , unbufferAt 32 , length 33 , pappend 34 , iter 35 , iter_ 36 , substring 37 , dropWord16 38 ) where 39 40import Control.Exception (assert) 41import Data.Bits (shiftR) 42import Data.List (foldl1') 43import Data.Monoid as Mon (Monoid(..)) 44import Data.Semigroup (Semigroup(..)) 45import Data.Text () 46import Data.Text.Internal (Text(..)) 47import Data.Text.Internal.Encoding.Utf16 (chr2) 48import Data.Text.Internal.Unsafe.Char (unsafeChr) 49import Data.Text.Unsafe (Iter(..)) 50import Foreign.Storable (sizeOf) 51import GHC.Exts (Int(..), indexIntArray#, unsafeCoerce#, writeIntArray#) 52import GHC.ST (ST(..), runST) 53import Prelude hiding (length) 54import qualified Data.Text.Array as A 55 56-- If _cap is zero, this buffer is empty. 57data Buffer = Buf { 58 _arr :: {-# UNPACK #-} !A.Array 59 , _off :: {-# UNPACK #-} !Int 60 , _len :: {-# UNPACK #-} !Int 61 , _cap :: {-# UNPACK #-} !Int 62 , _gen :: {-# UNPACK #-} !Int 63 } 64 65instance Show Buffer where 66 showsPrec p = showsPrec p . unbuffer 67 68-- | The initial 'Buffer' has no mutable zone, so we can avoid all 69-- copies in the (hopefully) common case of no further input being fed 70-- to us. 71buffer :: Text -> Buffer 72buffer (Text arr off len) = Buf arr off len len 0 73 74unbuffer :: Buffer -> Text 75unbuffer (Buf arr off len _ _) = Text arr off len 76 77unbufferAt :: Int -> Buffer -> Text 78unbufferAt s (Buf arr off len _ _) = 79 assert (s >= 0 && s <= len) $ 80 Text arr (off+s) (len-s) 81 82instance Semigroup Buffer where 83 (Buf _ _ _ 0 _) <> b = b 84 a <> (Buf _ _ _ 0 _) = a 85 buf <> (Buf arr off len _ _) = append buf arr off len 86 {-# INLINE (<>) #-} 87 88instance Monoid Buffer where 89 mempty = Buf A.empty 0 0 0 0 90 {-# INLINE mempty #-} 91 92 mappend = (<>) 93 94 mconcat [] = Mon.mempty 95 mconcat xs = foldl1' (<>) xs 96 97pappend :: Buffer -> Text -> Buffer 98pappend (Buf _ _ _ 0 _) t = buffer t 99pappend buf (Text arr off len) = append buf arr off len 100 101append :: Buffer -> A.Array -> Int -> Int -> Buffer 102append (Buf arr0 off0 len0 cap0 gen0) !arr1 !off1 !len1 = runST $ do 103 let woff = sizeOf (0::Int) `shiftR` 1 104 newlen = len0 + len1 105 !gen = if gen0 == 0 then 0 else readGen arr0 106 if gen == gen0 && newlen <= cap0 107 then do 108 let newgen = gen + 1 109 marr <- unsafeThaw arr0 110 writeGen marr newgen 111 A.copyI marr (off0+len0) arr1 off1 (off0+newlen) 112 arr2 <- A.unsafeFreeze marr 113 return (Buf arr2 off0 newlen cap0 newgen) 114 else do 115 let newcap = newlen * 2 116 newgen = 1 117 marr <- A.new (newcap + woff) 118 writeGen marr newgen 119 A.copyI marr woff arr0 off0 (woff+len0) 120 A.copyI marr (woff+len0) arr1 off1 (woff+newlen) 121 arr2 <- A.unsafeFreeze marr 122 return (Buf arr2 woff newlen newcap newgen) 123 124length :: Buffer -> Int 125length (Buf _ _ len _ _) = len 126{-# INLINE length #-} 127 128substring :: Int -> Int -> Buffer -> Text 129substring s l (Buf arr off len _ _) = 130 assert (s >= 0 && s <= len) . 131 assert (l >= 0 && l <= len-s) $ 132 Text arr (off+s) l 133{-# INLINE substring #-} 134 135dropWord16 :: Int -> Buffer -> Text 136dropWord16 s (Buf arr off len _ _) = 137 assert (s >= 0 && s <= len) $ 138 Text arr (off+s) (len-s) 139{-# INLINE dropWord16 #-} 140 141-- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-16 142-- array, returning the current character and the delta to add to give 143-- the next offset to iterate at. 144iter :: Buffer -> Int -> Iter 145iter (Buf arr off _ _ _) i 146 | m < 0xD800 || m > 0xDBFF = Iter (unsafeChr m) 1 147 | otherwise = Iter (chr2 m n) 2 148 where m = A.unsafeIndex arr j 149 n = A.unsafeIndex arr k 150 j = off + i 151 k = j + 1 152{-# INLINE iter #-} 153 154-- | /O(1)/ Iterate one step through a UTF-16 array, returning the 155-- delta to add to give the next offset to iterate at. 156iter_ :: Buffer -> Int -> Int 157iter_ (Buf arr off _ _ _) i | m < 0xD800 || m > 0xDBFF = 1 158 | otherwise = 2 159 where m = A.unsafeIndex arr (off+i) 160{-# INLINE iter_ #-} 161 162unsafeThaw :: A.Array -> ST s (A.MArray s) 163unsafeThaw A.Array{..} = ST $ \s# -> 164 (# s#, A.MArray (unsafeCoerce# aBA) #) 165 166readGen :: A.Array -> Int 167readGen a = case indexIntArray# (A.aBA a) 0# of r# -> I# r# 168 169writeGen :: A.MArray s -> Int -> ST s () 170writeGen a (I# gen#) = ST $ \s0# -> 171 case writeIntArray# (A.maBA a) 0# gen# s0# of 172 s1# -> (# s1#, () #) 173