1{-# LANGUAGE BangPatterns, CPP, MagicHash, RankNTypes, RecordWildCards,
2    UnboxedTuples #-}
3
4-- |
5-- Module      :  Data.Attoparsec.Text.Buffer
6-- Copyright   :  Bryan O'Sullivan 2007-2015
7-- License     :  BSD3
8--
9-- Maintainer  :  bos@serpentine.com
10-- Stability   :  experimental
11-- Portability :  GHC
12--
13-- An immutable buffer that supports cheap appends.
14
15-- A Buffer is divided into an immutable read-only zone, followed by a
16-- mutable area that we've preallocated, but not yet written to.
17--
18-- We overallocate at the end of a Buffer so that we can cheaply
19-- append.  Since a user of an existing Buffer cannot see past the end
20-- of its immutable zone into the data that will change during an
21-- append, this is safe.
22--
23-- Once we run out of space at the end of a Buffer, we do the usual
24-- doubling of the buffer size.
25
26module Data.Attoparsec.Text.Buffer
27    (
28      Buffer
29    , buffer
30    , unbuffer
31    , unbufferAt
32    , length
33    , pappend
34    , iter
35    , iter_
36    , substring
37    , dropWord16
38    ) where
39
40import Control.Exception (assert)
41import Data.Bits (shiftR)
42import Data.List (foldl1')
43import Data.Monoid as Mon (Monoid(..))
44import Data.Semigroup (Semigroup(..))
45import Data.Text ()
46import Data.Text.Internal (Text(..))
47import Data.Text.Internal.Encoding.Utf16 (chr2)
48import Data.Text.Internal.Unsafe.Char (unsafeChr)
49import Data.Text.Unsafe (Iter(..))
50import Foreign.Storable (sizeOf)
51import GHC.Exts (Int(..), indexIntArray#, unsafeCoerce#, writeIntArray#)
52import GHC.ST (ST(..), runST)
53import Prelude hiding (length)
54import qualified Data.Text.Array as A
55
56-- If _cap is zero, this buffer is empty.
57data Buffer = Buf {
58      _arr :: {-# UNPACK #-} !A.Array
59    , _off :: {-# UNPACK #-} !Int
60    , _len :: {-# UNPACK #-} !Int
61    , _cap :: {-# UNPACK #-} !Int
62    , _gen :: {-# UNPACK #-} !Int
63    }
64
65instance Show Buffer where
66    showsPrec p = showsPrec p . unbuffer
67
68-- | The initial 'Buffer' has no mutable zone, so we can avoid all
69-- copies in the (hopefully) common case of no further input being fed
70-- to us.
71buffer :: Text -> Buffer
72buffer (Text arr off len) = Buf arr off len len 0
73
74unbuffer :: Buffer -> Text
75unbuffer (Buf arr off len _ _) = Text arr off len
76
77unbufferAt :: Int -> Buffer -> Text
78unbufferAt s (Buf arr off len _ _) =
79  assert (s >= 0 && s <= len) $
80  Text arr (off+s) (len-s)
81
82instance Semigroup Buffer where
83    (Buf _ _ _ 0 _) <> b                     = b
84    a               <> (Buf _ _ _ 0 _)       = a
85    buf             <> (Buf arr off len _ _) = append buf arr off len
86    {-# INLINE (<>) #-}
87
88instance Monoid Buffer where
89    mempty = Buf A.empty 0 0 0 0
90    {-# INLINE mempty #-}
91
92    mappend = (<>)
93
94    mconcat [] = Mon.mempty
95    mconcat xs = foldl1' (<>) xs
96
97pappend :: Buffer -> Text -> Buffer
98pappend (Buf _ _ _ 0 _) t      = buffer t
99pappend buf (Text arr off len) = append buf arr off len
100
101append :: Buffer -> A.Array -> Int -> Int -> Buffer
102append (Buf arr0 off0 len0 cap0 gen0) !arr1 !off1 !len1 = runST $ do
103  let woff    = sizeOf (0::Int) `shiftR` 1
104      newlen  = len0 + len1
105      !gen    = if gen0 == 0 then 0 else readGen arr0
106  if gen == gen0 && newlen <= cap0
107    then do
108      let newgen = gen + 1
109      marr <- unsafeThaw arr0
110      writeGen marr newgen
111      A.copyI marr (off0+len0) arr1 off1 (off0+newlen)
112      arr2 <- A.unsafeFreeze marr
113      return (Buf arr2 off0 newlen cap0 newgen)
114    else do
115      let newcap = newlen * 2
116          newgen = 1
117      marr <- A.new (newcap + woff)
118      writeGen marr newgen
119      A.copyI marr woff arr0 off0 (woff+len0)
120      A.copyI marr (woff+len0) arr1 off1 (woff+newlen)
121      arr2 <- A.unsafeFreeze marr
122      return (Buf arr2 woff newlen newcap newgen)
123
124length :: Buffer -> Int
125length (Buf _ _ len _ _) = len
126{-# INLINE length #-}
127
128substring :: Int -> Int -> Buffer -> Text
129substring s l (Buf arr off len _ _) =
130  assert (s >= 0 && s <= len) .
131  assert (l >= 0 && l <= len-s) $
132  Text arr (off+s) l
133{-# INLINE substring #-}
134
135dropWord16 :: Int -> Buffer -> Text
136dropWord16 s (Buf arr off len _ _) =
137  assert (s >= 0 && s <= len) $
138  Text arr (off+s) (len-s)
139{-# INLINE dropWord16 #-}
140
141-- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-16
142-- array, returning the current character and the delta to add to give
143-- the next offset to iterate at.
144iter :: Buffer -> Int -> Iter
145iter (Buf arr off _ _ _) i
146    | m < 0xD800 || m > 0xDBFF = Iter (unsafeChr m) 1
147    | otherwise                = Iter (chr2 m n) 2
148  where m = A.unsafeIndex arr j
149        n = A.unsafeIndex arr k
150        j = off + i
151        k = j + 1
152{-# INLINE iter #-}
153
154-- | /O(1)/ Iterate one step through a UTF-16 array, returning the
155-- delta to add to give the next offset to iterate at.
156iter_ :: Buffer -> Int -> Int
157iter_ (Buf arr off _ _ _) i | m < 0xD800 || m > 0xDBFF = 1
158                                | otherwise                = 2
159  where m = A.unsafeIndex arr (off+i)
160{-# INLINE iter_ #-}
161
162unsafeThaw :: A.Array -> ST s (A.MArray s)
163unsafeThaw A.Array{..} = ST $ \s# ->
164                          (# s#, A.MArray (unsafeCoerce# aBA) #)
165
166readGen :: A.Array -> Int
167readGen a = case indexIntArray# (A.aBA a) 0# of r# -> I# r#
168
169writeGen :: A.MArray s -> Int -> ST s ()
170writeGen a (I# gen#) = ST $ \s0# ->
171  case writeIntArray# (A.maBA a) 0# gen# s0# of
172    s1# -> (# s1#, () #)
173