1{-# LANGUAGE CPP, MagicHash, UnboxedTuples #-}
2-- |
3-- Module      : Data.Text.Unsafe
4-- Copyright   : (c) 2009, 2010, 2011 Bryan O'Sullivan
5-- License     : BSD-style
6-- Maintainer  : bos@serpentine.com
7-- Portability : portable
8--
9-- A module containing unsafe 'Text' operations, for very very careful
10-- use in heavily tested code.
11module Data.Text.Unsafe
12    (
13      inlineInterleaveST
14    , inlinePerformIO
15    , unsafeDupablePerformIO
16    , Iter(..)
17    , iter
18    , iter_
19    , reverseIter
20    , reverseIter_
21    , unsafeHead
22    , unsafeTail
23    , lengthWord16
24    , takeWord16
25    , dropWord16
26    ) where
27
28#if defined(ASSERTS)
29import Control.Exception (assert)
30#endif
31import Data.Text.Internal.Encoding.Utf16 (chr2)
32import Data.Text.Internal (Text(..))
33import Data.Text.Internal.Unsafe (inlineInterleaveST, inlinePerformIO)
34import Data.Text.Internal.Unsafe.Char (unsafeChr)
35import qualified Data.Text.Array as A
36import GHC.IO (unsafeDupablePerformIO)
37
38-- | /O(1)/ A variant of 'head' for non-empty 'Text'. 'unsafeHead'
39-- omits the check for the empty case, so there is an obligation on
40-- the programmer to provide a proof that the 'Text' is non-empty.
41unsafeHead :: Text -> Char
42unsafeHead (Text arr off _len)
43    | m < 0xD800 || m > 0xDBFF = unsafeChr m
44    | otherwise                = chr2 m n
45    where m = A.unsafeIndex arr off
46          n = A.unsafeIndex arr (off+1)
47{-# INLINE unsafeHead #-}
48
49-- | /O(1)/ A variant of 'tail' for non-empty 'Text'. 'unsafeTail'
50-- omits the check for the empty case, so there is an obligation on
51-- the programmer to provide a proof that the 'Text' is non-empty.
52unsafeTail :: Text -> Text
53unsafeTail t@(Text arr off len) =
54#if defined(ASSERTS)
55    assert (d <= len) $
56#endif
57    Text arr (off+d) (len-d)
58  where d = iter_ t 0
59{-# INLINE unsafeTail #-}
60
61data Iter = Iter {-# UNPACK #-} !Char {-# UNPACK #-} !Int
62
63-- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-16
64-- array, returning the current character and the delta to add to give
65-- the next offset to iterate at.
66iter :: Text -> Int -> Iter
67iter (Text arr off _len) i
68    | m < 0xD800 || m > 0xDBFF = Iter (unsafeChr m) 1
69    | otherwise                = Iter (chr2 m n) 2
70  where m = A.unsafeIndex arr j
71        n = A.unsafeIndex arr k
72        j = off + i
73        k = j + 1
74{-# INLINE iter #-}
75
76-- | /O(1)/ Iterate one step through a UTF-16 array, returning the
77-- delta to add to give the next offset to iterate at.
78iter_ :: Text -> Int -> Int
79iter_ (Text arr off _len) i | m < 0xD800 || m > 0xDBFF = 1
80                            | otherwise                = 2
81  where m = A.unsafeIndex arr (off+i)
82{-# INLINE iter_ #-}
83
84-- | /O(1)/ Iterate one step backwards through a UTF-16 array,
85-- returning the current character and the delta to add (i.e. a
86-- negative number) to give the next offset to iterate at.
87reverseIter :: Text -> Int -> (Char,Int)
88reverseIter (Text arr off _len) i
89    | m < 0xDC00 || m > 0xDFFF = (unsafeChr m, -1)
90    | otherwise                = (chr2 n m,    -2)
91  where m = A.unsafeIndex arr j
92        n = A.unsafeIndex arr k
93        j = off + i
94        k = j - 1
95{-# INLINE reverseIter #-}
96
97-- | /O(1)/ Iterate one step backwards through a UTF-16 array,
98-- returning the delta to add (i.e. a negative number) to give the
99-- next offset to iterate at.
100--
101-- @since 1.1.1.0
102reverseIter_ :: Text -> Int -> Int
103reverseIter_ (Text arr off _len) i
104    | m < 0xDC00 || m > 0xDFFF = -1
105    | otherwise                = -2
106  where m = A.unsafeIndex arr (off+i)
107{-# INLINE reverseIter_ #-}
108
109-- | /O(1)/ Return the length of a 'Text' in units of 'Word16'.  This
110-- is useful for sizing a target array appropriately before using
111-- 'unsafeCopyToPtr'.
112lengthWord16 :: Text -> Int
113lengthWord16 (Text _arr _off len) = len
114{-# INLINE lengthWord16 #-}
115
116-- | /O(1)/ Unchecked take of 'k' 'Word16's from the front of a 'Text'.
117takeWord16 :: Int -> Text -> Text
118takeWord16 k (Text arr off _len) = Text arr off k
119{-# INLINE takeWord16 #-}
120
121-- | /O(1)/ Unchecked drop of 'k' 'Word16's from the front of a 'Text'.
122dropWord16 :: Int -> Text -> Text
123dropWord16 k (Text arr off len) = Text arr (off+k) (len-k)
124{-# INLINE dropWord16 #-}
125