1{-# LANGUAGE CPP, MagicHash, UnboxedTuples #-} 2-- | 3-- Module : Data.Text.Unsafe 4-- Copyright : (c) 2009, 2010, 2011 Bryan O'Sullivan 5-- License : BSD-style 6-- Maintainer : bos@serpentine.com 7-- Portability : portable 8-- 9-- A module containing unsafe 'Text' operations, for very very careful 10-- use in heavily tested code. 11module Data.Text.Unsafe 12 ( 13 inlineInterleaveST 14 , inlinePerformIO 15 , unsafeDupablePerformIO 16 , Iter(..) 17 , iter 18 , iter_ 19 , reverseIter 20 , reverseIter_ 21 , unsafeHead 22 , unsafeTail 23 , lengthWord16 24 , takeWord16 25 , dropWord16 26 ) where 27 28#if defined(ASSERTS) 29import Control.Exception (assert) 30#endif 31import Data.Text.Internal.Encoding.Utf16 (chr2) 32import Data.Text.Internal (Text(..)) 33import Data.Text.Internal.Unsafe (inlineInterleaveST, inlinePerformIO) 34import Data.Text.Internal.Unsafe.Char (unsafeChr) 35import qualified Data.Text.Array as A 36import GHC.IO (unsafeDupablePerformIO) 37 38-- | /O(1)/ A variant of 'head' for non-empty 'Text'. 'unsafeHead' 39-- omits the check for the empty case, so there is an obligation on 40-- the programmer to provide a proof that the 'Text' is non-empty. 41unsafeHead :: Text -> Char 42unsafeHead (Text arr off _len) 43 | m < 0xD800 || m > 0xDBFF = unsafeChr m 44 | otherwise = chr2 m n 45 where m = A.unsafeIndex arr off 46 n = A.unsafeIndex arr (off+1) 47{-# INLINE unsafeHead #-} 48 49-- | /O(1)/ A variant of 'tail' for non-empty 'Text'. 'unsafeTail' 50-- omits the check for the empty case, so there is an obligation on 51-- the programmer to provide a proof that the 'Text' is non-empty. 52unsafeTail :: Text -> Text 53unsafeTail t@(Text arr off len) = 54#if defined(ASSERTS) 55 assert (d <= len) $ 56#endif 57 Text arr (off+d) (len-d) 58 where d = iter_ t 0 59{-# INLINE unsafeTail #-} 60 61data Iter = Iter {-# UNPACK #-} !Char {-# UNPACK #-} !Int 62 63-- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-16 64-- array, returning the current character and the delta to add to give 65-- the next offset to iterate at. 66iter :: Text -> Int -> Iter 67iter (Text arr off _len) i 68 | m < 0xD800 || m > 0xDBFF = Iter (unsafeChr m) 1 69 | otherwise = Iter (chr2 m n) 2 70 where m = A.unsafeIndex arr j 71 n = A.unsafeIndex arr k 72 j = off + i 73 k = j + 1 74{-# INLINE iter #-} 75 76-- | /O(1)/ Iterate one step through a UTF-16 array, returning the 77-- delta to add to give the next offset to iterate at. 78iter_ :: Text -> Int -> Int 79iter_ (Text arr off _len) i | m < 0xD800 || m > 0xDBFF = 1 80 | otherwise = 2 81 where m = A.unsafeIndex arr (off+i) 82{-# INLINE iter_ #-} 83 84-- | /O(1)/ Iterate one step backwards through a UTF-16 array, 85-- returning the current character and the delta to add (i.e. a 86-- negative number) to give the next offset to iterate at. 87reverseIter :: Text -> Int -> (Char,Int) 88reverseIter (Text arr off _len) i 89 | m < 0xDC00 || m > 0xDFFF = (unsafeChr m, -1) 90 | otherwise = (chr2 n m, -2) 91 where m = A.unsafeIndex arr j 92 n = A.unsafeIndex arr k 93 j = off + i 94 k = j - 1 95{-# INLINE reverseIter #-} 96 97-- | /O(1)/ Iterate one step backwards through a UTF-16 array, 98-- returning the delta to add (i.e. a negative number) to give the 99-- next offset to iterate at. 100-- 101-- @since 1.1.1.0 102reverseIter_ :: Text -> Int -> Int 103reverseIter_ (Text arr off _len) i 104 | m < 0xDC00 || m > 0xDFFF = -1 105 | otherwise = -2 106 where m = A.unsafeIndex arr (off+i) 107{-# INLINE reverseIter_ #-} 108 109-- | /O(1)/ Return the length of a 'Text' in units of 'Word16'. This 110-- is useful for sizing a target array appropriately before using 111-- 'unsafeCopyToPtr'. 112lengthWord16 :: Text -> Int 113lengthWord16 (Text _arr _off len) = len 114{-# INLINE lengthWord16 #-} 115 116-- | /O(1)/ Unchecked take of 'k' 'Word16's from the front of a 'Text'. 117takeWord16 :: Int -> Text -> Text 118takeWord16 k (Text arr off _len) = Text arr off k 119{-# INLINE takeWord16 #-} 120 121-- | /O(1)/ Unchecked drop of 'k' 'Word16's from the front of a 'Text'. 122dropWord16 :: Int -> Text -> Text 123dropWord16 k (Text arr off len) = Text arr (off+k) (len-k) 124{-# INLINE dropWord16 #-} 125