1{-# LANGUAGE BangPatterns, CPP, MagicHash, Rank2Types, UnboxedTuples, TypeFamilies #-} 2{-# OPTIONS_GHC -fno-warn-orphans #-} 3#if __GLASGOW_HASKELL__ >= 702 4{-# LANGUAGE Trustworthy #-} 5#endif 6-- Using TemplateHaskell in text unconditionally is unacceptable, as 7-- it's a GHC boot library. TemplateHaskellQuotes was added in 8.0, so 8-- this would seem to be a problem. However, GHC's policy of only 9-- needing to be able to compile itself from the last few releases 10-- allows us to use full-fat TH on older versions, while using THQ for 11-- GHC versions that may be used for bootstrapping. 12#if __GLASGOW_HASKELL__ >= 800 13{-# LANGUAGE TemplateHaskellQuotes #-} 14#else 15{-# LANGUAGE TemplateHaskell #-} 16#endif 17 18-- | 19-- Module : Data.Text 20-- Copyright : (c) 2009, 2010, 2011, 2012 Bryan O'Sullivan, 21-- (c) 2009 Duncan Coutts, 22-- (c) 2008, 2009 Tom Harper 23-- 24-- License : BSD-style 25-- Maintainer : bos@serpentine.com 26-- Portability : GHC 27-- 28-- A time and space-efficient implementation of Unicode text. 29-- Suitable for performance critical use, both in terms of large data 30-- quantities and high speed. 31-- 32-- /Note/: Read below the synopsis for important notes on the use of 33-- this module. 34-- 35-- This module is intended to be imported @qualified@, to avoid name 36-- clashes with "Prelude" functions, e.g. 37-- 38-- > import qualified Data.Text as T 39-- 40-- To use an extended and very rich family of functions for working 41-- with Unicode text (including normalization, regular expressions, 42-- non-standard encodings, text breaking, and locales), see the 43-- <http://hackage.haskell.org/package/text-icu text-icu package >. 
44-- 45 46module Data.Text 47 ( 48 -- * Strict vs lazy types 49 -- $strict 50 51 -- * Acceptable data 52 -- $replacement 53 54 -- * Definition of character 55 -- $character_definition 56 57 -- * Fusion 58 -- $fusion 59 60 -- * Types 61 Text 62 63 -- * Creation and elimination 64 , pack 65 , unpack 66 , singleton 67 , empty 68 69 -- * Basic interface 70 , cons 71 , snoc 72 , append 73 , uncons 74 , unsnoc 75 , head 76 , last 77 , tail 78 , init 79 , null 80 , length 81 , compareLength 82 83 -- * Transformations 84 , map 85 , intercalate 86 , intersperse 87 , transpose 88 , reverse 89 , replace 90 91 -- ** Case conversion 92 -- $case 93 , toCaseFold 94 , toLower 95 , toUpper 96 , toTitle 97 98 -- ** Justification 99 , justifyLeft 100 , justifyRight 101 , center 102 103 -- * Folds 104 , foldl 105 , foldl' 106 , foldl1 107 , foldl1' 108 , foldr 109 , foldr1 110 111 -- ** Special folds 112 , concat 113 , concatMap 114 , any 115 , all 116 , maximum 117 , minimum 118 119 -- * Construction 120 121 -- ** Scans 122 , scanl 123 , scanl1 124 , scanr 125 , scanr1 126 127 -- ** Accumulating maps 128 , mapAccumL 129 , mapAccumR 130 131 -- ** Generation and unfolding 132 , replicate 133 , unfoldr 134 , unfoldrN 135 136 -- * Substrings 137 138 -- ** Breaking strings 139 , take 140 , takeEnd 141 , drop 142 , dropEnd 143 , takeWhile 144 , takeWhileEnd 145 , dropWhile 146 , dropWhileEnd 147 , dropAround 148 , strip 149 , stripStart 150 , stripEnd 151 , splitAt 152 , breakOn 153 , breakOnEnd 154 , break 155 , span 156 , group 157 , groupBy 158 , inits 159 , tails 160 161 -- ** Breaking into many substrings 162 -- $split 163 , splitOn 164 , split 165 , chunksOf 166 167 -- ** Breaking into lines and words 168 , lines 169 --, lines' 170 , words 171 , unlines 172 , unwords 173 174 -- * Predicates 175 , isPrefixOf 176 , isSuffixOf 177 , isInfixOf 178 179 -- ** View patterns 180 , stripPrefix 181 , stripSuffix 182 , commonPrefixes 183 184 -- * Searching 185 , filter 186 , breakOnAll 187 , 
find 188 , partition 189 190 -- , findSubstring 191 192 -- * Indexing 193 -- $index 194 , index 195 , findIndex 196 , count 197 198 -- * Zipping 199 , zip 200 , zipWith 201 202 -- -* Ordered text 203 -- , sort 204 205 -- * Low level operations 206 , copy 207 , unpackCString# 208 ) where 209 210import Prelude (Char, Bool(..), Int, Maybe(..), String, 211 Eq(..), Ord(..), Ordering(..), (++), 212 Read(..), 213 (&&), (||), (+), (-), (.), ($), ($!), (>>), 214 not, return, otherwise, quot) 215import Control.DeepSeq (NFData(rnf)) 216#if defined(ASSERTS) 217import Control.Exception (assert) 218#endif 219import Data.Char (isSpace) 220import Data.Data (Data(gfoldl, toConstr, gunfold, dataTypeOf), constrIndex, 221 Constr, mkConstr, DataType, mkDataType, Fixity(Prefix)) 222import Control.Monad (foldM) 223import Control.Monad.ST (ST) 224import qualified Data.Text.Array as A 225import qualified Data.List as L 226import Data.Binary (Binary(get, put)) 227import Data.Monoid (Monoid(..)) 228#if MIN_VERSION_base(4,9,0) 229import Data.Semigroup (Semigroup(..)) 230#endif 231import Data.String (IsString(..)) 232import qualified Data.Text.Internal.Fusion as S 233import qualified Data.Text.Internal.Fusion.Common as S 234import Data.Text.Encoding (decodeUtf8', encodeUtf8) 235import Data.Text.Internal.Fusion (stream, reverseStream, unstream) 236import Data.Text.Internal.Private (span_) 237import Data.Text.Internal (Text(..), empty, firstf, mul, safe, text) 238import Data.Text.Show (singleton, unpack, unpackCString#) 239import qualified Prelude as P 240import Data.Text.Unsafe (Iter(..), iter, iter_, lengthWord16, reverseIter, 241 reverseIter_, unsafeHead, unsafeTail) 242import Data.Text.Internal.Unsafe.Char (unsafeChr) 243import qualified Data.Text.Internal.Functions as F 244import qualified Data.Text.Internal.Encoding.Utf16 as U16 245import Data.Text.Internal.Search (indices) 246import Data.Text.Internal.Unsafe.Shift (UnsafeShift(..)) 247#if defined(__HADDOCK__) 248import Data.ByteString 
(ByteString) 249import qualified Data.Text.Lazy as L 250import Data.Int (Int64) 251#endif 252import GHC.Base (eqInt, neInt, gtInt, geInt, ltInt, leInt) 253#if MIN_VERSION_base(4,7,0) 254import qualified GHC.Exts as Exts 255#endif 256import qualified Language.Haskell.TH.Lib as TH 257import qualified Language.Haskell.TH.Syntax as TH 258#if MIN_VERSION_base(4,7,0) 259import Text.Printf (PrintfArg, formatArg, formatString) 260#endif 261 262-- $setup 263-- >>> import Data.Text 264-- >>> import qualified Data.Text as T 265-- >>> :seti -XOverloadedStrings 266 267-- $character_definition 268-- 269-- This package uses the term /character/ to denote Unicode /code points/. 270-- 271-- Note that this is not the same thing as a grapheme (e.g. a 272-- composition of code points that form one visual symbol). For 273-- instance, consider the grapheme \"ä\". This symbol has two 274-- Unicode representations: a single code-point representation 275-- @U+00E4@ (the @LATIN SMALL LETTER A WITH DIAERESIS@ code point), 276-- and a two code point representation @U+0061@ (the \"@A@\" code 277-- point) and @U+0308@ (the @COMBINING DIAERESIS@ code point). 278 279-- $strict 280-- 281-- This package provides both strict and lazy 'Text' types. The 282-- strict type is provided by the "Data.Text" module, while the lazy 283-- type is provided by the "Data.Text.Lazy" module. Internally, the 284-- lazy @Text@ type consists of a list of strict chunks. 285-- 286-- The strict 'Text' type requires that an entire string fit into 287-- memory at once. The lazy 'Data.Text.Lazy.Text' type is capable of 288-- streaming strings that are larger than memory using a small memory 289-- footprint. In many cases, the overhead of chunked streaming makes 290-- the lazy 'Data.Text.Lazy.Text' type slower than its strict 291-- counterpart, but this is not always the case. Sometimes, the time 292-- complexity of a function in one module may be different from the 293-- other, due to their differing internal structures. 
294-- 295-- Each module provides an almost identical API, with the main 296-- difference being that the strict module uses 'Int' values for 297-- lengths and counts, while the lazy module uses 'Data.Int.Int64' 298-- lengths. 299 300-- $replacement 301-- 302-- A 'Text' value is a sequence of Unicode scalar values, as defined 303-- in 304-- <http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf#page=35 §3.9, definition D76 of the Unicode 5.2 standard >. 305-- As such, a 'Text' cannot contain values in the range U+D800 to 306-- U+DFFF inclusive. Haskell implementations admit all Unicode code 307-- points 308-- (<http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf#page=13 §3.4, definition D10 >) 309-- as 'Char' values, including code points from this invalid range. 310-- This means that there are some 'Char' values that are not valid 311-- Unicode scalar values, and the functions in this module must handle 312-- those cases. 313-- 314-- Within this module, many functions construct a 'Text' from one or 315-- more 'Char' values. Those functions will substitute 'Char' values 316-- that are not valid Unicode scalar values with the replacement 317-- character \"�\" (U+FFFD). Functions that perform this 318-- inspection and replacement are documented with the phrase 319-- \"Performs replacement on invalid scalar values\". 320-- 321-- (One reason for this policy of replacement is that internally, a 322-- 'Text' value is represented as packed UTF-16 data. Values in the 323-- range U+D800 through U+DFFF are used by UTF-16 to denote surrogate 324-- code points, and so cannot be represented. The functions replace 325-- invalid scalar values, instead of dropping them, as a security 326-- measure. For details, see 327-- <http://unicode.org/reports/tr36/#Deletion_of_Noncharacters Unicode Technical Report 36, §3.5 >.) 
328 329-- $fusion 330-- 331-- Most of the functions in this module are subject to /fusion/, 332-- meaning that a pipeline of such functions will usually allocate at 333-- most one 'Text' value. 334-- 335-- As an example, consider the following pipeline: 336-- 337-- > import Data.Text as T 338-- > import Data.Text.Encoding as E 339-- > import Data.ByteString (ByteString) 340-- > 341-- > countChars :: ByteString -> Int 342-- > countChars = T.length . T.toUpper . E.decodeUtf8 343-- 344-- From the type signatures involved, this looks like it should 345-- allocate one 'Data.ByteString.ByteString' value, and two 'Text' 346-- values. However, when a module is compiled with optimisation 347-- enabled under GHC, the two intermediate 'Text' values will be 348-- optimised away, and the function will be compiled down to a single 349-- loop over the source 'Data.ByteString.ByteString'. 350-- 351-- Functions that can be fused by the compiler are documented with the 352-- phrase \"Subject to fusion\". 
instance Eq Text where
    -- Compare lengths first: texts of unequal length can never be
    -- equal, and A.equal may only be asked to compare ranges of the
    -- same length.
    Text arrA offA lenA == Text arrB offB lenB
        | lenA == lenB = A.equal arrA offA arrB offB lenA
        | otherwise    = False
    {-# INLINE (==) #-}

instance Ord Text where
    compare = compareText

instance Read Text where
    -- Reuse the 'String' Read instance and pack the parsed result.
    readsPrec p str = [(pack x,y) | (x,y) <- readsPrec p str]

#if MIN_VERSION_base(4,9,0)
-- | Non-orphan 'Semigroup' instance only defined for
-- @base-4.9.0.0@ and later; orphan instances for older GHCs are
-- provided by
-- the [semigroups](http://hackage.haskell.org/package/semigroups)
-- package
--
-- @since 1.2.2.0
instance Semigroup Text where
    (<>) = append
#endif

instance Monoid Text where
    mempty  = empty
#if MIN_VERSION_base(4,9,0)
    mappend = (<>) -- future-proof definition
#else
    mappend = append
#endif
    -- 'concat' allocates the result array once, instead of the
    -- repeated copying a chain of 'mappend's would perform.
    mconcat = concat

instance IsString Text where
    fromString = pack

#if MIN_VERSION_base(4,7,0)
-- | @since 1.2.0.0
instance Exts.IsList Text where
    type Item Text = Char
    fromList = pack
    toList   = unpack
#endif

-- Forcing to WHNF is all that is done here.
-- NOTE(review): presumably the Text constructor's fields are strict
-- (see Data.Text.Internal), making WHNF sufficient for full NF —
-- confirm against the internal definition.
instance NFData Text where rnf !_ = ()

-- | Serialised via UTF-8; a decoding failure on 'get' is reported
-- through 'P.fail'.
--
-- @since 1.2.1.0
instance Binary Text where
    put t = put (encodeUtf8 t)
    get   = do
      bs <- get
      case decodeUtf8' bs of
        P.Left exn -> P.fail (P.show exn)
        P.Right a -> P.return a

-- | This instance preserves data abstraction at the cost of inefficiency.
-- We omit reflection services for the sake of data abstraction.
--
-- This instance was created by copying the updated behavior of
-- @"Data.Set".@'Data.Set.Set' and @"Data.Map".@'Data.Map.Map'. If you
-- feel a mistake has been made, please feel free to submit
-- improvements.
--
-- The original discussion is archived here:
-- <https://mail.haskell.org/pipermail/haskell-cafe/2010-January/072379.html could we get a Data instance for Data.Text.Text? >
--
-- The followup discussion that changed the behavior of 'Data.Set.Set'
-- and 'Data.Map.Map' is archived here:
-- <http://markmail.org/message/trovdc6zkphyi3cr#query:+page:1+mid:a46der3iacwjcf6n+state:results Proposal: Allow gunfold for Data.Map, ... >

instance Data Text where
  -- Present every 'Text' as though it were built by @pack someString@,
  -- hiding the real (array, offset, length) representation.
  gfoldl f z txt = z pack `f` (unpack txt)
  toConstr _ = packConstr
  gunfold k z c = case constrIndex c of
    1 -> k (z pack)
    _ -> P.error "gunfold"
  dataTypeOf _ = textDataType

-- | This instance has similar considerations to the 'Data' instance:
-- it preserves abstraction at the cost of inefficiency.
--
-- @since 1.2.4.0
instance TH.Lift Text where
  -- Splice as @pack "literal"@ so quoted values round-trip through
  -- the public API rather than the internal representation.
  lift = TH.appE (TH.varE 'pack) . TH.stringE . unpack
#if MIN_VERSION_template_haskell(2,17,0)
  liftTyped = TH.unsafeCodeCoerce . TH.lift
#elif MIN_VERSION_template_haskell(2,16,0)
  liftTyped = TH.unsafeTExpCoerce . TH.lift
#endif

#if MIN_VERSION_base(4,7,0)
-- | Only defined for @base-4.7.0.0@ and later
--
-- @since 1.2.2.0
instance PrintfArg Text where
  formatArg txt = formatString $ unpack txt
#endif

-- | The single pseudo-constructor (@pack@) exposed by the abstract
-- 'Data' instance above.
packConstr :: Constr
packConstr = mkConstr textDataType "pack" [] Prefix

-- | The 'DataType' backing the abstract 'Data' instance above.
textDataType :: DataType
textDataType = mkDataType "Data.Text.Text" [packConstr]

-- | /O(n)/ Compare two 'Text' values lexicographically.
compareText :: Text -> Text -> Ordering
compareText ta@(Text _arrA _offA lenA) tb@(Text _arrB _offB lenB)
    | lenA == 0 && lenB == 0 = EQ
    | otherwise              = go 0 0
  where
    -- Walk both texts one code point at a time.  When either input is
    -- exhausted, the ordering of the underlying lengths decides the
    -- result (the exhausted, shorter text sorts first).
    go !i !j
        | i >= lenA || j >= lenB = compare lenA lenB
        | a < b                  = LT
        | a > b                  = GT
        | otherwise              = go (i+di) (j+dj)
        where Iter a di = iter ta i
              Iter b dj = iter tb j

-- -----------------------------------------------------------------------------
-- * Conversion to/from 'Text'

-- | /O(n)/ Convert a 'String' into a 'Text'.  Subject to
-- fusion.  Performs replacement on invalid scalar values.
pack :: String -> Text
pack = unstream . S.map safe . S.streamList
{-# INLINE [1] pack #-}

-- -----------------------------------------------------------------------------
-- * Basic functions

-- | /O(n)/ Adds a character to the front of a 'Text'.  This function
-- is more costly than its 'List' counterpart because it requires
-- copying a new array.  Subject to fusion.  Performs replacement on
-- invalid scalar values.
cons :: Char -> Text -> Text
cons c t = unstream (S.cons (safe c) (stream t))
{-# INLINE cons #-}

infixr 5 `cons`

-- | /O(n)/ Adds a character to the end of a 'Text'.  This copies the
-- entire array in the process, unless fused.  Subject to fusion.
-- Performs replacement on invalid scalar values.
snoc :: Text -> Char -> Text
snoc t c = unstream (S.snoc (stream t) (safe c))
{-# INLINE snoc #-}

-- | /O(n)/ Appends one 'Text' to the other by copying both of them
-- into a new 'Text'.  Subject to fusion.
append :: Text -> Text -> Text
append a@(Text arr1 off1 len1) b@(Text arr2 off2 len2)
    -- Reuse the non-empty argument when the other is empty; only
    -- allocate when both contribute characters.
    | len1 == 0 = b
    | len2 == 0 = a
    | len > 0   = Text (A.run x) 0 len
    -- len1+len2 can only be non-positive here via Int overflow.
    | otherwise = overflowError "append"
  where
    len = len1+len2
    x :: ST s (A.MArray s)
    x = do
      arr <- A.new len
      A.copyI arr 0 arr1 off1 len1
      A.copyI arr len1 arr2 off2 len
      return arr
-- NOINLINE so the RULES below can match calls to 'append' and rewrite
-- them into the fused form (and back, if fusion does not fire).
{-# NOINLINE append #-}

{-# RULES
"TEXT append -> fused" [~1] forall t1 t2.
    append t1 t2 = unstream (S.append (stream t1) (stream t2))
"TEXT append -> unfused" [1] forall t1 t2.
    unstream (S.append (stream t1) (stream t2)) = append t1 t2
 #-}

-- | /O(1)/ Returns the first character of a 'Text', which must be
-- non-empty.  Subject to fusion.
head :: Text -> Char
head t = S.head (stream t)
{-# INLINE head #-}

-- | /O(1)/ Returns the first character and rest of a 'Text', or
-- 'Nothing' if empty. Subject to fusion.
uncons :: Text -> Maybe (Char, Text)
uncons t@(Text arr off len)
    | len <= 0  = Nothing
    | otherwise = Just $ let !(Iter c d) = iter t 0
                         in (c, text arr (off+d) (len-d))
{-# INLINE [1] uncons #-}

-- | Lifted from Control.Arrow and specialized.
second :: (b -> c) -> (a,b) -> (a,c)
second f (a, b) = (a, f b)

-- | /O(1)/ Returns the last character of a 'Text', which must be
-- non-empty.  Subject to fusion.
last :: Text -> Char
last (Text arr off len)
    | len <= 0                 = emptyError "last"
    -- A final code unit in 0xDC00..0xDFFF is a UTF-16 low surrogate,
    -- so the last character occupies two code units.
    | n < 0xDC00 || n > 0xDFFF = unsafeChr n
    | otherwise                = U16.chr2 n0 n
    where n  = A.unsafeIndex arr (off+len-1)
          n0 = A.unsafeIndex arr (off+len-2)
{-# INLINE [1] last #-}

{-# RULES
"TEXT last -> fused" [~1] forall t.
    last t = S.last (stream t)
"TEXT last -> unfused" [1] forall t.
    S.last (stream t) = last t
  #-}

-- | /O(1)/ Returns all characters after the head of a 'Text', which
-- must be non-empty.  Subject to fusion.
tail :: Text -> Text
tail t@(Text arr off len)
    | len <= 0  = emptyError "tail"
    | otherwise = text arr (off+d) (len-d)
    where d = iter_ t 0
{-# INLINE [1] tail #-}

{-# RULES
"TEXT tail -> fused" [~1] forall t.
    tail t = unstream (S.tail (stream t))
"TEXT tail -> unfused" [1] forall t.
    unstream (S.tail (stream t)) = tail t
  #-}

-- | /O(1)/ Returns all but the last character of a 'Text', which must
-- be non-empty.  Subject to fusion.
init :: Text -> Text
init (Text arr off len) | len <= 0                   = emptyError "init"
                        -- Low surrogate at the end: drop both units of
                        -- the final (two-unit) character.
                        | n >= 0xDC00 && n <= 0xDFFF = text arr off (len-2)
                        | otherwise                  = text arr off (len-1)
    where
      n = A.unsafeIndex arr (off+len-1)
{-# INLINE [1] init #-}

{-# RULES
"TEXT init -> fused" [~1] forall t.
    init t = unstream (S.init (stream t))
"TEXT init -> unfused" [1] forall t.
    unstream (S.init (stream t)) = init t
 #-}

-- | /O(1)/ Returns all but the last character and the last character of a
-- 'Text', or 'Nothing' if empty.
--
-- @since 1.2.3.0
unsnoc :: Text -> Maybe (Text, Char)
unsnoc (Text arr off len)
    | len <= 0                 = Nothing
    | n < 0xDC00 || n > 0xDFFF = Just (text arr off (len-1), unsafeChr n)
    -- Final character is a surrogate pair: split off two code units.
    | otherwise                = Just (text arr off (len-2), U16.chr2 n0 n)
    where n  = A.unsafeIndex arr (off+len-1)
          n0 = A.unsafeIndex arr (off+len-2)
{-# INLINE [1] unsnoc #-}

-- | /O(1)/ Tests whether a 'Text' is empty or not.  Subject to
-- fusion.
null :: Text -> Bool
null (Text _arr _off len) =
#if defined(ASSERTS)
    assert (len >= 0) $
#endif
    len <= 0
{-# INLINE [1] null #-}

{-# RULES
"TEXT null -> fused" [~1] forall t.
    null t = S.null (stream t)
"TEXT null -> unfused" [1] forall t.
    S.null (stream t) = null t
 #-}

-- | /O(1)/ Tests whether a 'Text' contains exactly one character.
-- Subject to fusion.
isSingleton :: Text -> Bool
isSingleton = S.isSingleton . stream
{-# INLINE isSingleton #-}

-- | /O(n)/ Returns the number of characters in a 'Text'.
-- Subject to fusion.
length :: Text -> Int
length t = S.length (stream t)
{-# INLINE [1] length #-}
-- length needs to be phased after the compareN/length rules otherwise
-- it may inline before the rules have an opportunity to fire.

-- | /O(n)/ Compare the count of characters in a 'Text' to a number.
-- Subject to fusion.
--
-- This function gives the same answer as comparing against the result
-- of 'length', but can short circuit if the count of characters is
-- greater than the number, and hence be more efficient.
compareLength :: Text -> Int -> Ordering
compareLength t n = S.compareLengthI (stream t) n
{-# INLINE [1] compareLength #-}

{-# RULES
"TEXT compareN/length -> compareLength" [~1] forall t n.
    compare (length t) n = compareLength t n
  #-}

{-# RULES
"TEXT ==N/length -> compareLength/==EQ" [~1] forall t n.
    eqInt (length t) n = compareLength t n == EQ
  #-}

{-# RULES
"TEXT /=N/length -> compareLength//=EQ" [~1] forall t n.
    neInt (length t) n = compareLength t n /= EQ
  #-}

{-# RULES
"TEXT <N/length -> compareLength/==LT" [~1] forall t n.
    ltInt (length t) n = compareLength t n == LT
  #-}

{-# RULES
"TEXT <=N/length -> compareLength//=GT" [~1] forall t n.
    leInt (length t) n = compareLength t n /= GT
  #-}

{-# RULES
"TEXT >N/length -> compareLength/==GT" [~1] forall t n.
    gtInt (length t) n = compareLength t n == GT
  #-}

{-# RULES
"TEXT >=N/length -> compareLength//=LT" [~1] forall t n.
    geInt (length t) n = compareLength t n /= LT
  #-}

-- -----------------------------------------------------------------------------
-- * Transformations
-- | /O(n)/ 'map' @f@ @t@ is the 'Text' obtained by applying @f@ to
-- each element of @t@.
--
-- Example:
--
-- >>> let message = pack "I am not angry. Not at all."
-- >>> T.map (\c -> if c == '.' then '!' else c) message
-- "I am not angry! Not at all!"
--
-- Subject to fusion.  Performs replacement on invalid scalar values.
map :: (Char -> Char) -> Text -> Text
map f t = unstream (S.map (safe . f) (stream t))
{-# INLINE [1] map #-}

-- | /O(n)/ The 'intercalate' function takes a 'Text' and a list of
-- 'Text's and concatenates the list after interspersing the first
-- argument between each element of the list.
--
-- Example:
--
-- >>> T.intercalate "NI!" ["We", "seek", "the", "Holy", "Grail"]
-- "WeNI!seekNI!theNI!HolyNI!Grail"
intercalate :: Text -> [Text] -> Text
intercalate t = concat . (F.intersperse t)
{-# INLINE intercalate #-}

-- | /O(n)/ The 'intersperse' function takes a character and places it
-- between the characters of a 'Text'.
--
-- Example:
--
-- >>> T.intersperse '.' "SHIELD"
-- "S.H.I.E.L.D"
--
-- Subject to fusion.  Performs replacement on invalid scalar values.
intersperse :: Char -> Text -> Text
intersperse c t = unstream (S.intersperse (safe c) (stream t))
{-# INLINE intersperse #-}

-- | /O(n)/ Reverse the characters of a string.
--
-- Example:
--
-- >>> T.reverse "desrever"
-- "reversed"
--
-- Subject to fusion (fuses with its argument).
reverse :: Text -> Text
reverse t = S.reverse (stream t)
{-# INLINE reverse #-}

-- | /O(m+n)/ Replace every non-overlapping occurrence of @needle@ in
-- @haystack@ with @replacement@.
--
-- This function behaves as though it was defined as follows:
--
-- @
-- replace needle replacement haystack =
--   'intercalate' replacement ('splitOn' needle haystack)
-- @
--
-- As this suggests, each occurrence is replaced exactly once.  So if
-- @needle@ occurs in @replacement@, that occurrence will /not/ itself
-- be replaced recursively:
--
-- >>> replace "oo" "foo" "oo"
-- "foo"
--
-- In cases where several instances of @needle@ overlap, only the
-- first one will be replaced:
--
-- >>> replace "ofo" "bar" "ofofo"
-- "barfo"
--
-- In (unlikely) bad cases, this function's time complexity degrades
-- towards /O(n*m)/.
replace :: Text
        -- ^ @needle@ to search for.  If this string is empty, an
        -- error will occur.
        -> Text
        -- ^ @replacement@ to replace @needle@ with.
        -> Text
        -- ^ @haystack@ in which to search.
        -> Text
replace needle@(Text _      _      neeLen)
               (Text repArr repOff repLen)
      haystack@(Text hayArr hayOff hayLen)
  | neeLen == 0 = emptyError "replace"
  | L.null ixs  = haystack
  | len > 0     = Text (A.run x) 0 len
  | otherwise   = empty
  where
    ixs = indices needle haystack
    -- Result length: each of the (L.length ixs) matches shrinks or
    -- grows the haystack by (repLen - neeLen) code units.
    len = hayLen - (neeLen - repLen) `mul` L.length ixs
    x :: ST s (A.MArray s)
    x = do
      marr <- A.new len
      -- i: next match position in the haystack; o: haystack read
      -- cursor; d: destination write cursor.  For each match, copy the
      -- unmatched prefix, then the replacement, then advance past the
      -- needle.
      let loop (i:is) o d = do
            let d0 = d + i - o
                d1 = d0 + repLen
            A.copyI marr d  hayArr (hayOff+o) d0
            A.copyI marr d0 repArr repOff    d1
            loop is (i + neeLen) d1
          loop []     o d = A.copyI marr d hayArr (hayOff+o) len
      loop ixs 0 0
      return marr

-- ----------------------------------------------------------------------------
-- ** Case conversions (folds)

-- $case
--
-- When case converting 'Text' values, do not use combinators like
-- @map toUpper@ to case convert each character of a string
-- individually, as this gives incorrect results according to the
-- rules of some writing systems.  The whole-string case conversion
-- functions from this module, such as @toUpper@, obey the correct
-- case conversion rules.  As a result, these functions may map one
-- input character to two or three output characters. For examples,
-- see the documentation of each function.
--
-- /Note/: In some languages, case conversion is a locale- and
-- context-dependent operation. The case conversion functions in this
-- module are /not/ locale sensitive. Programs that require locale
-- sensitivity should use appropriate versions of the
-- <http://hackage.haskell.org/package/text-icu-0.6.3.7/docs/Data-Text-ICU.html#g:4 case mapping functions from the text-icu package >.

-- | /O(n)/ Convert a string to folded case.  Subject to fusion.
--
-- This function is mainly useful for performing caseless (also known
-- as case insensitive) string comparisons.
--
-- A string @x@ is a caseless match for a string @y@ if and only if:
--
-- @toCaseFold x == toCaseFold y@
--
-- The result string may be longer than the input string, and may
-- differ from applying 'toLower' to the input string.  For instance,
-- the Armenian small ligature \"ﬓ\" (men now, U+FB13) is case
-- folded to the sequence \"մ\" (men, U+0574) followed by
-- \"ն\" (now, U+0576), while the Greek \"µ\" (micro sign,
-- U+00B5) is case folded to \"μ\" (small letter mu, U+03BC)
-- instead of itself.
toCaseFold :: Text -> Text
toCaseFold t = unstream (S.toCaseFold (stream t))
{-# INLINE toCaseFold #-}

-- | /O(n)/ Convert a string to lower case, using simple case
-- conversion.  Subject to fusion.
--
-- The result string may be longer than the input string.  For
-- instance, \"İ\" (Latin capital letter I with dot above,
-- U+0130) maps to the sequence \"i\" (Latin small letter i, U+0069)
-- followed by \" ̇\" (combining dot above, U+0307).
toLower :: Text -> Text
toLower t = unstream (S.toLower (stream t))
{-# INLINE toLower #-}

-- | /O(n)/ Convert a string to upper case, using simple case
-- conversion.  Subject to fusion.
--
-- The result string may be longer than the input string.  For
-- instance, the German \"ß\" (eszett, U+00DF) maps to the
-- two-letter sequence \"SS\".
toUpper :: Text -> Text
toUpper t = unstream (S.toUpper (stream t))
{-# INLINE toUpper #-}

-- | /O(n)/ Convert a string to title case, using simple case
-- conversion. Subject to fusion.
--
-- The first letter of the input is converted to title case, as is
-- every subsequent letter that immediately follows a non-letter.
-- Every letter that immediately follows another letter is converted
-- to lower case.
--
-- The result string may be longer than the input string. For example,
-- the Latin small ligature fl (U+FB02) is converted to the
-- sequence Latin capital letter F (U+0046) followed by Latin small
-- letter l (U+006C).
--
-- /Note/: this function does not take language or culture specific
-- rules into account. For instance, in English, different style
-- guides disagree on whether the book name \"The Hill of the Red
-- Fox\" is correctly title cased—but this function will
-- capitalize /every/ word.
--
-- @since 1.0.0.0
toTitle :: Text -> Text
toTitle t = unstream (S.toTitle (stream t))
{-# INLINE toTitle #-}

-- | /O(n)/ Left-justify a string to the given length, using the
-- specified fill character on the right. Subject to fusion.
-- Performs replacement on invalid scalar values.
--
-- Examples:
--
-- >>> justifyLeft 7 'x' "foo"
-- "fooxxxx"
--
-- >>> justifyLeft 3 'x' "foobar"
-- "foobar"
justifyLeft :: Int -> Char -> Text -> Text
justifyLeft k c t
    -- Already long enough: return the input unchanged (no padding,
    -- no truncation).
    | len >= k  = t
    | otherwise = t `append` replicateChar (k-len) c
  where len = length t
{-# INLINE [1] justifyLeft #-}

{-# RULES
"TEXT justifyLeft -> fused" [~1] forall k c t.
    justifyLeft k c t = unstream (S.justifyLeftI k c (stream t))
"TEXT justifyLeft -> unfused" [1] forall k c t.
    unstream (S.justifyLeftI k c (stream t)) = justifyLeft k c t
  #-}

-- | /O(n)/ Right-justify a string to the given length, using the
-- specified fill character on the left.  Performs replacement on
-- invalid scalar values.
--
-- Examples:
--
-- >>> justifyRight 7 'x' "bar"
-- "xxxxbar"
--
-- >>> justifyRight 3 'x' "foobar"
-- "foobar"
justifyRight :: Int -> Char -> Text -> Text
justifyRight k c t
    | len >= k  = t
    | otherwise = replicateChar (k-len) c `append` t
  where len = length t
{-# INLINE justifyRight #-}

-- | /O(n)/ Center a string to the given length, using the specified
-- fill character on either side.  Performs replacement on invalid
-- scalar values.
--
-- Examples:
--
-- >>> center 8 'x' "HS"
-- "xxxHSxxx"
center :: Int -> Char -> Text -> Text
center k c t
    | len >= k  = t
    | otherwise = replicateChar l c `append` t `append` replicateChar r c
  where len = length t
        d   = k - len
        -- When the padding is odd, the extra fill character goes on
        -- the left (l = d - r >= r).
        r   = d `quot` 2
        l   = d - r
{-# INLINE center #-}

-- | /O(n)/ The 'transpose' function transposes the rows and columns
-- of its 'Text' argument.  Note that this function uses 'pack',
-- 'unpack', and the list version of transpose, and is thus not very
-- efficient.
--
-- Examples:
--
-- >>> transpose ["green","orange"]
-- ["go","rr","ea","en","ng","e"]
--
-- >>> transpose ["blue","red"]
-- ["br","le","ud","e"]
transpose :: [Text] -> [Text]
transpose ts = P.map pack (L.transpose (P.map unpack ts))

-- -----------------------------------------------------------------------------
-- * Reducing 'Text's (folds)

-- | /O(n)/ 'foldl', applied to a binary operator, a starting value
-- (typically the left-identity of the operator), and a 'Text',
-- reduces the 'Text' using the binary operator, from left to right.
-- Subject to fusion.
foldl :: (a -> Char -> a) -> a -> Text -> a
foldl f z t = S.foldl f z (stream t)
{-# INLINE foldl #-}

-- | /O(n)/ A strict version of 'foldl'.  Subject to fusion.
foldl' :: (a -> Char -> a) -> a -> Text -> a
foldl' step seed = S.foldl' step seed . stream
{-# INLINE foldl' #-}

-- | /O(n)/ A variant of 'foldl' that has no starting value argument,
-- and thus must be applied to a non-empty 'Text'.  Subject to fusion.
foldl1 :: (Char -> Char -> Char) -> Text -> Char
foldl1 step = S.foldl1 step . stream
{-# INLINE foldl1 #-}

-- | /O(n)/ A strict version of 'foldl1'.  Subject to fusion.
foldl1' :: (Char -> Char -> Char) -> Text -> Char
foldl1' step = S.foldl1' step . stream
{-# INLINE foldl1' #-}

-- | /O(n)/ 'foldr', applied to a binary operator, a starting value
-- (typically the right-identity of the operator), and a 'Text',
-- reduces the 'Text' using the binary operator, from right to left.
-- Subject to fusion.
foldr :: (Char -> a -> a) -> a -> Text -> a
foldr step seed = S.foldr step seed . stream
{-# INLINE foldr #-}

-- | /O(n)/ A variant of 'foldr' that has no starting value argument,
-- and thus must be applied to a non-empty 'Text'.  Subject to
-- fusion.
foldr1 :: (Char -> Char -> Char) -> Text -> Char
foldr1 step = S.foldr1 step . stream
{-# INLINE foldr1 #-}

-- -----------------------------------------------------------------------------
-- ** Special folds

-- | /O(n)/ Concatenate a list of 'Text's.
concat :: [Text] -> Text
concat ts = case chunks of
    []  -> empty
    [t] -> t
    _   -> Text (A.run build) 0 total
  where
    -- Empty pieces contribute nothing, so drop them up front; this
    -- also lets the single-chunk case above reuse its input directly.
    chunks = L.filter (not . null) ts
    total  = sumP "concat" (L.map lengthWord16 chunks)
    build :: ST s (A.MArray s)
    build = do
      dst <- A.new total
      -- Copy each chunk in order, threading the destination offset.
      let paste pos (Text src srcOff srcLen) = do
            let !next = pos + srcLen
            A.copyI dst pos src srcOff next
            return next
      foldM paste 0 chunks >> return dst

-- | /O(n)/ Map a function over a 'Text' that results in a 'Text', and
-- concatenate the results.
concatMap :: (Char -> Text) -> Text -> Text
concatMap f t = concat (foldr (\c pieces -> f c : pieces) [] t)
{-# INLINE concatMap #-}

-- | /O(n)/ 'any' @p@ @t@ determines whether any character in the
-- 'Text' @t@ satisfies the predicate @p@.  Subject to fusion.
any :: (Char -> Bool) -> Text -> Bool
any p = S.any p . stream
{-# INLINE any #-}

-- | /O(n)/ 'all' @p@ @t@ determines whether all characters in the
-- 'Text' @t@ satisfy the predicate @p@.  Subject to fusion.
all :: (Char -> Bool) -> Text -> Bool
all p = S.all p . stream
{-# INLINE all #-}

-- | /O(n)/ 'maximum' returns the maximum value from a 'Text', which
-- must be non-empty.  Subject to fusion.
maximum :: Text -> Char
maximum = S.maximum . stream
{-# INLINE maximum #-}

-- | /O(n)/ 'minimum' returns the minimum value from a 'Text', which
-- must be non-empty.  Subject to fusion.
minimum :: Text -> Char
minimum = S.minimum . stream
{-# INLINE minimum #-}

-- -----------------------------------------------------------------------------
-- * Building 'Text's

-- | /O(n)/ 'scanl' is similar to 'foldl', but returns a list of
-- successive reduced values from the left.  Subject to fusion.
-- Performs replacement on invalid scalar values.
--
-- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
--
-- Note that
--
-- > last (scanl f z xs) == foldl f z xs.
scanl :: (Char -> Char -> Char) -> Char -> Text -> Text
scanl f z t = unstream (S.scanl (\a b -> safe (f a b)) z (stream t))
{-# INLINE scanl #-}

-- | /O(n)/ 'scanl1' is a variant of 'scanl' that has no starting
-- value argument.  Performs replacement on invalid scalar values.
--
-- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
scanl1 :: (Char -> Char -> Char) -> Text -> Text
scanl1 f t | null t    = empty
           | otherwise = scanl f (unsafeHead t) (unsafeTail t)
{-# INLINE scanl1 #-}

-- | /O(n)/ 'scanr' is the right-to-left dual of 'scanl'.  Performs
-- replacement on invalid scalar values.
--
-- > scanr f v == reverse . scanl (flip f) v . reverse
scanr :: (Char -> Char -> Char) -> Char -> Text -> Text
scanr f z = S.reverse . S.reverseScanr g z . reverseStream
    where g a b = safe (f a b)
{-# INLINE scanr #-}

-- | /O(n)/ 'scanr1' is a variant of 'scanr' that has no starting
-- value argument. Performs replacement on invalid scalar values.
scanr1 :: (Char -> Char -> Char) -> Text -> Text
scanr1 f t | null t    = empty
           | otherwise = scanr f (last t) (init t)
{-# INLINE scanr1 #-}

-- | /O(n)/ Like a combination of 'map' and 'foldl''. Applies a
-- function to each element of a 'Text', passing an accumulating
-- parameter from left to right, and returns a final 'Text'.  Performs
-- replacement on invalid scalar values.
mapAccumL :: (a -> Char -> (a,Char)) -> a -> Text -> (a, Text)
mapAccumL f z0 = S.mapAccumL g z0 . stream
    where g a b = second safe (f a b)
{-# INLINE mapAccumL #-}

-- | The 'mapAccumR' function behaves like a combination of 'map' and
-- a strict 'foldr'; it applies a function to each element of a
-- 'Text', passing an accumulating parameter from right to left, and
-- returning a final value of this accumulator together with the new
-- 'Text'.
-- Performs replacement on invalid scalar values.
mapAccumR :: (a -> Char -> (a,Char)) -> a -> Text -> (a, Text)
-- Implemented as a left accumulation over the reversed stream; the
-- produced text is reversed back at the end.
mapAccumR f z0 = second reverse . S.mapAccumL g z0 . reverseStream
    where g a b = second safe (f a b)
{-# INLINE mapAccumR #-}

-- -----------------------------------------------------------------------------
-- ** Generating and unfolding 'Text's

-- | /O(n*m)/ 'replicate' @n@ @t@ is a 'Text' consisting of the input
-- @t@ repeated @n@ times.
replicate :: Int -> Text -> Text
replicate n t@(Text a o l)
    | n <= 0 || l <= 0      = empty
    | n == 1                = t
    | isSingleton t         = replicateChar n (unsafeHead t)
    | otherwise             = Text (A.run x) 0 len
  where
    len = l `mul` n -- TODO: detect overflows
    x :: ST s (A.MArray s)
    x = do
      arr <- A.new len
      -- Copy the first instance, then double the filled region on
      -- each step by copying the array onto itself: O(log n) copies.
      A.copyI arr 0 a o l
      let loop !l1 =
            let rest = len - l1 in
            if rest <= l1 then A.copyM arr l1 arr 0 rest >> return arr
            else A.copyM arr l1 arr 0 l1 >> loop (l1 `shiftL` 1)
      loop l
{-# INLINE [1] replicate #-}

{-# RULES
"TEXT replicate/singleton -> replicateChar" [~1] forall n c.
    replicate n (singleton c) = replicateChar n c
 #-}

-- | /O(n)/ 'replicateChar' @n@ @c@ is a 'Text' of length @n@ with @c@ the
-- value of every element. Subject to fusion.
replicateChar :: Int -> Char -> Text
replicateChar n c = unstream (S.replicateCharI n (safe c))
{-# INLINE replicateChar #-}

-- | /O(n)/, where @n@ is the length of the result. The 'unfoldr'
-- function is analogous to the List 'L.unfoldr'. 'unfoldr' builds a
-- 'Text' from a seed value. The function takes the element and
-- returns 'Nothing' if it is done producing the 'Text', otherwise
-- 'Just' @(a,b)@.  In this case, @a@ is the next 'Char' in the
-- string, and @b@ is the seed value for further production. Subject
-- to fusion.  Performs replacement on invalid scalar values.
unfoldr :: (a -> Maybe (Char,a)) -> a -> Text
unfoldr f s = unstream (S.unfoldr (firstf safe . f) s)
{-# INLINE unfoldr #-}

-- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a 'Text' from a seed
-- value. However, the length of the result should be limited by the
-- first argument to 'unfoldrN'. This function is more efficient than
-- 'unfoldr' when the maximum length of the result is known and
-- correct, otherwise its performance is similar to 'unfoldr'. Subject
-- to fusion.  Performs replacement on invalid scalar values.
unfoldrN :: Int -> (a -> Maybe (Char,a)) -> a -> Text
unfoldrN n f s = unstream (S.unfoldrN n (firstf safe . f) s)
{-# INLINE unfoldrN #-}

-- -----------------------------------------------------------------------------
-- * Substrings

-- | /O(n)/ 'take' @n@, applied to a 'Text', returns the prefix of the
-- 'Text' of length @n@, or the 'Text' itself if @n@ is greater than
-- the length of the Text. Subject to fusion.
take :: Int -> Text -> Text
take n t@(Text arr off len)
    | n <= 0    = empty
    | n >= len  = t
      -- iterN converts the character count n into a Word16 offset, so
      -- the slice never splits a surrogate pair.
    | otherwise = text arr off (iterN n t)
{-# INLINE [1] take #-}

-- | Convert a character count into an array (Word16) offset, stepping
-- over whole code points without decoding them.
iterN :: Int -> Text -> Int
iterN n t@(Text _arr _off len) = loop 0 0
  where loop !i !cnt
            | i >= len || cnt >= n = i
            | otherwise            = loop (i+d) (cnt+1)
          where d = iter_ t i
-- NOTE(review): intentionally not inlined/phased; only 'take' and
-- 'drop' below use it directly.

{-# RULES
"TEXT take -> fused" [~1] forall n t.
    take n t = unstream (S.take n (stream t))
"TEXT take -> unfused" [1] forall n t.
    unstream (S.take n (stream t)) = take n t
 #-}

-- | /O(n)/ 'takeEnd' @n@ @t@ returns the suffix remaining after
-- taking @n@ characters from the end of @t@.
--
-- Examples:
--
-- >>> takeEnd 3 "foobar"
-- "bar"
--
-- @since 1.1.1.0
takeEnd :: Int -> Text -> Text
takeEnd n t@(Text arr off len)
    | n <= 0    = empty
    | n >= len  = t
    | otherwise = text arr (off+i) (len-i)
    where i = iterNEnd n t

-- | Array (Word16) offset of the start of the suffix containing the
-- last @n@ characters, found by stepping backwards over whole code
-- points from the end.
iterNEnd :: Int -> Text -> Int
iterNEnd n t@(Text _arr _off len) = loop (len-1) n
  where loop i !m
          | m <= 0    = i+1
          | i <= 0    = 0
            -- reverseIter_ yields a negative step, so i+d moves
            -- towards the start of the array.
          | otherwise = loop (i+d) (m-1)
          where d = reverseIter_ t i

-- | /O(n)/ 'drop' @n@, applied to a 'Text', returns the suffix of the
-- 'Text' after the first @n@ characters, or the empty 'Text' if @n@
-- is greater than the length of the 'Text'. Subject to fusion.
drop :: Int -> Text -> Text
drop n t@(Text arr off len)
    | n <= 0    = t
    | n >= len  = empty
    | otherwise = text arr (off+i) (len-i)
    where i = iterN n t
{-# INLINE [1] drop #-}

{-# RULES
"TEXT drop -> fused" [~1] forall n t.
    drop n t = unstream (S.drop n (stream t))
"TEXT drop -> unfused" [1] forall n t.
    unstream (S.drop n (stream t)) = drop n t
"TEXT take . drop -> unfused" [1] forall len off t.
    unstream (S.take len (S.drop off (stream t))) = take len (drop off t)
 #-}

-- | /O(n)/ 'dropEnd' @n@ @t@ returns the prefix remaining after
-- dropping @n@ characters from the end of @t@.
--
-- Examples:
--
-- >>> dropEnd 3 "foobar"
-- "foo"
--
-- @since 1.1.1.0
dropEnd :: Int -> Text -> Text
dropEnd n t@(Text arr off len)
    | n <= 0    = t
    | n >= len  = empty
    | otherwise = text arr off (iterNEnd n t)

-- | /O(n)/ 'takeWhile', applied to a predicate @p@ and a 'Text',
-- returns the longest prefix (possibly empty) of elements that
-- satisfy @p@. Subject to fusion.
takeWhile :: (Char -> Bool) -> Text -> Text
takeWhile p t@(Text arr off len) = loop 0
    -- i is a Word16 offset; 'iter' decodes the code point at i and
    -- returns its width d, so surrogate pairs are never split.
  where loop !i | i >= len    = t
                | p c         = loop (i+d)
                | otherwise   = text arr off i
            where Iter c d    = iter t i
{-# INLINE [1] takeWhile #-}

{-# RULES
"TEXT takeWhile -> fused" [~1] forall p t.
    takeWhile p t = unstream (S.takeWhile p (stream t))
"TEXT takeWhile -> unfused" [1] forall p t.
    unstream (S.takeWhile p (stream t)) = takeWhile p t
 #-}

-- | /O(n)/ 'takeWhileEnd', applied to a predicate @p@ and a 'Text',
-- returns the longest suffix (possibly empty) of elements that
-- satisfy @p@.
-- Examples:
--
-- >>> takeWhileEnd (=='o') "foo"
-- "oo"
--
-- @since 1.2.2.0
takeWhileEnd :: (Char -> Bool) -> Text -> Text
takeWhileEnd p t@(Text arr off len) = loop (len-1) len
    -- Walks backwards from the end; d from 'reverseIter' is negative,
    -- so l tracks the length of the suffix still satisfying p.
  where loop !i !l | l <= 0    = t
                   | p c       = loop (i+d) (l+d)
                   | otherwise = text arr (off+l) (len-l)
            where (c,d)        = reverseIter t i
{-# INLINE [1] takeWhileEnd #-}

-- | /O(n)/ 'dropWhile' @p@ @t@ returns the suffix remaining after
-- 'takeWhile' @p@ @t@. Subject to fusion.
dropWhile :: (Char -> Bool) -> Text -> Text
dropWhile p t@(Text arr off len) = loop 0 0
  where loop !i !l | l >= len  = empty
                   | p c       = loop (i+d) (l+d)
                   | otherwise = Text arr (off+i) (len-l)
            where Iter c d     = iter t i
{-# INLINE [1] dropWhile #-}

{-# RULES
"TEXT dropWhile -> fused" [~1] forall p t.
    dropWhile p t = unstream (S.dropWhile p (stream t))
"TEXT dropWhile -> unfused" [1] forall p t.
    unstream (S.dropWhile p (stream t)) = dropWhile p t
 #-}

-- | /O(n)/ 'dropWhileEnd' @p@ @t@ returns the prefix remaining after
-- dropping characters that satisfy the predicate @p@ from the end of
-- @t@.
--
-- Examples:
--
-- >>> dropWhileEnd (=='.') "foo..."
-- "foo"
dropWhileEnd :: (Char -> Bool) -> Text -> Text
dropWhileEnd p t@(Text arr off len) = loop (len-1) len
  where loop !i !l | l <= 0    = empty
                   | p c       = loop (i+d) (l+d)
                   | otherwise = Text arr off l
            where (c,d)        = reverseIter t i
{-# INLINE [1] dropWhileEnd #-}

-- | /O(n)/ 'dropAround' @p@ @t@ returns the substring remaining after
-- dropping characters that satisfy the predicate @p@ from both the
-- beginning and end of @t@.  Subject to fusion.
dropAround :: (Char -> Bool) -> Text -> Text
dropAround p = dropWhile p . dropWhileEnd p
{-# INLINE [1] dropAround #-}

-- | /O(n)/ Remove leading white space from a string.  Equivalent to:
--
-- > dropWhile isSpace
stripStart :: Text -> Text
stripStart = dropWhile isSpace
{-# INLINE stripStart #-}

-- | /O(n)/ Remove trailing white space from a string.  Equivalent to:
--
-- > dropWhileEnd isSpace
stripEnd :: Text -> Text
stripEnd = dropWhileEnd isSpace
{-# INLINE [1] stripEnd #-}

-- | /O(n)/ Remove leading and trailing white space from a string.
-- Equivalent to:
--
-- > dropAround isSpace
strip :: Text -> Text
strip = dropAround isSpace
{-# INLINE [1] strip #-}

-- | /O(n)/ 'splitAt' @n t@ returns a pair whose first element is a
-- prefix of @t@ of length @n@, and whose second is the remainder of
-- the string. It is equivalent to @('take' n t, 'drop' n t)@.
splitAt :: Int -> Text -> (Text, Text)
splitAt n t@(Text arr off len)
    | n <= 0    = (empty, t)
    | n >= len  = (t, empty)
      -- Single traversal: compute the split offset once, then build
      -- two zero-copy slices of the original array.
    | otherwise = let k = iterN n t
                  in (text arr off k, text arr (off+k) (len-k))

-- | /O(n)/ 'span', applied to a predicate @p@ and text @t@, returns
-- a pair whose first element is the longest prefix (possibly empty)
-- of @t@ of elements that satisfy @p@, and whose second is the
-- remainder of the list.
--
-- >>> T.span (=='0') "000AB"
-- ("000","AB")
span :: (Char -> Bool) -> Text -> (Text, Text)
-- span_ returns an unboxed pair; rebox it for the public API.
span p t = case span_ p t of
             (# hd,tl #) -> (hd,tl)
{-# INLINE span #-}

-- | /O(n)/ 'break' is like 'span', but the prefix returned is
-- over elements that fail the predicate @p@.
--
-- >>> T.break (=='c') "180cm"
-- ("180","cm")
break :: (Char -> Bool) -> Text -> (Text, Text)
break p = span (not . p)
{-# INLINE break #-}

-- | /O(n)/ Group characters in a string according to a predicate.
groupBy :: (Char -> Char -> Bool) -> Text -> [Text]
groupBy p = loop
  where
    loop t@(Text arr off len)
        | null t    = []
          -- n is the Word16 length of the first group: the width of
          -- its first character plus the span of characters related
          -- to it by p.
        | otherwise = text arr off n : loop (text arr (off+n) (len-n))
        where Iter c d = iter t 0
              n     = d + findAIndexOrEnd (not . p c) (Text arr (off+d) (len-d))

-- | Returns the /array/ index (in units of 'Word16') at which a
-- character may be found.  This is /not/ the same as the logical
-- index returned by e.g. 'findIndex'.
findAIndexOrEnd :: (Char -> Bool) -> Text -> Int
findAIndexOrEnd q t@(Text _arr _off len) = go 0
    where go !i | i >= len || q c       = i
                | otherwise             = go (i+d)
                where Iter c d          = iter t i

-- | /O(n)/ Group characters in a string by equality.
group :: Text -> [Text]
group = groupBy (==)

-- | /O(n)/ Return all initial segments of the given 'Text', shortest
-- first.
inits :: Text -> [Text]
inits t@(Text arr off len) = loop 0
    where loop i | i >= len = [t]
                 | otherwise = Text arr off i : loop (i + iter_ t i)

-- | /O(n)/ Return all final segments of the given 'Text', longest
-- first.
tails :: Text -> [Text]
tails t | null t    = [empty]
        | otherwise = t : tails (unsafeTail t)

-- $split
--
-- Splitting functions in this library do not perform character-wise
-- copies to create substrings; they just construct new 'Text's that
-- are slices of the original.

-- | /O(m+n)/ Break a 'Text' into pieces separated by the first 'Text'
-- argument (which cannot be empty), consuming the delimiter. An empty
-- delimiter is invalid, and will cause an error to be raised.
--
-- Examples:
--
-- >>> splitOn "\r\n" "a\r\nb\r\nd\r\ne"
-- ["a","b","d","e"]
--
-- >>> splitOn "aaa" "aaaXaaaXaaaXaaa"
-- ["","X","X","X",""]
--
-- >>> splitOn "x" "x"
-- ["",""]
--
-- and
--
-- > intercalate s . splitOn s         == id
-- > splitOn (singleton c)             == split (==c)
--
-- (Note: the string @s@ to split on above cannot be empty.)
--
-- In (unlikely) bad cases, this function's time complexity degrades
-- towards /O(n*m)/.
splitOn :: Text
        -- ^ String to split on. If this string is empty, an error
        -- will occur.
        -> Text
        -- ^ Input text.
        -> [Text]
splitOn pat@(Text _ _ l) src@(Text arr off len)
    | l <= 0          = emptyError "splitOn"
    | isSingleton pat = split (== unsafeHead pat) src
      -- 'indices' yields the array offsets of every non-overlapping
      -- match; each slice runs from the end of the previous match.
    | otherwise       = go 0 (indices pat src)
  where
    go !s (x:xs) =  text arr (s+off) (x-s) : go (x+l) xs
    go  s _      = [text arr (s+off) (len-s)]
{-# INLINE [1] splitOn #-}

{-# RULES
"TEXT splitOn/singleton -> split/==" [~1] forall c t.
    splitOn (singleton c) t = split (==c) t
 #-}

-- | /O(n)/ Splits a 'Text' into components delimited by separators,
-- where the predicate returns True for a separator element.  The
-- resulting components do not contain the separators.  Two adjacent
-- separators result in an empty component in the output.  eg.
--
-- >>> split (=='a') "aabbaca"
-- ["","","bb","c",""]
--
-- >>> split (=='a') ""
-- [""]
split :: (Char -> Bool) -> Text -> [Text]
-- NOTE(review): the pattern variable names below are transposed
-- (offset field bound as _off where the array lives, etc.); both are
-- unused wildcards, so only readability is affected.
split _ t@(Text _off _arr 0) = [t]
split p t = loop t
    where loop s | null s'   = [l]
                 | otherwise = l : loop (unsafeTail s')
              where (# l, s' #) = span_ (not . p) s
{-# INLINE split #-}

-- | /O(n)/ Splits a 'Text' into components of length @k@.  The last
-- element may be shorter than the other chunks, depending on the
-- length of the input. Examples:
--
-- >>> chunksOf 3 "foobarbaz"
-- ["foo","bar","baz"]
--
-- >>> chunksOf 4 "haskell.org"
-- ["hask","ell.","org"]
chunksOf :: Int -> Text -> [Text]
chunksOf k = go
  where
    go t = case splitAt k t of
             (a,b) | null a    -> []
                   | otherwise -> a : go b
{-# INLINE chunksOf #-}

-- ----------------------------------------------------------------------------
-- * Searching

-------------------------------------------------------------------------------
-- ** Searching with a predicate

-- | /O(n)/ The 'find' function takes a predicate and a 'Text', and
-- returns the first element matching the predicate, or 'Nothing' if
-- there is no such element. Subject to fusion.
find :: (Char -> Bool) -> Text -> Maybe Char
find p t = S.findBy p (stream t)
{-# INLINE find #-}

-- | /O(n)/ The 'partition' function takes a predicate and a 'Text',
-- and returns the pair of 'Text's with elements which do and do not
-- satisfy the predicate, respectively; i.e.
--
-- > partition p t == (filter p t, filter (not . p) t)
partition :: (Char -> Bool) -> Text -> (Text, Text)
partition p t = (filter p t, filter (not . p) t)
{-# INLINE partition #-}

-- | /O(n)/ 'filter', applied to a predicate and a 'Text',
-- returns a 'Text' containing those characters that satisfy the
-- predicate.
filter :: (Char -> Bool) -> Text -> Text
filter p t = unstream (S.filter p (stream t))
{-# INLINE filter #-}

-- | /O(n+m)/ Find the first instance of @needle@ (which must be
-- non-'null') in @haystack@.  The first element of the returned tuple
-- is the prefix of @haystack@ before @needle@ is matched.  The second
-- is the remainder of @haystack@, starting with the match.
--
-- Examples:
--
-- >>> breakOn "::" "a::b::c"
-- ("a","::b::c")
--
-- >>> breakOn "/" "foobar"
-- ("foobar","")
--
-- Laws:
--
-- > append prefix match == haystack
-- >   where (prefix, match) = breakOn needle haystack
--
-- If you need to break a string by a substring repeatedly (e.g. you
-- want to break on every instance of a substring), use 'breakOnAll'
-- instead, as it has lower startup overhead.
--
-- In (unlikely) bad cases, this function's time complexity degrades
-- towards /O(n*m)/.
breakOn :: Text -> Text -> (Text, Text)
breakOn pat src@(Text arr off len)
    | null pat  = emptyError "breakOn"
    | otherwise = case indices pat src of
                    []    -> (src, empty)
                      -- Only the first match offset is forced; both
                      -- halves are zero-copy slices.
                    (x:_) -> (text arr off x, text arr (off+x) (len-x))
{-# INLINE breakOn #-}

-- | /O(n+m)/ Similar to 'breakOn', but searches from the end of the
-- string.
--
-- The first element of the returned tuple is the prefix of @haystack@
-- up to and including the last match of @needle@.  The second is the
-- remainder of @haystack@, following the match.
--
-- >>> breakOnEnd "::" "a::b::c"
-- ("a::b::","c")
breakOnEnd :: Text -> Text -> (Text, Text)
-- Implemented by searching the reversed strings and un-reversing the
-- two halves, hence the swapped components of the result.
breakOnEnd pat src = (reverse b, reverse a)
    where (a,b) = breakOn (reverse pat) (reverse src)
{-# INLINE breakOnEnd #-}

-- | /O(n+m)/ Find all non-overlapping instances of @needle@ in
-- @haystack@.  Each element of the returned list consists of a pair:
--
-- * The entire string prior to the /k/th match (i.e. the prefix)
--
-- * The /k/th match, followed by the remainder of the string
--
-- Examples:
--
-- >>> breakOnAll "::" ""
-- []
--
-- >>> breakOnAll "/" "a/b/c/"
-- [("a","/b/c/"),("a/b","/c/"),("a/b/c","/")]
--
-- In (unlikely) bad cases, this function's time complexity degrades
-- towards /O(n*m)/.
--
-- The @needle@ parameter may not be empty.
breakOnAll :: Text              -- ^ @needle@ to search for
           -> Text              -- ^ @haystack@ in which to search
           -> [(Text, Text)]
breakOnAll pat src@(Text arr off slen)
    | null pat  = emptyError "breakOnAll"
    | otherwise = L.map step (indices pat src)
  where
    step       x = (chunk 0 x, chunk x (slen-x))
    chunk !n !l  = text arr (n+off) l
{-# INLINE breakOnAll #-}

-------------------------------------------------------------------------------
-- ** Indexing 'Text's

-- $index
--
-- If you think of a 'Text' value as an array of 'Char' values (which
-- it is not), you run the risk of writing inefficient code.
--
-- An idiom that is common in some languages is to find the numeric
-- offset of a character or substring, then use that number to split
-- or trim the searched string.  With a 'Text' value, this approach
-- would require two /O(n)/ operations: one to perform the search, and
-- one to operate from wherever the search ended.
--
-- For example, suppose you have a string that you want to split on
-- the substring @\"::\"@, such as @\"foo::bar::quux\"@. Instead of
-- searching for the index of @\"::\"@ and taking the substrings
-- before and after that index, you would instead use @breakOnAll \"::\"@.

-- | /O(n)/ 'Text' index (subscript) operator, starting from 0. Subject to fusion.
index :: Text -> Int -> Char
index t n = S.index (stream t) n
{-# INLINE index #-}

-- | /O(n)/ The 'findIndex' function takes a predicate and a 'Text'
-- and returns the index of the first element in the 'Text' satisfying
-- the predicate. Subject to fusion.
findIndex :: (Char -> Bool) -> Text -> Maybe Int
findIndex p t = S.findIndex p (stream t)
{-# INLINE findIndex #-}

-- | /O(n+m)/ The 'count' function returns the number of times the
-- query string appears in the given 'Text'. An empty query string is
-- invalid, and will cause an error to be raised.
--
-- In (unlikely) bad cases, this function's time complexity degrades
-- towards /O(n*m)/.
count :: Text -> Text -> Int
count pat src
    | null pat        = emptyError "count"
    | isSingleton pat = countChar (unsafeHead pat) src
    | otherwise       = L.length (indices pat src)
{-# INLINE [1] count #-}

{-# RULES
"TEXT count/singleton -> countChar" [~1] forall c t.
    count (singleton c) t = countChar c t
 #-}

-- | /O(n)/ The 'countChar' function returns the number of times the
-- query element appears in the given 'Text'. Subject to fusion.
countChar :: Char -> Text -> Int
countChar c t = S.countChar c (stream t)
{-# INLINE countChar #-}

-------------------------------------------------------------------------------
-- * Zipping

-- | /O(n)/ 'zip' takes two 'Text's and returns a list of
-- corresponding pairs of bytes.  If one input 'Text' is short,
-- excess elements of the longer 'Text' are discarded. This is
-- equivalent to a pair of 'unpack' operations.
zip :: Text -> Text -> [(Char,Char)]
zip a b = S.unstreamList $ S.zipWith (,) (stream a) (stream b)
{-# INLINE zip #-}

-- | /O(n)/ 'zipWith' generalises 'zip' by zipping with the function
-- given as the first argument, instead of a tupling function.
-- Performs replacement on invalid scalar values.
zipWith :: (Char -> Char -> Char) -> Text -> Text -> Text
zipWith f t1 t2 = unstream (S.zipWith g (stream t1) (stream t2))
    where g a b = safe (f a b)
{-# INLINE zipWith #-}

-- | /O(n)/ Breaks a 'Text' up into a list of words, delimited by 'Char's
-- representing white space.
words :: Text -> [Text]
words t@(Text arr off len) = loop 0 0
  where
    -- start is the array offset of the current word's first code
    -- unit; n is the scanning offset.
    loop !start !n
        | n >= len = if start == n
                     then []
                     else [Text arr (start+off) (n-start)]
        | isSpace c =
            if start == n
            -- NOTE(review): skipping a space with (+1) assumes every
            -- white-space character occupies a single Word16 unit;
            -- confirm against the Unicode ranges 'isSpace' accepts.
            then loop (start+1) (start+1)
            else Text arr (start+off) (n-start) : loop (n+d) (n+d)
        | otherwise = loop start (n+d)
        where Iter c d = iter t n
{-# INLINE words #-}

-- | /O(n)/ Breaks a 'Text' up into a list of 'Text's at
-- newline 'Char's. The resulting strings do not contain newlines.
lines :: Text -> [Text]
lines ps | null ps   = []
         | otherwise = h : if null t
                           then []
                           else lines (unsafeTail t)
    where (# h,t #) = span_ (/= '\n') ps
{-# INLINE lines #-}

{-
-- | /O(n)/ Portably breaks a 'Text' up into a list of 'Text's at line
-- boundaries.
--
-- A line boundary is considered to be either a line feed, a carriage
-- return immediately followed by a line feed, or a carriage return.
-- This accounts for both Unix and Windows line ending conventions,
-- and for the old convention used on Mac OS 9 and earlier.
lines' :: Text -> [Text]
lines' ps | null ps   = []
          | otherwise = h : case uncons t of
                              Nothing -> []
                              Just (c,t')
                                  | c == '\n' -> lines t'
                                  | c == '\r' -> case uncons t' of
                                                   Just ('\n',t'') -> lines t''
                                                   _               -> lines t'
    where (h,t)    = span notEOL ps
          notEOL c = c /= '\n' && c /= '\r'
{-# INLINE lines' #-}
-}

-- | /O(n)/ Joins lines, after appending a terminating newline to
-- each.
unlines :: [Text] -> Text
unlines = concat . L.map (`snoc` '\n')
{-# INLINE unlines #-}

-- | /O(n)/ Joins words using single space characters.
unwords :: [Text] -> Text
unwords = intercalate (singleton ' ')
{-# INLINE unwords #-}

-- | /O(n)/ The 'isPrefixOf' function takes two 'Text's and returns
-- 'True' iff the first is a prefix of the second. Subject to fusion.
isPrefixOf :: Text -> Text -> Bool
isPrefixOf a@(Text _ _ alen) b@(Text _ _ blen) =
    -- The cheap length comparison short-circuits the streaming check.
    alen <= blen && S.isPrefixOf (stream a) (stream b)
{-# INLINE [1] isPrefixOf #-}

{-# RULES
"TEXT isPrefixOf -> fused" [~1] forall s t.
    isPrefixOf s t = S.isPrefixOf (stream s) (stream t)
 #-}

-- | /O(n)/ The 'isSuffixOf' function takes two 'Text's and returns
-- 'True' iff the first is a suffix of the second.
isSuffixOf :: Text -> Text -> Bool
isSuffixOf a@(Text _aarr _aoff alen) b@(Text barr boff blen) =
    -- Compare against a zero-copy slice of b's last alen code units.
    d >= 0 && a == b'
  where d              = blen - alen
        b' | d == 0    = b
           | otherwise = Text barr (boff+d) alen
{-# INLINE isSuffixOf #-}

-- | /O(n+m)/ The 'isInfixOf' function takes two 'Text's and returns
-- 'True' iff the first is contained, wholly and intact, anywhere
-- within the second.
--
-- In (unlikely) bad cases, this function's time complexity degrades
-- towards /O(n*m)/.
isInfixOf :: Text -> Text -> Bool
isInfixOf needle haystack
    | null needle        = True
    | isSingleton needle = S.elem (unsafeHead needle) . S.stream $ haystack
    | otherwise          = not . L.null . indices needle $ haystack
{-# INLINE [1] isInfixOf #-}

{-# RULES
"TEXT isInfixOf/singleton -> S.elem/S.stream" [~1] forall n h.
    isInfixOf (singleton n) h = S.elem n (S.stream h)
 #-}

-------------------------------------------------------------------------------
-- * View patterns

-- | /O(n)/ Return the suffix of the second string if its prefix
-- matches the entire first string.
--
-- Examples:
--
-- >>> stripPrefix "foo" "foobar"
-- Just "bar"
--
-- >>> stripPrefix ""    "baz"
-- Just "baz"
--
-- >>> stripPrefix "foo" "quux"
-- Nothing
--
-- This is particularly useful with the @ViewPatterns@ extension to
-- GHC, as follows:
--
-- > {-# LANGUAGE ViewPatterns #-}
-- > import Data.Text as T
-- >
-- > fnordLength :: Text -> Int
-- > fnordLength (stripPrefix "fnord" -> Just suf) = T.length suf
-- > fnordLength _                                 = -1
stripPrefix :: Text -> Text -> Maybe Text
stripPrefix p@(Text _arr _off plen) t@(Text arr off len)
    -- The suffix is forced with $! so no thunk retains p.
    | p `isPrefixOf` t = Just $! text arr (off+plen) (len-plen)
    | otherwise        = Nothing

-- | /O(n)/ Find the longest non-empty common prefix of two strings
-- and return it, along with the suffixes of each string at which they
-- no longer match.
--
-- If the strings do not have a common prefix or either one is empty,
-- this function returns 'Nothing'.
--
-- Examples:
--
-- >>> commonPrefixes "foobar" "fooquux"
-- Just ("foo","bar","quux")
--
-- >>> commonPrefixes "veeble" "fetzer"
-- Nothing
--
-- >>> commonPrefixes "" "baz"
-- Nothing
commonPrefixes :: Text -> Text -> Maybe (Text,Text,Text)
commonPrefixes t0@(Text arr0 off0 len0) t1@(Text arr1 off1 len1) = go 0 0
  where
    -- i and j are Word16 offsets into t0 and t1 respectively; they
    -- advance in lockstep by whole code points while the decoded
    -- characters agree.
    go !i !j | i < len0 && j < len1 && a == b = go (i+d0) (j+d1)
             | i > 0     = Just (Text arr0 off0 i,
                                 text arr0 (off0+i) (len0-i),
                                 text arr1 (off1+j) (len1-j))
             | otherwise = Nothing
      where Iter a d0 = iter t0 i
            Iter b d1 = iter t1 j

-- | /O(n)/ Return the prefix of the second string if its suffix
-- matches the entire first string.
--
-- Examples:
--
-- >>> stripSuffix "bar" "foobar"
-- Just "foo"
--
-- >>> stripSuffix ""    "baz"
-- Just "baz"
--
-- >>> stripSuffix "foo" "quux"
-- Nothing
--
-- This is particularly useful with the @ViewPatterns@ extension to
-- GHC, as follows:
--
-- > {-# LANGUAGE ViewPatterns #-}
-- > import Data.Text as T
-- >
-- > quuxLength :: Text -> Int
-- > quuxLength (stripSuffix "quux" -> Just pre) = T.length pre
-- > quuxLength _                                = -1
stripSuffix :: Text -> Text -> Maybe Text
stripSuffix p@(Text _arr _off plen) t@(Text arr off len)
    | p `isSuffixOf` t = Just $! text arr off (len-plen)
    | otherwise        = Nothing

-- | Add a list of non-negative numbers.  Errors out on overflow.
sumP :: String -> [Int] -> Int
sumP fun = go 0
    where go !a (x:xs)
              -- With both addends non-negative, a single overflowing
              -- addition always wraps to a negative 'Int', so the
              -- sign check detects overflow at each step.
              | ax >= 0   = go ax xs
              | otherwise = overflowError fun
            where ax = a + x
          go a  _         = a

emptyError :: String -> a
emptyError fun = P.error $ "Data.Text." ++ fun ++ ": empty input"

overflowError :: String -> a
overflowError fun = P.error $ "Data.Text." ++ fun ++ ": size overflow"

-- | /O(n)/ Make a distinct copy of the given string, sharing no
-- storage with the original string.
--
-- As an example, suppose you read a large string, of which you need
-- only a small portion.  If you do not use 'copy', the entire original
-- array will be kept alive in memory by the smaller string. Making a
-- copy \"breaks the link\" to the original array, allowing it to be
-- garbage collected if there are no other live references to it.
copy :: Text -> Text
copy (Text arr off len) = Text (A.run go) 0 len
  where
    go :: ST s (A.MArray s)
    go = do
      marr <- A.new len
      A.copyI marr 0 arr off len
      return marr


-------------------------------------------------
-- NOTE: the named chunk below used by doctest;
-- verify the doctests via `doctest -fobject-code Data/Text.hs`

-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import qualified Data.Text as T