1{-# LANGUAGE BangPatterns, CPP, MagicHash, Rank2Types, UnboxedTuples, TypeFamilies #-}
2{-# OPTIONS_GHC -fno-warn-orphans #-}
3#if __GLASGOW_HASKELL__ >= 702
4{-# LANGUAGE Trustworthy #-}
5#endif
6-- Using TemplateHaskell in text unconditionally is unacceptable, as
7-- it's a GHC boot library. TemplateHaskellQuotes was added in 8.0, so
8-- this would seem to be a problem. However, GHC's policy of only
9-- needing to be able to compile itself from the last few releases
10-- allows us to use full-fat TH on older versions, while using THQ for
11-- GHC versions that may be used for bootstrapping.
12#if __GLASGOW_HASKELL__ >= 800
13{-# LANGUAGE TemplateHaskellQuotes #-}
14#else
15{-# LANGUAGE TemplateHaskell #-}
16#endif
17
18-- |
19-- Module      : Data.Text
20-- Copyright   : (c) 2009, 2010, 2011, 2012 Bryan O'Sullivan,
21--               (c) 2009 Duncan Coutts,
22--               (c) 2008, 2009 Tom Harper
23--
24-- License     : BSD-style
25-- Maintainer  : bos@serpentine.com
26-- Portability : GHC
27--
-- A time- and space-efficient implementation of Unicode text.
-- Suitable for performance-critical use, both in terms of large data
30-- quantities and high speed.
31--
32-- /Note/: Read below the synopsis for important notes on the use of
33-- this module.
34--
35-- This module is intended to be imported @qualified@, to avoid name
36-- clashes with "Prelude" functions, e.g.
37--
38-- > import qualified Data.Text as T
39--
40-- To use an extended and very rich family of functions for working
41-- with Unicode text (including normalization, regular expressions,
42-- non-standard encodings, text breaking, and locales), see the
43-- <http://hackage.haskell.org/package/text-icu text-icu package >.
44--
45
46module Data.Text
47    (
48    -- * Strict vs lazy types
49    -- $strict
50
51    -- * Acceptable data
52    -- $replacement
53
54    -- * Definition of character
55    -- $character_definition
56
57    -- * Fusion
58    -- $fusion
59
60    -- * Types
61      Text
62
63    -- * Creation and elimination
64    , pack
65    , unpack
66    , singleton
67    , empty
68
69    -- * Basic interface
70    , cons
71    , snoc
72    , append
73    , uncons
74    , unsnoc
75    , head
76    , last
77    , tail
78    , init
79    , null
80    , length
81    , compareLength
82
83    -- * Transformations
84    , map
85    , intercalate
86    , intersperse
87    , transpose
88    , reverse
89    , replace
90
91    -- ** Case conversion
92    -- $case
93    , toCaseFold
94    , toLower
95    , toUpper
96    , toTitle
97
98    -- ** Justification
99    , justifyLeft
100    , justifyRight
101    , center
102
103    -- * Folds
104    , foldl
105    , foldl'
106    , foldl1
107    , foldl1'
108    , foldr
109    , foldr1
110
111    -- ** Special folds
112    , concat
113    , concatMap
114    , any
115    , all
116    , maximum
117    , minimum
118
119    -- * Construction
120
121    -- ** Scans
122    , scanl
123    , scanl1
124    , scanr
125    , scanr1
126
127    -- ** Accumulating maps
128    , mapAccumL
129    , mapAccumR
130
131    -- ** Generation and unfolding
132    , replicate
133    , unfoldr
134    , unfoldrN
135
136    -- * Substrings
137
138    -- ** Breaking strings
139    , take
140    , takeEnd
141    , drop
142    , dropEnd
143    , takeWhile
144    , takeWhileEnd
145    , dropWhile
146    , dropWhileEnd
147    , dropAround
148    , strip
149    , stripStart
150    , stripEnd
151    , splitAt
152    , breakOn
153    , breakOnEnd
154    , break
155    , span
156    , group
157    , groupBy
158    , inits
159    , tails
160
161    -- ** Breaking into many substrings
162    -- $split
163    , splitOn
164    , split
165    , chunksOf
166
167    -- ** Breaking into lines and words
168    , lines
169    --, lines'
170    , words
171    , unlines
172    , unwords
173
174    -- * Predicates
175    , isPrefixOf
176    , isSuffixOf
177    , isInfixOf
178
179    -- ** View patterns
180    , stripPrefix
181    , stripSuffix
182    , commonPrefixes
183
184    -- * Searching
185    , filter
186    , breakOnAll
187    , find
188    , partition
189
190    -- , findSubstring
191
192    -- * Indexing
193    -- $index
194    , index
195    , findIndex
196    , count
197
198    -- * Zipping
199    , zip
200    , zipWith
201
202    -- -* Ordered text
203    -- , sort
204
205    -- * Low level operations
206    , copy
207    , unpackCString#
208    ) where
209
210import Prelude (Char, Bool(..), Int, Maybe(..), String,
211                Eq(..), Ord(..), Ordering(..), (++),
212                Read(..),
213                (&&), (||), (+), (-), (.), ($), ($!), (>>),
214                not, return, otherwise, quot)
215import Control.DeepSeq (NFData(rnf))
216#if defined(ASSERTS)
217import Control.Exception (assert)
218#endif
219import Data.Char (isSpace)
220import Data.Data (Data(gfoldl, toConstr, gunfold, dataTypeOf), constrIndex,
221                  Constr, mkConstr, DataType, mkDataType, Fixity(Prefix))
222import Control.Monad (foldM)
223import Control.Monad.ST (ST)
224import qualified Data.Text.Array as A
225import qualified Data.List as L
226import Data.Binary (Binary(get, put))
227import Data.Monoid (Monoid(..))
228#if MIN_VERSION_base(4,9,0)
229import Data.Semigroup (Semigroup(..))
230#endif
231import Data.String (IsString(..))
232import qualified Data.Text.Internal.Fusion as S
233import qualified Data.Text.Internal.Fusion.Common as S
234import Data.Text.Encoding (decodeUtf8', encodeUtf8)
235import Data.Text.Internal.Fusion (stream, reverseStream, unstream)
236import Data.Text.Internal.Private (span_)
237import Data.Text.Internal (Text(..), empty, firstf, mul, safe, text)
238import Data.Text.Show (singleton, unpack, unpackCString#)
239import qualified Prelude as P
240import Data.Text.Unsafe (Iter(..), iter, iter_, lengthWord16, reverseIter,
241                         reverseIter_, unsafeHead, unsafeTail)
242import Data.Text.Internal.Unsafe.Char (unsafeChr)
243import qualified Data.Text.Internal.Functions as F
244import qualified Data.Text.Internal.Encoding.Utf16 as U16
245import Data.Text.Internal.Search (indices)
246import Data.Text.Internal.Unsafe.Shift (UnsafeShift(..))
247#if defined(__HADDOCK__)
248import Data.ByteString (ByteString)
249import qualified Data.Text.Lazy as L
250import Data.Int (Int64)
251#endif
252import GHC.Base (eqInt, neInt, gtInt, geInt, ltInt, leInt)
253#if MIN_VERSION_base(4,7,0)
254import qualified GHC.Exts as Exts
255#endif
256import qualified Language.Haskell.TH.Lib as TH
257import qualified Language.Haskell.TH.Syntax as TH
258#if MIN_VERSION_base(4,7,0)
259import Text.Printf (PrintfArg, formatArg, formatString)
260#endif
261
262-- $setup
263-- >>> import Data.Text
264-- >>> import qualified Data.Text as T
265-- >>> :seti -XOverloadedStrings
266
267-- $character_definition
268--
269-- This package uses the term /character/ to denote Unicode /code points/.
270--
271-- Note that this is not the same thing as a grapheme (e.g. a
272-- composition of code points that form one visual symbol). For
273-- instance, consider the grapheme \"&#x00e4;\". This symbol has two
274-- Unicode representations: a single code-point representation
275-- @U+00E4@ (the @LATIN SMALL LETTER A WITH DIAERESIS@ code point),
-- and a two-code-point representation: @U+0061@ (the \"@a@\" code
-- point) followed by @U+0308@ (the @COMBINING DIAERESIS@ code point).
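--
-- As a small illustration of the consequence (an informal property, not
-- part of the API contract), 'length' counts code points rather than
-- graphemes:
--
-- > length (pack "\x00E4")  == 1
-- > length (pack "a\x0308") == 2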
278
279-- $strict
280--
281-- This package provides both strict and lazy 'Text' types.  The
282-- strict type is provided by the "Data.Text" module, while the lazy
283-- type is provided by the "Data.Text.Lazy" module. Internally, the
284-- lazy @Text@ type consists of a list of strict chunks.
285--
286-- The strict 'Text' type requires that an entire string fit into
287-- memory at once.  The lazy 'Data.Text.Lazy.Text' type is capable of
288-- streaming strings that are larger than memory using a small memory
289-- footprint.  In many cases, the overhead of chunked streaming makes
290-- the lazy 'Data.Text.Lazy.Text' type slower than its strict
291-- counterpart, but this is not always the case.  Sometimes, the time
292-- complexity of a function in one module may be different from the
293-- other, due to their differing internal structures.
294--
295-- Each module provides an almost identical API, with the main
296-- difference being that the strict module uses 'Int' values for
-- lengths and counts, while the lazy module uses 'Data.Int.Int64'
-- values.
299
300-- $replacement
301--
302-- A 'Text' value is a sequence of Unicode scalar values, as defined
303-- in
304-- <http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf#page=35 §3.9, definition D76 of the Unicode 5.2 standard >.
305-- As such, a 'Text' cannot contain values in the range U+D800 to
306-- U+DFFF inclusive. Haskell implementations admit all Unicode code
307-- points
308-- (<http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf#page=13 §3.4, definition D10 >)
309-- as 'Char' values, including code points from this invalid range.
310-- This means that there are some 'Char' values that are not valid
311-- Unicode scalar values, and the functions in this module must handle
312-- those cases.
313--
314-- Within this module, many functions construct a 'Text' from one or
315-- more 'Char' values. Those functions will substitute 'Char' values
316-- that are not valid Unicode scalar values with the replacement
317-- character \"&#xfffd;\" (U+FFFD).  Functions that perform this
318-- inspection and replacement are documented with the phrase
319-- \"Performs replacement on invalid scalar values\".
320--
321-- (One reason for this policy of replacement is that internally, a
322-- 'Text' value is represented as packed UTF-16 data. Values in the
323-- range U+D800 through U+DFFF are used by UTF-16 to denote surrogate
324-- code points, and so cannot be represented. The functions replace
325-- invalid scalar values, instead of dropping them, as a security
326-- measure. For details, see
327-- <http://unicode.org/reports/tr36/#Deletion_of_Noncharacters Unicode Technical Report 36, §3.5 >.)
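--
-- As a small illustration of the replacement behaviour, packing a lone
-- surrogate yields the replacement character:
--
-- > pack "\xD800" == pack "\xFFFD"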
328
329-- $fusion
330--
331-- Most of the functions in this module are subject to /fusion/,
332-- meaning that a pipeline of such functions will usually allocate at
333-- most one 'Text' value.
334--
335-- As an example, consider the following pipeline:
336--
337-- > import Data.Text as T
338-- > import Data.Text.Encoding as E
339-- > import Data.ByteString (ByteString)
340-- >
341-- > countChars :: ByteString -> Int
342-- > countChars = T.length . T.toUpper . E.decodeUtf8
343--
344-- From the type signatures involved, this looks like it should
345-- allocate one 'Data.ByteString.ByteString' value, and two 'Text'
346-- values. However, when a module is compiled with optimisation
347-- enabled under GHC, the two intermediate 'Text' values will be
348-- optimised away, and the function will be compiled down to a single
349-- loop over the source 'Data.ByteString.ByteString'.
350--
351-- Functions that can be fused by the compiler are documented with the
352-- phrase \"Subject to fusion\".
353
354instance Eq Text where
355    Text arrA offA lenA == Text arrB offB lenB
356        | lenA == lenB = A.equal arrA offA arrB offB lenA
357        | otherwise    = False
358    {-# INLINE (==) #-}
359
360instance Ord Text where
361    compare = compareText
362
363instance Read Text where
364    readsPrec p str = [(pack x,y) | (x,y) <- readsPrec p str]
365
366#if MIN_VERSION_base(4,9,0)
367-- | Non-orphan 'Semigroup' instance only defined for
368-- @base-4.9.0.0@ and later; orphan instances for older GHCs are
369-- provided by
370-- the [semigroups](http://hackage.haskell.org/package/semigroups)
-- package.
372--
373-- @since 1.2.2.0
374instance Semigroup Text where
375    (<>) = append
376#endif
377
378instance Monoid Text where
379    mempty  = empty
380#if MIN_VERSION_base(4,9,0)
381    mappend = (<>) -- future-proof definition
382#else
383    mappend = append
384#endif
385    mconcat = concat
386
387instance IsString Text where
388    fromString = pack
389
390#if MIN_VERSION_base(4,7,0)
391-- | @since 1.2.0.0
392instance Exts.IsList Text where
393    type Item Text = Char
394    fromList       = pack
395    toList         = unpack
396#endif
397
398instance NFData Text where rnf !_ = ()
399
400-- | @since 1.2.1.0
401instance Binary Text where
402    put t = put (encodeUtf8 t)
403    get   = do
404      bs <- get
405      case decodeUtf8' bs of
406        P.Left exn -> P.fail (P.show exn)
407        P.Right a -> P.return a
408
409-- | This instance preserves data abstraction at the cost of inefficiency.
410-- We omit reflection services for the sake of data abstraction.
411--
412-- This instance was created by copying the updated behavior of
413-- @"Data.Set".@'Data.Set.Set' and @"Data.Map".@'Data.Map.Map'. If you
414-- feel a mistake has been made, please feel free to submit
415-- improvements.
416--
417-- The original discussion is archived here:
418-- <https://mail.haskell.org/pipermail/haskell-cafe/2010-January/072379.html could we get a Data instance for Data.Text.Text? >
419--
420-- The followup discussion that changed the behavior of 'Data.Set.Set'
421-- and 'Data.Map.Map' is archived here:
422-- <http://markmail.org/message/trovdc6zkphyi3cr#query:+page:1+mid:a46der3iacwjcf6n+state:results Proposal: Allow gunfold for Data.Map, ... >
423
424instance Data Text where
425  gfoldl f z txt = z pack `f` (unpack txt)
426  toConstr _ = packConstr
427  gunfold k z c = case constrIndex c of
428    1 -> k (z pack)
429    _ -> P.error "gunfold"
430  dataTypeOf _ = textDataType
431
432-- | This instance has similar considerations to the 'Data' instance:
433-- it preserves abstraction at the cost of inefficiency.
434--
435-- @since 1.2.4.0
436instance TH.Lift Text where
437  lift = TH.appE (TH.varE 'pack) . TH.stringE . unpack
438#if MIN_VERSION_template_haskell(2,17,0)
439  liftTyped = TH.unsafeCodeCoerce . TH.lift
440#elif MIN_VERSION_template_haskell(2,16,0)
441  liftTyped = TH.unsafeTExpCoerce . TH.lift
442#endif
443
444#if MIN_VERSION_base(4,7,0)
445-- | Only defined for @base-4.7.0.0@ and later
446--
447-- @since 1.2.2.0
448instance PrintfArg Text where
449  formatArg txt = formatString $ unpack txt
450#endif
451
452packConstr :: Constr
453packConstr = mkConstr textDataType "pack" [] Prefix
454
455textDataType :: DataType
456textDataType = mkDataType "Data.Text.Text" [packConstr]
457
458-- | /O(n)/ Compare two 'Text' values lexicographically.
459compareText :: Text -> Text -> Ordering
460compareText ta@(Text _arrA _offA lenA) tb@(Text _arrB _offB lenB)
461    | lenA == 0 && lenB == 0 = EQ
462    | otherwise              = go 0 0
463  where
464    go !i !j
465        | i >= lenA || j >= lenB = compare lenA lenB
466        | a < b                  = LT
467        | a > b                  = GT
468        | otherwise              = go (i+di) (j+dj)
469      where Iter a di = iter ta i
470            Iter b dj = iter tb j
471
472-- -----------------------------------------------------------------------------
473-- * Conversion to/from 'Text'
474
475-- | /O(n)/ Convert a 'String' into a 'Text'.  Subject to
476-- fusion.  Performs replacement on invalid scalar values.
477pack :: String -> Text
478pack = unstream . S.map safe . S.streamList
479{-# INLINE [1] pack #-}
480
481-- -----------------------------------------------------------------------------
482-- * Basic functions
483
484-- | /O(n)/ Adds a character to the front of a 'Text'.  This function
-- is more costly than its list counterpart because it requires
486-- copying a new array.  Subject to fusion.  Performs replacement on
487-- invalid scalar values.
488cons :: Char -> Text -> Text
489cons c t = unstream (S.cons (safe c) (stream t))
490{-# INLINE cons #-}
491
492infixr 5 `cons`
493
494-- | /O(n)/ Adds a character to the end of a 'Text'.  This copies the
495-- entire array in the process, unless fused.  Subject to fusion.
496-- Performs replacement on invalid scalar values.
497snoc :: Text -> Char -> Text
498snoc t c = unstream (S.snoc (stream t) (safe c))
499{-# INLINE snoc #-}
500
501-- | /O(n)/ Appends one 'Text' to the other by copying both of them
502-- into a new 'Text'.  Subject to fusion.
503append :: Text -> Text -> Text
504append a@(Text arr1 off1 len1) b@(Text arr2 off2 len2)
505    | len1 == 0 = b
506    | len2 == 0 = a
507    | len > 0   = Text (A.run x) 0 len
508    | otherwise = overflowError "append"
509    where
510      len = len1+len2
511      x :: ST s (A.MArray s)
512      x = do
513        arr <- A.new len
514        A.copyI arr 0 arr1 off1 len1
515        A.copyI arr len1 arr2 off2 len
516        return arr
517{-# NOINLINE append #-}
518
519{-# RULES
520"TEXT append -> fused" [~1] forall t1 t2.
521    append t1 t2 = unstream (S.append (stream t1) (stream t2))
522"TEXT append -> unfused" [1] forall t1 t2.
523    unstream (S.append (stream t1) (stream t2)) = append t1 t2
524 #-}
525
526-- | /O(1)/ Returns the first character of a 'Text', which must be
527-- non-empty.  Subject to fusion.
528head :: Text -> Char
529head t = S.head (stream t)
530{-# INLINE head #-}
531
532-- | /O(1)/ Returns the first character and rest of a 'Text', or
533-- 'Nothing' if empty. Subject to fusion.
534uncons :: Text -> Maybe (Char, Text)
535uncons t@(Text arr off len)
536    | len <= 0  = Nothing
537    | otherwise = Just $ let !(Iter c d) = iter t 0
538                         in (c, text arr (off+d) (len-d))
539{-# INLINE [1] uncons #-}
540
541-- | Lifted from Control.Arrow and specialized.
542second :: (b -> c) -> (a,b) -> (a,c)
543second f (a, b) = (a, f b)
544
545-- | /O(1)/ Returns the last character of a 'Text', which must be
546-- non-empty.  Subject to fusion.
547last :: Text -> Char
548last (Text arr off len)
549    | len <= 0                 = emptyError "last"
550    | n < 0xDC00 || n > 0xDFFF = unsafeChr n
551    | otherwise                = U16.chr2 n0 n
552    where n  = A.unsafeIndex arr (off+len-1)
553          n0 = A.unsafeIndex arr (off+len-2)
554{-# INLINE [1] last #-}
555
556{-# RULES
557"TEXT last -> fused" [~1] forall t.
558    last t = S.last (stream t)
559"TEXT last -> unfused" [1] forall t.
560    S.last (stream t) = last t
561  #-}
562
563-- | /O(1)/ Returns all characters after the head of a 'Text', which
564-- must be non-empty.  Subject to fusion.
565tail :: Text -> Text
566tail t@(Text arr off len)
567    | len <= 0  = emptyError "tail"
568    | otherwise = text arr (off+d) (len-d)
569    where d = iter_ t 0
570{-# INLINE [1] tail #-}
571
572{-# RULES
573"TEXT tail -> fused" [~1] forall t.
574    tail t = unstream (S.tail (stream t))
575"TEXT tail -> unfused" [1] forall t.
576    unstream (S.tail (stream t)) = tail t
577 #-}
578
579-- | /O(1)/ Returns all but the last character of a 'Text', which must
580-- be non-empty.  Subject to fusion.
581init :: Text -> Text
582init (Text arr off len) | len <= 0                   = emptyError "init"
583                        | n >= 0xDC00 && n <= 0xDFFF = text arr off (len-2)
584                        | otherwise                  = text arr off (len-1)
585    where
586      n = A.unsafeIndex arr (off+len-1)
587{-# INLINE [1] init #-}
588
589{-# RULES
590"TEXT init -> fused" [~1] forall t.
591    init t = unstream (S.init (stream t))
592"TEXT init -> unfused" [1] forall t.
593    unstream (S.init (stream t)) = init t
594 #-}
595
-- | /O(1)/ Returns the 'Text' without its last character, paired with
-- that last character, or 'Nothing' if the 'Text' is empty.
598--
599-- @since 1.2.3.0
600unsnoc :: Text -> Maybe (Text, Char)
601unsnoc (Text arr off len)
602    | len <= 0                 = Nothing
603    | n < 0xDC00 || n > 0xDFFF = Just (text arr off (len-1), unsafeChr n)
604    | otherwise                = Just (text arr off (len-2), U16.chr2 n0 n)
605    where n  = A.unsafeIndex arr (off+len-1)
606          n0 = A.unsafeIndex arr (off+len-2)
607{-# INLINE [1] unsnoc #-}
608
609-- | /O(1)/ Tests whether a 'Text' is empty or not.  Subject to
610-- fusion.
611null :: Text -> Bool
612null (Text _arr _off len) =
613#if defined(ASSERTS)
614    assert (len >= 0) $
615#endif
616    len <= 0
617{-# INLINE [1] null #-}
618
619{-# RULES
620"TEXT null -> fused" [~1] forall t.
621    null t = S.null (stream t)
622"TEXT null -> unfused" [1] forall t.
623    S.null (stream t) = null t
624 #-}
625
626-- | /O(1)/ Tests whether a 'Text' contains exactly one character.
627-- Subject to fusion.
628isSingleton :: Text -> Bool
629isSingleton = S.isSingleton . stream
630{-# INLINE isSingleton #-}
631
632-- | /O(n)/ Returns the number of characters in a 'Text'.
633-- Subject to fusion.
634length :: Text -> Int
635length t = S.length (stream t)
636{-# INLINE [1] length #-}
637-- length needs to be phased after the compareN/length rules otherwise
638-- it may inline before the rules have an opportunity to fire.
639
640-- | /O(n)/ Compare the count of characters in a 'Text' to a number.
641-- Subject to fusion.
642--
643-- This function gives the same answer as comparing against the result
644-- of 'length', but can short circuit if the count of characters is
645-- greater than the number, and hence be more efficient.
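--
-- For example:
--
-- > compareLength (pack "abc") 2 == GT
-- > compareLength (pack "abc") 4 == LT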
646compareLength :: Text -> Int -> Ordering
647compareLength t n = S.compareLengthI (stream t) n
648{-# INLINE [1] compareLength #-}
649
650{-# RULES
651"TEXT compareN/length -> compareLength" [~1] forall t n.
652    compare (length t) n = compareLength t n
653  #-}
654
655{-# RULES
656"TEXT ==N/length -> compareLength/==EQ" [~1] forall t n.
657    eqInt (length t) n = compareLength t n == EQ
658  #-}
659
660{-# RULES
661"TEXT /=N/length -> compareLength//=EQ" [~1] forall t n.
662    neInt (length t) n = compareLength t n /= EQ
663  #-}
664
665{-# RULES
666"TEXT <N/length -> compareLength/==LT" [~1] forall t n.
667    ltInt (length t) n = compareLength t n == LT
668  #-}
669
670{-# RULES
671"TEXT <=N/length -> compareLength//=GT" [~1] forall t n.
672    leInt (length t) n = compareLength t n /= GT
673  #-}
674
675{-# RULES
676"TEXT >N/length -> compareLength/==GT" [~1] forall t n.
677    gtInt (length t) n = compareLength t n == GT
678  #-}
679
680{-# RULES
681"TEXT >=N/length -> compareLength//=LT" [~1] forall t n.
682    geInt (length t) n = compareLength t n /= LT
683  #-}
684
685-- -----------------------------------------------------------------------------
686-- * Transformations
687-- | /O(n)/ 'map' @f@ @t@ is the 'Text' obtained by applying @f@ to
688-- each element of @t@.
689--
690-- Example:
691--
692-- >>> let message = pack "I am not angry. Not at all."
693-- >>> T.map (\c -> if c == '.' then '!' else c) message
694-- "I am not angry! Not at all!"
695--
696-- Subject to fusion.  Performs replacement on invalid scalar values.
697map :: (Char -> Char) -> Text -> Text
698map f t = unstream (S.map (safe . f) (stream t))
699{-# INLINE [1] map #-}
700
701-- | /O(n)/ The 'intercalate' function takes a 'Text' and a list of
702-- 'Text's and concatenates the list after interspersing the first
703-- argument between each element of the list.
704--
705-- Example:
706--
707-- >>> T.intercalate "NI!" ["We", "seek", "the", "Holy", "Grail"]
708-- "WeNI!seekNI!theNI!HolyNI!Grail"
709intercalate :: Text -> [Text] -> Text
710intercalate t = concat . (F.intersperse t)
711{-# INLINE intercalate #-}
712
713-- | /O(n)/ The 'intersperse' function takes a character and places it
714-- between the characters of a 'Text'.
715--
716-- Example:
717--
718-- >>> T.intersperse '.' "SHIELD"
719-- "S.H.I.E.L.D"
720--
721-- Subject to fusion.  Performs replacement on invalid scalar values.
722intersperse     :: Char -> Text -> Text
723intersperse c t = unstream (S.intersperse (safe c) (stream t))
724{-# INLINE intersperse #-}
725
726-- | /O(n)/ Reverse the characters of a string.
727--
728-- Example:
729--
730-- >>> T.reverse "desrever"
731-- "reversed"
732--
733-- Subject to fusion (fuses with its argument).
734reverse :: Text -> Text
735reverse t = S.reverse (stream t)
736{-# INLINE reverse #-}
737
738-- | /O(m+n)/ Replace every non-overlapping occurrence of @needle@ in
739-- @haystack@ with @replacement@.
740--
741-- This function behaves as though it was defined as follows:
742--
743-- @
744-- replace needle replacement haystack =
745--   'intercalate' replacement ('splitOn' needle haystack)
746-- @
747--
748-- As this suggests, each occurrence is replaced exactly once.  So if
749-- @needle@ occurs in @replacement@, that occurrence will /not/ itself
750-- be replaced recursively:
751--
752-- >>> replace "oo" "foo" "oo"
753-- "foo"
754--
755-- In cases where several instances of @needle@ overlap, only the
756-- first one will be replaced:
757--
758-- >>> replace "ofo" "bar" "ofofo"
759-- "barfo"
760--
761-- In (unlikely) bad cases, this function's time complexity degrades
762-- towards /O(n*m)/.
763replace :: Text
764        -- ^ @needle@ to search for.  If this string is empty, an
765        -- error will occur.
766        -> Text
767        -- ^ @replacement@ to replace @needle@ with.
768        -> Text
769        -- ^ @haystack@ in which to search.
770        -> Text
771replace needle@(Text _      _      neeLen)
772               (Text repArr repOff repLen)
773      haystack@(Text hayArr hayOff hayLen)
774  | neeLen == 0 = emptyError "replace"
775  | L.null ixs  = haystack
776  | len > 0     = Text (A.run x) 0 len
777  | otherwise   = empty
778  where
779    ixs = indices needle haystack
780    len = hayLen - (neeLen - repLen) `mul` L.length ixs
781    x :: ST s (A.MArray s)
782    x = do
783      marr <- A.new len
784      let loop (i:is) o d = do
785            let d0 = d + i - o
786                d1 = d0 + repLen
787            A.copyI marr d  hayArr (hayOff+o) d0
788            A.copyI marr d0 repArr repOff d1
789            loop is (i + neeLen) d1
790          loop []     o d = A.copyI marr d hayArr (hayOff+o) len
791      loop ixs 0 0
792      return marr
793
794-- ----------------------------------------------------------------------------
795-- ** Case conversions (folds)
796
797-- $case
798--
799-- When case converting 'Text' values, do not use combinators like
800-- @map toUpper@ to case convert each character of a string
801-- individually, as this gives incorrect results according to the
802-- rules of some writing systems.  The whole-string case conversion
803-- functions from this module, such as @toUpper@, obey the correct
804-- case conversion rules.  As a result, these functions may map one
805-- input character to two or three output characters. For examples,
806-- see the documentation of each function.
807--
808-- /Note/: In some languages, case conversion is a locale- and
809-- context-dependent operation. The case conversion functions in this
810-- module are /not/ locale sensitive. Programs that require locale
811-- sensitivity should use appropriate versions of the
812-- <http://hackage.haskell.org/package/text-icu-0.6.3.7/docs/Data-Text-ICU.html#g:4 case mapping functions from the text-icu package >.
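--
-- As a short illustration of the difference (assuming "Data.Char" is
-- also imported, for its per-character 'Data.Char.toUpper'):
--
-- > toUpper (pack "stra\223e")               == pack "STRASSE"
-- > map Data.Char.toUpper (pack "stra\223e") == pack "STRA\223E"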
813
814-- | /O(n)/ Convert a string to folded case.  Subject to fusion.
815--
816-- This function is mainly useful for performing caseless (also known
817-- as case insensitive) string comparisons.
818--
819-- A string @x@ is a caseless match for a string @y@ if and only if:
820--
821-- @toCaseFold x == toCaseFold y@
822--
823-- The result string may be longer than the input string, and may
824-- differ from applying 'toLower' to the input string.  For instance,
825-- the Armenian small ligature \"&#xfb13;\" (men now, U+FB13) is case
826-- folded to the sequence \"&#x574;\" (men, U+0574) followed by
827-- \"&#x576;\" (now, U+0576), while the Greek \"&#xb5;\" (micro sign,
828-- U+00B5) is case folded to \"&#x3bc;\" (small letter mu, U+03BC)
829-- instead of itself.
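--
-- Reflecting the examples above:
--
-- > toCaseFold (singleton '\xFB13') == pack "\x574\x576"
-- > toCaseFold (singleton '\xB5')   == singleton '\x3BC'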
830toCaseFold :: Text -> Text
831toCaseFold t = unstream (S.toCaseFold (stream t))
832{-# INLINE toCaseFold #-}
833
834-- | /O(n)/ Convert a string to lower case, using simple case
835-- conversion.  Subject to fusion.
836--
837-- The result string may be longer than the input string.  For
838-- instance, \"&#x130;\" (Latin capital letter I with dot above,
839-- U+0130) maps to the sequence \"i\" (Latin small letter i, U+0069)
840-- followed by \" &#x307;\" (combining dot above, U+0307).
841toLower :: Text -> Text
842toLower t = unstream (S.toLower (stream t))
843{-# INLINE toLower #-}
844
845-- | /O(n)/ Convert a string to upper case, using simple case
846-- conversion.  Subject to fusion.
847--
848-- The result string may be longer than the input string.  For
849-- instance, the German \"&#xdf;\" (eszett, U+00DF) maps to the
850-- two-letter sequence \"SS\".
851toUpper :: Text -> Text
852toUpper t = unstream (S.toUpper (stream t))
853{-# INLINE toUpper #-}
854
855-- | /O(n)/ Convert a string to title case, using simple case
856-- conversion. Subject to fusion.
857--
858-- The first letter of the input is converted to title case, as is
859-- every subsequent letter that immediately follows a non-letter.
860-- Every letter that immediately follows another letter is converted
861-- to lower case.
862--
863-- The result string may be longer than the input string. For example,
864-- the Latin small ligature &#xfb02; (U+FB02) is converted to the
865-- sequence Latin capital letter F (U+0046) followed by Latin small
866-- letter l (U+006C).
867--
868-- /Note/: this function does not take language or culture specific
869-- rules into account. For instance, in English, different style
870-- guides disagree on whether the book name \"The Hill of the Red
871-- Fox\" is correctly title cased&#x2014;but this function will
872-- capitalize /every/ word.
873--
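-- For example, reflecting the note above:
--
-- > toTitle (pack "the hill of the red fox") == pack "The Hill Of The Red Fox"
--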
874-- @since 1.0.0.0
875toTitle :: Text -> Text
876toTitle t = unstream (S.toTitle (stream t))
877{-# INLINE toTitle #-}
878
879-- | /O(n)/ Left-justify a string to the given length, using the
880-- specified fill character on the right. Subject to fusion.
881-- Performs replacement on invalid scalar values.
882--
883-- Examples:
884--
885-- >>> justifyLeft 7 'x' "foo"
886-- "fooxxxx"
887--
888-- >>> justifyLeft 3 'x' "foobar"
889-- "foobar"
890justifyLeft :: Int -> Char -> Text -> Text
891justifyLeft k c t
892    | len >= k  = t
893    | otherwise = t `append` replicateChar (k-len) c
894  where len = length t
895{-# INLINE [1] justifyLeft #-}
896
897{-# RULES
898"TEXT justifyLeft -> fused" [~1] forall k c t.
899    justifyLeft k c t = unstream (S.justifyLeftI k c (stream t))
900"TEXT justifyLeft -> unfused" [1] forall k c t.
901    unstream (S.justifyLeftI k c (stream t)) = justifyLeft k c t
902  #-}
903
904-- | /O(n)/ Right-justify a string to the given length, using the
905-- specified fill character on the left.  Performs replacement on
906-- invalid scalar values.
907--
908-- Examples:
909--
910-- >>> justifyRight 7 'x' "bar"
911-- "xxxxbar"
912--
913-- >>> justifyRight 3 'x' "foobar"
914-- "foobar"
915justifyRight :: Int -> Char -> Text -> Text
916justifyRight k c t
917    | len >= k  = t
918    | otherwise = replicateChar (k-len) c `append` t
919  where len = length t
920{-# INLINE justifyRight #-}
921
922-- | /O(n)/ Center a string to the given length, using the specified
923-- fill character on either side.  Performs replacement on invalid
924-- scalar values.
925--
926-- Examples:
927--
928-- >>> center 8 'x' "HS"
929-- "xxxHSxxx"
930center :: Int -> Char -> Text -> Text
931center k c t
932    | len >= k  = t
933    | otherwise = replicateChar l c `append` t `append` replicateChar r c
934  where len = length t
935        d   = k - len
936        r   = d `quot` 2
937        l   = d - r
938{-# INLINE center #-}
939
940-- | /O(n)/ The 'transpose' function transposes the rows and columns
941-- of its 'Text' argument.  Note that this function uses 'pack',
942-- 'unpack', and the list version of transpose, and is thus not very
943-- efficient.
944--
945-- Examples:
946--
947-- >>> transpose ["green","orange"]
948-- ["go","rr","ea","en","ng","e"]
949--
950-- >>> transpose ["blue","red"]
951-- ["br","le","ud","e"]
952transpose :: [Text] -> [Text]
953transpose ts = P.map pack (L.transpose (P.map unpack ts))
954
955-- -----------------------------------------------------------------------------
956-- * Reducing 'Text's (folds)
957
958-- | /O(n)/ 'foldl', applied to a binary operator, a starting value
959-- (typically the left-identity of the operator), and a 'Text',
960-- reduces the 'Text' using the binary operator, from left to right.
961-- Subject to fusion.
962foldl :: (a -> Char -> a) -> a -> Text -> a
963foldl f z t = S.foldl f z (stream t)
964{-# INLINE foldl #-}
965
966-- | /O(n)/ A strict version of 'foldl'.  Subject to fusion.
967foldl' :: (a -> Char -> a) -> a -> Text -> a
968foldl' f z t = S.foldl' f z (stream t)
969{-# INLINE foldl' #-}
970
971-- | /O(n)/ A variant of 'foldl' that has no starting value argument,
972-- and thus must be applied to a non-empty 'Text'.  Subject to fusion.
973foldl1 :: (Char -> Char -> Char) -> Text -> Char
974foldl1 f t = S.foldl1 f (stream t)
975{-# INLINE foldl1 #-}
976
977-- | /O(n)/ A strict version of 'foldl1'.  Subject to fusion.
978foldl1' :: (Char -> Char -> Char) -> Text -> Char
979foldl1' f t = S.foldl1' f (stream t)
980{-# INLINE foldl1' #-}
981
982-- | /O(n)/ 'foldr', applied to a binary operator, a starting value
983-- (typically the right-identity of the operator), and a 'Text',
984-- reduces the 'Text' using the binary operator, from right to left.
985-- Subject to fusion.
986foldr :: (Char -> a -> a) -> a -> Text -> a
987foldr f z t = S.foldr f z (stream t)
988{-# INLINE foldr #-}
989
990-- | /O(n)/ A variant of 'foldr' that has no starting value argument,
991-- and thus must be applied to a non-empty 'Text'.  Subject to
992-- fusion.
993foldr1 :: (Char -> Char -> Char) -> Text -> Char
994foldr1 f t = S.foldr1 f (stream t)
995{-# INLINE foldr1 #-}
996
997-- -----------------------------------------------------------------------------
998-- ** Special folds
999
1000-- | /O(n)/ Concatenate a list of 'Text's.
1001concat :: [Text] -> Text
1002concat ts = case ts' of
1003              [] -> empty
1004              [t] -> t
1005              _ -> Text (A.run go) 0 len
1006  where
1007    ts' = L.filter (not . null) ts
1008    len = sumP "concat" $ L.map lengthWord16 ts'
1009    go :: ST s (A.MArray s)
1010    go = do
1011      arr <- A.new len
1012      let step i (Text a o l) =
1013            let !j = i + l in A.copyI arr i a o j >> return j
1014      foldM step 0 ts' >> return arr
1015
1016-- | /O(n)/ Map a function over a 'Text' that results in a 'Text', and
1017-- concatenate the results.
1018concatMap :: (Char -> Text) -> Text -> Text
1019concatMap f = concat . foldr ((:) . f) []
1020{-# INLINE concatMap #-}
1021
1022-- | /O(n)/ 'any' @p@ @t@ determines whether any character in the
1023-- 'Text' @t@ satisfies the predicate @p@. Subject to fusion.
1024any :: (Char -> Bool) -> Text -> Bool
1025any p t = S.any p (stream t)
1026{-# INLINE any #-}
1027
1028-- | /O(n)/ 'all' @p@ @t@ determines whether all characters in the
1029-- 'Text' @t@ satisfy the predicate @p@. Subject to fusion.
1030all :: (Char -> Bool) -> Text -> Bool
1031all p t = S.all p (stream t)
1032{-# INLINE all #-}
1033
1034-- | /O(n)/ 'maximum' returns the maximum value from a 'Text', which
1035-- must be non-empty. Subject to fusion.
1036maximum :: Text -> Char
1037maximum t = S.maximum (stream t)
1038{-# INLINE maximum #-}
1039
1040-- | /O(n)/ 'minimum' returns the minimum value from a 'Text', which
1041-- must be non-empty. Subject to fusion.
1042minimum :: Text -> Char
1043minimum t = S.minimum (stream t)
1044{-# INLINE minimum #-}
1045
1046-- -----------------------------------------------------------------------------
1047-- * Building 'Text's
1048
1049-- | /O(n)/ 'scanl' is similar to 'foldl', but returns a list of
1050-- successive reduced values from the left. Subject to fusion.
1051-- Performs replacement on invalid scalar values.
1052--
1053-- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
1054--
1055-- Note that
1056--
1057-- > last (scanl f z xs) == foldl f z xs.
1058scanl :: (Char -> Char -> Char) -> Char -> Text -> Text
1059scanl f z t = unstream (S.scanl g z (stream t))
1060    where g a b = safe (f a b)
1061{-# INLINE scanl #-}
1062
1063-- | /O(n)/ 'scanl1' is a variant of 'scanl' that has no starting
1064-- value argument. Performs replacement on invalid scalar values.
1065--
1066-- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
1067scanl1 :: (Char -> Char -> Char) -> Text -> Text
1068scanl1 f t | null t    = empty
1069           | otherwise = scanl f (unsafeHead t) (unsafeTail t)
1070{-# INLINE scanl1 #-}
1071
1072-- | /O(n)/ 'scanr' is the right-to-left dual of 'scanl'.  Performs
1073-- replacement on invalid scalar values.
1074--
1075-- > scanr f v == reverse . scanl (flip f) v . reverse
1076scanr :: (Char -> Char -> Char) -> Char -> Text -> Text
1077scanr f z = S.reverse . S.reverseScanr g z . reverseStream
1078    where g a b = safe (f a b)
1079{-# INLINE scanr #-}
1080
1081-- | /O(n)/ 'scanr1' is a variant of 'scanr' that has no starting
1082-- value argument. Performs replacement on invalid scalar values.
1083scanr1 :: (Char -> Char -> Char) -> Text -> Text
1084scanr1 f t | null t    = empty
1085           | otherwise = scanr f (last t) (init t)
1086{-# INLINE scanr1 #-}
1087
1088-- | /O(n)/ Like a combination of 'map' and 'foldl''. Applies a
1089-- function to each element of a 'Text', passing an accumulating
1090-- parameter from left to right, and returns a final 'Text'.  Performs
1091-- replacement on invalid scalar values.
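--
-- For example, threading a running count through the string while leaving
-- the characters unchanged (a purely illustrative accumulator):
--
-- > mapAccumL (\n c -> (n + 1, c)) (0 :: Int) (pack "abc") == (3, pack "abc")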
1092mapAccumL :: (a -> Char -> (a,Char)) -> a -> Text -> (a, Text)
1093mapAccumL f z0 = S.mapAccumL g z0 . stream
1094    where g a b = second safe (f a b)
1095{-# INLINE mapAccumL #-}
1096
1097-- | The 'mapAccumR' function behaves like a combination of 'map' and
1098-- a strict 'foldr'; it applies a function to each element of a
1099-- 'Text', passing an accumulating parameter from right to left, and
1100-- returning a final value of this accumulator together with the new
1101-- 'Text'.
1102-- Performs replacement on invalid scalar values.
1103mapAccumR :: (a -> Char -> (a,Char)) -> a -> Text -> (a, Text)
1104mapAccumR f z0 = second reverse . S.mapAccumL g z0 . reverseStream
1105    where g a b = second safe (f a b)
1106{-# INLINE mapAccumR #-}
1107
1108-- -----------------------------------------------------------------------------
1109-- ** Generating and unfolding 'Text's
1110
1111-- | /O(n*m)/ 'replicate' @n@ @t@ is a 'Text' consisting of the input
1112-- @t@ repeated @n@ times.
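--
-- For example:
--
-- > replicate 3 (pack "ab") == pack "ababab"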
1113replicate :: Int -> Text -> Text
1114replicate n t@(Text a o l)
1115    | n <= 0 || l <= 0       = empty
1116    | n == 1                 = t
1117    | isSingleton t          = replicateChar n (unsafeHead t)
1118    | otherwise              = Text (A.run x) 0 len
1119  where
1120    len = l `mul` n -- TODO: detect overflows
1121    x :: ST s (A.MArray s)
1122    x = do
1123      arr <- A.new len
1124      A.copyI arr 0 a o l
1125      let loop !l1 =
1126            let rest = len - l1 in
1127            if rest <= l1 then A.copyM arr l1 arr 0 rest >> return arr
1128            else A.copyM arr l1 arr 0 l1 >> loop (l1 `shiftL` 1)
1129      loop l
1130{-# INLINE [1] replicate #-}
1131
1132
1133{-# RULES
1134"TEXT replicate/singleton -> replicateChar" [~1] forall n c.
1135    replicate n (singleton c) = replicateChar n c
1136  #-}
1137
1138-- | /O(n)/ 'replicateChar' @n@ @c@ is a 'Text' of length @n@ with @c@ the
1139-- value of every element. Subject to fusion.
1140replicateChar :: Int -> Char -> Text
1141replicateChar n c = unstream (S.replicateCharI n (safe c))
1142{-# INLINE replicateChar #-}
1143
1144-- | /O(n)/, where @n@ is the length of the result. The 'unfoldr'
1145-- function is analogous to the List 'L.unfoldr'. 'unfoldr' builds a
-- 'Text' from a seed value. The function takes the current seed and
1147-- returns 'Nothing' if it is done producing the 'Text', otherwise
1148-- 'Just' @(a,b)@.  In this case, @a@ is the next 'Char' in the
1149-- string, and @b@ is the seed value for further production. Subject
1150-- to fusion.  Performs replacement on invalid scalar values.
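--
-- For example, a small sketch that produces three copies of one
-- character from an integer seed:
--
-- > unfoldr (\n -> if n == 0 then Nothing else Just ('a', n - 1)) (3 :: Int) == pack "aaa"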
1151unfoldr     :: (a -> Maybe (Char,a)) -> a -> Text
1152unfoldr f s = unstream (S.unfoldr (firstf safe . f) s)
1153{-# INLINE unfoldr #-}
1154
1155-- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a 'Text' from a seed
-- value. However, the length of the result is limited by the
1157-- first argument to 'unfoldrN'. This function is more efficient than
1158-- 'unfoldr' when the maximum length of the result is known and
1159-- correct, otherwise its performance is similar to 'unfoldr'. Subject
1160-- to fusion.  Performs replacement on invalid scalar values.
1161unfoldrN     :: Int -> (a -> Maybe (Char,a)) -> a -> Text
1162unfoldrN n f s = unstream (S.unfoldrN n (firstf safe . f) s)
1163{-# INLINE unfoldrN #-}
1164
1165-- -----------------------------------------------------------------------------
1166-- * Substrings
1167
1168-- | /O(n)/ 'take' @n@, applied to a 'Text', returns the prefix of the
1169-- 'Text' of length @n@, or the 'Text' itself if @n@ is greater than
-- the length of the 'Text'. Subject to fusion.
1171take :: Int -> Text -> Text
1172take n t@(Text arr off len)
1173    | n <= 0    = empty
1174    | n >= len  = t
1175    | otherwise = text arr off (iterN n t)
1176{-# INLINE [1] take #-}
1177
1178iterN :: Int -> Text -> Int
1179iterN n t@(Text _arr _off len) = loop 0 0
1180  where loop !i !cnt
1181            | i >= len || cnt >= n = i
1182            | otherwise            = loop (i+d) (cnt+1)
1183          where d = iter_ t i
1184
1185{-# RULES
1186"TEXT take -> fused" [~1] forall n t.
1187    take n t = unstream (S.take n (stream t))
1188"TEXT take -> unfused" [1] forall n t.
1189    unstream (S.take n (stream t)) = take n t
1190  #-}
1191
-- | /O(n)/ 'takeEnd' @n@ @t@ returns the suffix of @t@ consisting of
-- its last @n@ characters (or all of @t@ if it has fewer than @n@
-- characters).
1194--
1195-- Examples:
1196--
1197-- >>> takeEnd 3 "foobar"
1198-- "bar"
1199--
1200-- @since 1.1.1.0
1201takeEnd :: Int -> Text -> Text
1202takeEnd n t@(Text arr off len)
1203    | n <= 0    = empty
1204    | n >= len  = t
1205    | otherwise = text arr (off+i) (len-i)
1206  where i = iterNEnd n t
1207
1208iterNEnd :: Int -> Text -> Int
1209iterNEnd n t@(Text _arr _off len) = loop (len-1) n
1210  where loop i !m
1211          | m <= 0    = i+1
1212          | i <= 0    = 0
1213          | otherwise = loop (i+d) (m-1)
1214          where d = reverseIter_ t i
1215
1216-- | /O(n)/ 'drop' @n@, applied to a 'Text', returns the suffix of the
1217-- 'Text' after the first @n@ characters, or the empty 'Text' if @n@
1218-- is greater than the length of the 'Text'. Subject to fusion.
1219drop :: Int -> Text -> Text
1220drop n t@(Text arr off len)
1221    | n <= 0    = t
1222    | n >= len  = empty
1223    | otherwise = text arr (off+i) (len-i)
1224  where i = iterN n t
1225{-# INLINE [1] drop #-}
1226
1227{-# RULES
1228"TEXT drop -> fused" [~1] forall n t.
1229    drop n t = unstream (S.drop n (stream t))
1230"TEXT drop -> unfused" [1] forall n t.
1231    unstream (S.drop n (stream t)) = drop n t
1232"TEXT take . drop -> unfused" [1] forall len off t.
1233    unstream (S.take len (S.drop off (stream t))) = take len (drop off t)
1234  #-}
1235
1236-- | /O(n)/ 'dropEnd' @n@ @t@ returns the prefix remaining after
1237-- dropping @n@ characters from the end of @t@.
1238--
1239-- Examples:
1240--
1241-- >>> dropEnd 3 "foobar"
1242-- "foo"
1243--
1244-- @since 1.1.1.0
1245dropEnd :: Int -> Text -> Text
1246dropEnd n t@(Text arr off len)
1247    | n <= 0    = t
1248    | n >= len  = empty
1249    | otherwise = text arr off (iterNEnd n t)
1250
1251-- | /O(n)/ 'takeWhile', applied to a predicate @p@ and a 'Text',
1252-- returns the longest prefix (possibly empty) of elements that
1253-- satisfy @p@.  Subject to fusion.
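--
-- For example:
--
-- > takeWhile (/= ':') (pack "key:value") == pack "key"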
1254takeWhile :: (Char -> Bool) -> Text -> Text
1255takeWhile p t@(Text arr off len) = loop 0
1256  where loop !i | i >= len    = t
1257                | p c         = loop (i+d)
1258                | otherwise   = text arr off i
1259            where Iter c d    = iter t i
1260{-# INLINE [1] takeWhile #-}
1261
1262{-# RULES
1263"TEXT takeWhile -> fused" [~1] forall p t.
1264    takeWhile p t = unstream (S.takeWhile p (stream t))
1265"TEXT takeWhile -> unfused" [1] forall p t.
1266    unstream (S.takeWhile p (stream t)) = takeWhile p t
1267  #-}
1268
1269-- | /O(n)/ 'takeWhileEnd', applied to a predicate @p@ and a 'Text',
1270-- returns the longest suffix (possibly empty) of elements that
-- satisfy @p@.
--
-- Examples:
1273--
1274-- >>> takeWhileEnd (=='o') "foo"
1275-- "oo"
1276--
1277-- @since 1.2.2.0
1278takeWhileEnd :: (Char -> Bool) -> Text -> Text
1279takeWhileEnd p t@(Text arr off len) = loop (len-1) len
1280  where loop !i !l | l <= 0    = t
1281                   | p c       = loop (i+d) (l+d)
1282                   | otherwise = text arr (off+l) (len-l)
1283            where (c,d)        = reverseIter t i
1284{-# INLINE [1] takeWhileEnd #-}
1285
1286-- | /O(n)/ 'dropWhile' @p@ @t@ returns the suffix remaining after
1287-- 'takeWhile' @p@ @t@. Subject to fusion.
1288dropWhile :: (Char -> Bool) -> Text -> Text
1289dropWhile p t@(Text arr off len) = loop 0 0
1290  where loop !i !l | l >= len  = empty
1291                   | p c       = loop (i+d) (l+d)
1292                   | otherwise = Text arr (off+i) (len-l)
1293            where Iter c d     = iter t i
1294{-# INLINE [1] dropWhile #-}
1295
1296{-# RULES
1297"TEXT dropWhile -> fused" [~1] forall p t.
1298    dropWhile p t = unstream (S.dropWhile p (stream t))
1299"TEXT dropWhile -> unfused" [1] forall p t.
1300    unstream (S.dropWhile p (stream t)) = dropWhile p t
1301  #-}
1302
1303-- | /O(n)/ 'dropWhileEnd' @p@ @t@ returns the prefix remaining after
1304-- dropping characters that satisfy the predicate @p@ from the end of
1305-- @t@.
1306--
1307-- Examples:
1308--
1309-- >>> dropWhileEnd (=='.') "foo..."
1310-- "foo"
1311dropWhileEnd :: (Char -> Bool) -> Text -> Text
1312dropWhileEnd p t@(Text arr off len) = loop (len-1) len
1313  where loop !i !l | l <= 0    = empty
1314                   | p c       = loop (i+d) (l+d)
1315                   | otherwise = Text arr off l
1316            where (c,d)        = reverseIter t i
1317{-# INLINE [1] dropWhileEnd #-}
1318
1319-- | /O(n)/ 'dropAround' @p@ @t@ returns the substring remaining after
1320-- dropping characters that satisfy the predicate @p@ from both the
1321-- beginning and end of @t@.  Subject to fusion.
1322dropAround :: (Char -> Bool) -> Text -> Text
1323dropAround p = dropWhile p . dropWhileEnd p
1324{-# INLINE [1] dropAround #-}
1325
1326-- | /O(n)/ Remove leading white space from a string.  Equivalent to:
1327--
1328-- > dropWhile isSpace
1329stripStart :: Text -> Text
1330stripStart = dropWhile isSpace
1331{-# INLINE stripStart #-}
1332
1333-- | /O(n)/ Remove trailing white space from a string.  Equivalent to:
1334--
1335-- > dropWhileEnd isSpace
1336stripEnd :: Text -> Text
1337stripEnd = dropWhileEnd isSpace
1338{-# INLINE [1] stripEnd #-}
1339
1340-- | /O(n)/ Remove leading and trailing white space from a string.
1341-- Equivalent to:
1342--
1343-- > dropAround isSpace
1344strip :: Text -> Text
1345strip = dropAround isSpace
1346{-# INLINE [1] strip #-}
1347
1348-- | /O(n)/ 'splitAt' @n t@ returns a pair whose first element is a
1349-- prefix of @t@ of length @n@, and whose second is the remainder of
1350-- the string. It is equivalent to @('take' n t, 'drop' n t)@.
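--
-- For example:
--
-- >>> splitAt 3 "foobar"
-- ("foo","bar")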
1351splitAt :: Int -> Text -> (Text, Text)
1352splitAt n t@(Text arr off len)
1353    | n <= 0    = (empty, t)
1354    | n >= len  = (t, empty)
1355    | otherwise = let k = iterN n t
1356                  in (text arr off k, text arr (off+k) (len-k))
1357
1358-- | /O(n)/ 'span', applied to a predicate @p@ and text @t@, returns
1359-- a pair whose first element is the longest prefix (possibly empty)
1360-- of @t@ of elements that satisfy @p@, and whose second is the
-- remainder of the 'Text'.
1362--
1363-- >>> T.span (=='0') "000AB"
1364-- ("000","AB")
1365span :: (Char -> Bool) -> Text -> (Text, Text)
1366span p t = case span_ p t of
1367             (# hd,tl #) -> (hd,tl)
1368{-# INLINE span #-}
1369
1370-- | /O(n)/ 'break' is like 'span', but the prefix returned is
1371-- over elements that fail the predicate @p@.
1372--
1373-- >>> T.break (=='c') "180cm"
1374-- ("180","cm")
1375break :: (Char -> Bool) -> Text -> (Text, Text)
1376break p = span (not . p)
1377{-# INLINE break #-}
1378
1379-- | /O(n)/ Group characters in a string according to a predicate.
1380groupBy :: (Char -> Char -> Bool) -> Text -> [Text]
1381groupBy p = loop
1382  where
1383    loop t@(Text arr off len)
1384        | null t    = []
1385        | otherwise = text arr off n : loop (text arr (off+n) (len-n))
1386        where Iter c d = iter t 0
1387              n     = d + findAIndexOrEnd (not . p c) (Text arr (off+d) (len-d))
1388
1389-- | Returns the /array/ index (in units of 'Word16') at which a
1390-- character may be found.  This is /not/ the same as the logical
1391-- index returned by e.g. 'findIndex'.
1392findAIndexOrEnd :: (Char -> Bool) -> Text -> Int
1393findAIndexOrEnd q t@(Text _arr _off len) = go 0
1394    where go !i | i >= len || q c       = i
1395                | otherwise             = go (i+d)
1396                where Iter c d          = iter t i
1397
1398-- | /O(n)/ Group characters in a string by equality.
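--
-- For example:
--
-- >>> group "aabbbc"
-- ["aa","bbb","c"]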
1399group :: Text -> [Text]
1400group = groupBy (==)
1401
1402-- | /O(n)/ Return all initial segments of the given 'Text', shortest
1403-- first.
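--
-- For example:
--
-- >>> inits "abc"
-- ["","a","ab","abc"]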
1404inits :: Text -> [Text]
1405inits t@(Text arr off len) = loop 0
1406    where loop i | i >= len = [t]
1407                 | otherwise = Text arr off i : loop (i + iter_ t i)
1408
1409-- | /O(n)/ Return all final segments of the given 'Text', longest
1410-- first.
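--
-- For example:
--
-- >>> tails "abc"
-- ["abc","bc","c",""]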
1411tails :: Text -> [Text]
1412tails t | null t    = [empty]
1413        | otherwise = t : tails (unsafeTail t)
1414
1415-- $split
1416--
1417-- Splitting functions in this library do not perform character-wise
1418-- copies to create substrings; they just construct new 'Text's that
1419-- are slices of the original.
1420
1421-- | /O(m+n)/ Break a 'Text' into pieces separated by the first 'Text'
1422-- argument (which cannot be empty), consuming the delimiter. An empty
1423-- delimiter is invalid, and will cause an error to be raised.
1424--
1425-- Examples:
1426--
1427-- >>> splitOn "\r\n" "a\r\nb\r\nd\r\ne"
1428-- ["a","b","d","e"]
1429--
1430-- >>> splitOn "aaa"  "aaaXaaaXaaaXaaa"
1431-- ["","X","X","X",""]
1432--
1433-- >>> splitOn "x"    "x"
1434-- ["",""]
1435--
1436-- and
1437--
1438-- > intercalate s . splitOn s         == id
1439-- > splitOn (singleton c)             == split (==c)
1440--
1441-- (Note: the string @s@ to split on above cannot be empty.)
1442--
1443-- In (unlikely) bad cases, this function's time complexity degrades
1444-- towards /O(n*m)/.
1445splitOn :: Text
1446        -- ^ String to split on. If this string is empty, an error
1447        -- will occur.
1448        -> Text
1449        -- ^ Input text.
1450        -> [Text]
1451splitOn pat@(Text _ _ l) src@(Text arr off len)
1452    | l <= 0          = emptyError "splitOn"
1453    | isSingleton pat = split (== unsafeHead pat) src
1454    | otherwise       = go 0 (indices pat src)
1455  where
1456    go !s (x:xs) =  text arr (s+off) (x-s) : go (x+l) xs
1457    go  s _      = [text arr (s+off) (len-s)]
1458{-# INLINE [1] splitOn #-}
1459
1460{-# RULES
1461"TEXT splitOn/singleton -> split/==" [~1] forall c t.
1462    splitOn (singleton c) t = split (==c) t
1463  #-}
1464
1465-- | /O(n)/ Splits a 'Text' into components delimited by separators,
1466-- where the predicate returns True for a separator element.  The
1467-- resulting components do not contain the separators.  Two adjacent
-- separators result in an empty component in the output, e.g.
1469--
1470-- >>> split (=='a') "aabbaca"
1471-- ["","","bb","c",""]
1472--
1473-- >>> split (=='a') ""
1474-- [""]
1475split :: (Char -> Bool) -> Text -> [Text]
split _ t@(Text _arr _off 0) = [t]
1477split p t = loop t
1478    where loop s | null s'   = [l]
1479                 | otherwise = l : loop (unsafeTail s')
1480              where (# l, s' #) = span_ (not . p) s
1481{-# INLINE split #-}
1482
1483-- | /O(n)/ Splits a 'Text' into components of length @k@.  The last
1484-- element may be shorter than the other chunks, depending on the
1485-- length of the input. Examples:
1486--
1487-- >>> chunksOf 3 "foobarbaz"
1488-- ["foo","bar","baz"]
1489--
1490-- >>> chunksOf 4 "haskell.org"
1491-- ["hask","ell.","org"]
1492chunksOf :: Int -> Text -> [Text]
1493chunksOf k = go
1494  where
1495    go t = case splitAt k t of
1496             (a,b) | null a    -> []
1497                   | otherwise -> a : go b
1498{-# INLINE chunksOf #-}
1499
1500-- ----------------------------------------------------------------------------
1501-- * Searching
1502
1503-------------------------------------------------------------------------------
1504-- ** Searching with a predicate
1505
1506-- | /O(n)/ The 'find' function takes a predicate and a 'Text', and
1507-- returns the first element matching the predicate, or 'Nothing' if
1508-- there is no such element. Subject to fusion.
1509find :: (Char -> Bool) -> Text -> Maybe Char
1510find p t = S.findBy p (stream t)
1511{-# INLINE find #-}
1512
1513-- | /O(n)/ The 'partition' function takes a predicate and a 'Text',
1514-- and returns the pair of 'Text's with elements which do and do not
1515-- satisfy the predicate, respectively; i.e.
1516--
1517-- > partition p t == (filter p t, filter (not . p) t)
1518partition :: (Char -> Bool) -> Text -> (Text, Text)
1519partition p t = (filter p t, filter (not . p) t)
1520{-# INLINE partition #-}
1521
1522-- | /O(n)/ 'filter', applied to a predicate and a 'Text',
1523-- returns a 'Text' containing those characters that satisfy the
1524-- predicate.
1525filter :: (Char -> Bool) -> Text -> Text
1526filter p t = unstream (S.filter p (stream t))
1527{-# INLINE filter #-}
1528
1529-- | /O(n+m)/ Find the first instance of @needle@ (which must be
1530-- non-'null') in @haystack@.  The first element of the returned tuple
1531-- is the prefix of @haystack@ before @needle@ is matched.  The second
1532-- is the remainder of @haystack@, starting with the match.
1533--
1534-- Examples:
1535--
1536-- >>> breakOn "::" "a::b::c"
1537-- ("a","::b::c")
1538--
1539-- >>> breakOn "/" "foobar"
1540-- ("foobar","")
1541--
1542-- Laws:
1543--
1544-- > append prefix match == haystack
1545-- >   where (prefix, match) = breakOn needle haystack
1546--
1547-- If you need to break a string by a substring repeatedly (e.g. you
1548-- want to break on every instance of a substring), use 'breakOnAll'
1549-- instead, as it has lower startup overhead.
1550--
1551-- In (unlikely) bad cases, this function's time complexity degrades
1552-- towards /O(n*m)/.
1553breakOn :: Text -> Text -> (Text, Text)
1554breakOn pat src@(Text arr off len)
1555    | null pat  = emptyError "breakOn"
1556    | otherwise = case indices pat src of
1557                    []    -> (src, empty)
1558                    (x:_) -> (text arr off x, text arr (off+x) (len-x))
1559{-# INLINE breakOn #-}
1560
1561-- | /O(n+m)/ Similar to 'breakOn', but searches from the end of the
1562-- string.
1563--
1564-- The first element of the returned tuple is the prefix of @haystack@
1565-- up to and including the last match of @needle@.  The second is the
1566-- remainder of @haystack@, following the match.
1567--
1568-- >>> breakOnEnd "::" "a::b::c"
1569-- ("a::b::","c")
1570breakOnEnd :: Text -> Text -> (Text, Text)
1571breakOnEnd pat src = (reverse b, reverse a)
1572    where (a,b) = breakOn (reverse pat) (reverse src)
1573{-# INLINE breakOnEnd #-}
1574
1575-- | /O(n+m)/ Find all non-overlapping instances of @needle@ in
1576-- @haystack@.  Each element of the returned list consists of a pair:
1577--
1578-- * The entire string prior to the /k/th match (i.e. the prefix)
1579--
1580-- * The /k/th match, followed by the remainder of the string
1581--
1582-- Examples:
1583--
1584-- >>> breakOnAll "::" ""
1585-- []
1586--
1587-- >>> breakOnAll "/" "a/b/c/"
1588-- [("a","/b/c/"),("a/b","/c/"),("a/b/c","/")]
1589--
1590-- In (unlikely) bad cases, this function's time complexity degrades
1591-- towards /O(n*m)/.
1592--
1593-- The @needle@ parameter may not be empty.
1594breakOnAll :: Text              -- ^ @needle@ to search for
1595           -> Text              -- ^ @haystack@ in which to search
1596           -> [(Text, Text)]
1597breakOnAll pat src@(Text arr off slen)
1598    | null pat  = emptyError "breakOnAll"
1599    | otherwise = L.map step (indices pat src)
1600  where
1601    step       x = (chunk 0 x, chunk x (slen-x))
1602    chunk !n !l  = text arr (n+off) l
1603{-# INLINE breakOnAll #-}
1604
1605-------------------------------------------------------------------------------
1606-- ** Indexing 'Text's
1607
1608-- $index
1609--
1610-- If you think of a 'Text' value as an array of 'Char' values (which
1611-- it is not), you run the risk of writing inefficient code.
1612--
1613-- An idiom that is common in some languages is to find the numeric
1614-- offset of a character or substring, then use that number to split
1615-- or trim the searched string.  With a 'Text' value, this approach
1616-- would require two /O(n)/ operations: one to perform the search, and
1617-- one to operate from wherever the search ended.
1618--
1619-- For example, suppose you have a string that you want to split on
1620-- the substring @\"::\"@, such as @\"foo::bar::quux\"@. Instead of
1621-- searching for the index of @\"::\"@ and taking the substrings
-- before and after that index, you would use @breakOnAll \"::\"@.
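--
-- For instance, the search and the split can be combined into a
-- single pass with 'breakOn'; this is only a sketch, and the name
-- @firstBreak@ is illustrative rather than part of this module:
--
-- > import qualified Data.Text as T
-- >
-- > -- evaluates to ("foo", "::bar::quux")
-- > firstBreak :: (T.Text, T.Text)
-- > firstBreak = T.breakOn "::" "foo::bar::quux"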
1623
1624-- | /O(n)/ 'Text' index (subscript) operator, starting from 0. Subject to fusion.
1625index :: Text -> Int -> Char
1626index t n = S.index (stream t) n
1627{-# INLINE index #-}
1628
1629-- | /O(n)/ The 'findIndex' function takes a predicate and a 'Text'
1630-- and returns the index of the first element in the 'Text' satisfying
1631-- the predicate. Subject to fusion.
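--
-- For example:
--
-- >>> findIndex (== 'b') "abc"
-- Just 1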
1632findIndex :: (Char -> Bool) -> Text -> Maybe Int
1633findIndex p t = S.findIndex p (stream t)
1634{-# INLINE findIndex #-}
1635
1636-- | /O(n+m)/ The 'count' function returns the number of times the
1637-- query string appears in the given 'Text'. An empty query string is
1638-- invalid, and will cause an error to be raised.
1639--
1640-- In (unlikely) bad cases, this function's time complexity degrades
1641-- towards /O(n*m)/.
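--
-- For example:
--
-- >>> count "::" "a::b::c"
-- 2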
1642count :: Text -> Text -> Int
1643count pat src
1644    | null pat        = emptyError "count"
1645    | isSingleton pat = countChar (unsafeHead pat) src
1646    | otherwise       = L.length (indices pat src)
1647{-# INLINE [1] count #-}
1648
1649{-# RULES
1650"TEXT count/singleton -> countChar" [~1] forall c t.
1651    count (singleton c) t = countChar c t
1652  #-}
1653
1654-- | /O(n)/ The 'countChar' function returns the number of times the
1655-- query element appears in the given 'Text'. Subject to fusion.
1656countChar :: Char -> Text -> Int
1657countChar c t = S.countChar c (stream t)
1658{-# INLINE countChar #-}
1659
1660-------------------------------------------------------------------------------
1661-- * Zipping
1662
1663-- | /O(n)/ 'zip' takes two 'Text's and returns a list of
-- corresponding pairs of characters. If one input 'Text' is short,
-- excess elements of the longer 'Text' are discarded. This is
-- equivalent to zipping the results of a pair of 'unpack' operations.
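--
-- For example:
--
-- >>> T.zip "abc" "wxyz"
-- [('a','w'),('b','x'),('c','y')]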
1667zip :: Text -> Text -> [(Char,Char)]
1668zip a b = S.unstreamList $ S.zipWith (,) (stream a) (stream b)
1669{-# INLINE zip #-}
1670
1671-- | /O(n)/ 'zipWith' generalises 'zip' by zipping with the function
1672-- given as the first argument, instead of a tupling function.
1673-- Performs replacement on invalid scalar values.
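--
-- For example, one would expect:
--
-- >>> T.zipWith max "ad" "cb"
-- "cd"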
1674zipWith :: (Char -> Char -> Char) -> Text -> Text -> Text
1675zipWith f t1 t2 = unstream (S.zipWith g (stream t1) (stream t2))
1676    where g a b = safe (f a b)
1677{-# INLINE zipWith #-}
1678
1679-- | /O(n)/ Breaks a 'Text' up into a list of words, delimited by 'Char's
1680-- representing white space.
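--
-- For example:
--
-- >>> T.words "  foo bar\n baz "
-- ["foo","bar","baz"]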
1681words :: Text -> [Text]
1682words t@(Text arr off len) = loop 0 0
1683  where
1684    loop !start !n
1685        | n >= len = if start == n
1686                     then []
1687                     else [Text arr (start+off) (n-start)]
1688        | isSpace c =
1689            if start == n
1690            then loop (start+1) (start+1)
1691            else Text arr (start+off) (n-start) : loop (n+d) (n+d)
1692        | otherwise = loop start (n+d)
1693        where Iter c d = iter t n
1694{-# INLINE words #-}
1695
1696-- | /O(n)/ Breaks a 'Text' up into a list of 'Text's at
1697-- newline 'Char's. The resulting strings do not contain newlines.
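--
-- For example:
--
-- >>> T.lines "foo\nbar\n"
-- ["foo","bar"]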
1698lines :: Text -> [Text]
1699lines ps | null ps   = []
1700         | otherwise = h : if null t
1701                           then []
1702                           else lines (unsafeTail t)
1703    where (# h,t #) = span_ (/= '\n') ps
1704{-# INLINE lines #-}
1705
1706{-
1707-- | /O(n)/ Portably breaks a 'Text' up into a list of 'Text's at line
1708-- boundaries.
1709--
1710-- A line boundary is considered to be either a line feed, a carriage
1711-- return immediately followed by a line feed, or a carriage return.
1712-- This accounts for both Unix and Windows line ending conventions,
1713-- and for the old convention used on Mac OS 9 and earlier.
1714lines' :: Text -> [Text]
1715lines' ps | null ps   = []
1716          | otherwise = h : case uncons t of
1717                              Nothing -> []
1718                              Just (c,t')
1719                                  | c == '\n' -> lines t'
1720                                  | c == '\r' -> case uncons t' of
1721                                                   Just ('\n',t'') -> lines t''
1722                                                   _               -> lines t'
1723    where (h,t)    = span notEOL ps
1724          notEOL c = c /= '\n' && c /= '\r'
1725{-# INLINE lines' #-}
1726-}
1727
1728-- | /O(n)/ Joins lines, after appending a terminating newline to
1729-- each.
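--
-- For example:
--
-- >>> T.unlines ["foo","bar"]
-- "foo\nbar\n"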
1730unlines :: [Text] -> Text
1731unlines = concat . L.map (`snoc` '\n')
1732{-# INLINE unlines #-}
1733
1734-- | /O(n)/ Joins words using single space characters.
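--
-- For example:
--
-- >>> T.unwords ["foo","bar"]
-- "foo bar"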
1735unwords :: [Text] -> Text
1736unwords = intercalate (singleton ' ')
1737{-# INLINE unwords #-}
1738
1739-- | /O(n)/ The 'isPrefixOf' function takes two 'Text's and returns
1740-- 'True' iff the first is a prefix of the second.  Subject to fusion.
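--
-- For example:
--
-- >>> isPrefixOf "foo" "foobar"
-- True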
1741isPrefixOf :: Text -> Text -> Bool
1742isPrefixOf a@(Text _ _ alen) b@(Text _ _ blen) =
1743    alen <= blen && S.isPrefixOf (stream a) (stream b)
1744{-# INLINE [1] isPrefixOf #-}
1745
1746{-# RULES
1747"TEXT isPrefixOf -> fused" [~1] forall s t.
1748    isPrefixOf s t = S.isPrefixOf (stream s) (stream t)
1749  #-}
1750
1751-- | /O(n)/ The 'isSuffixOf' function takes two 'Text's and returns
1752-- 'True' iff the first is a suffix of the second.
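--
-- For example:
--
-- >>> isSuffixOf "bar" "foobar"
-- True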
1753isSuffixOf :: Text -> Text -> Bool
1754isSuffixOf a@(Text _aarr _aoff alen) b@(Text barr boff blen) =
1755    d >= 0 && a == b'
1756  where d              = blen - alen
1757        b' | d == 0    = b
1758           | otherwise = Text barr (boff+d) alen
1759{-# INLINE isSuffixOf #-}
1760
1761-- | /O(n+m)/ The 'isInfixOf' function takes two 'Text's and returns
1762-- 'True' iff the first is contained, wholly and intact, anywhere
1763-- within the second.
1764--
1765-- In (unlikely) bad cases, this function's time complexity degrades
1766-- towards /O(n*m)/.
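--
-- For example:
--
-- >>> isInfixOf "oba" "foobar"
-- True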
1767isInfixOf :: Text -> Text -> Bool
1768isInfixOf needle haystack
1769    | null needle        = True
1770    | isSingleton needle = S.elem (unsafeHead needle) . S.stream $ haystack
1771    | otherwise          = not . L.null . indices needle $ haystack
1772{-# INLINE [1] isInfixOf #-}
1773
1774{-# RULES
1775"TEXT isInfixOf/singleton -> S.elem/S.stream" [~1] forall n h.
1776    isInfixOf (singleton n) h = S.elem n (S.stream h)
1777  #-}
1778
1779-------------------------------------------------------------------------------
1780-- * View patterns
1781
1782-- | /O(n)/ Return the suffix of the second string if its prefix
1783-- matches the entire first string.
1784--
1785-- Examples:
1786--
1787-- >>> stripPrefix "foo" "foobar"
1788-- Just "bar"
1789--
1790-- >>> stripPrefix ""    "baz"
1791-- Just "baz"
1792--
1793-- >>> stripPrefix "foo" "quux"
1794-- Nothing
1795--
1796-- This is particularly useful with the @ViewPatterns@ extension to
1797-- GHC, as follows:
1798--
1799-- > {-# LANGUAGE ViewPatterns #-}
1800-- > import Data.Text as T
1801-- >
1802-- > fnordLength :: Text -> Int
1803-- > fnordLength (stripPrefix "fnord" -> Just suf) = T.length suf
1804-- > fnordLength _                                 = -1
1805stripPrefix :: Text -> Text -> Maybe Text
1806stripPrefix p@(Text _arr _off plen) t@(Text arr off len)
1807    | p `isPrefixOf` t = Just $! text arr (off+plen) (len-plen)
1808    | otherwise        = Nothing
1809
1810-- | /O(n)/ Find the longest non-empty common prefix of two strings
1811-- and return it, along with the suffixes of each string at which they
1812-- no longer match.
1813--
1814-- If the strings do not have a common prefix or either one is empty,
1815-- this function returns 'Nothing'.
1816--
1817-- Examples:
1818--
1819-- >>> commonPrefixes "foobar" "fooquux"
1820-- Just ("foo","bar","quux")
1821--
1822-- >>> commonPrefixes "veeble" "fetzer"
1823-- Nothing
1824--
1825-- >>> commonPrefixes "" "baz"
1826-- Nothing
1827commonPrefixes :: Text -> Text -> Maybe (Text,Text,Text)
1828commonPrefixes t0@(Text arr0 off0 len0) t1@(Text arr1 off1 len1) = go 0 0
1829  where
1830    go !i !j | i < len0 && j < len1 && a == b = go (i+d0) (j+d1)
1831             | i > 0     = Just (Text arr0 off0 i,
1832                                 text arr0 (off0+i) (len0-i),
1833                                 text arr1 (off1+j) (len1-j))
1834             | otherwise = Nothing
1835      where Iter a d0 = iter t0 i
1836            Iter b d1 = iter t1 j
1837
1838-- | /O(n)/ Return the prefix of the second string if its suffix
1839-- matches the entire first string.
1840--
1841-- Examples:
1842--
1843-- >>> stripSuffix "bar" "foobar"
1844-- Just "foo"
1845--
1846-- >>> stripSuffix ""    "baz"
1847-- Just "baz"
1848--
1849-- >>> stripSuffix "foo" "quux"
1850-- Nothing
1851--
1852-- This is particularly useful with the @ViewPatterns@ extension to
1853-- GHC, as follows:
1854--
1855-- > {-# LANGUAGE ViewPatterns #-}
1856-- > import Data.Text as T
1857-- >
1858-- > quuxLength :: Text -> Int
1859-- > quuxLength (stripSuffix "quux" -> Just pre) = T.length pre
1860-- > quuxLength _                                = -1
1861stripSuffix :: Text -> Text -> Maybe Text
1862stripSuffix p@(Text _arr _off plen) t@(Text arr off len)
1863    | p `isSuffixOf` t = Just $! text arr off (len-plen)
1864    | otherwise        = Nothing
1865
-- | Sum a list of non-negative numbers.  Errors out on overflow.
1867sumP :: String -> [Int] -> Int
1868sumP fun = go 0
1869  where go !a (x:xs)
1870            | ax >= 0   = go ax xs
1871            | otherwise = overflowError fun
1872          where ax = a + x
1873        go a  _         = a
1874
1875emptyError :: String -> a
1876emptyError fun = P.error $ "Data.Text." ++ fun ++ ": empty input"
1877
1878overflowError :: String -> a
1879overflowError fun = P.error $ "Data.Text." ++ fun ++ ": size overflow"
1880
1881-- | /O(n)/ Make a distinct copy of the given string, sharing no
1882-- storage with the original string.
1883--
1884-- As an example, suppose you read a large string, of which you need
1885-- only a small portion.  If you do not use 'copy', the entire original
1886-- array will be kept alive in memory by the smaller string. Making a
1887-- copy \"breaks the link\" to the original array, allowing it to be
1888-- garbage collected if there are no other live references to it.
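--
-- A minimal sketch of the intended use (the helper name @keepPrefix@
-- is illustrative only, not part of this module):
--
-- > import qualified Data.Text as T
-- >
-- > -- Retain only a short prefix of a large input, without keeping
-- > -- the original array alive.
-- > keepPrefix :: T.Text -> T.Text
-- > keepPrefix = T.copy . T.take 32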
1889copy :: Text -> Text
1890copy (Text arr off len) = Text (A.run go) 0 len
1891  where
1892    go :: ST s (A.MArray s)
1893    go = do
1894      marr <- A.new len
1895      A.copyI marr 0 arr off len
1896      return marr
1897
1898
1899-------------------------------------------------
-- NOTE: the named chunk below is used by doctest;
1901--       verify the doctests via `doctest -fobject-code Data/Text.hs`
1902
1903-- $setup
1904-- >>> :set -XOverloadedStrings
1905-- >>> import qualified Data.Text as T
1906