1{-# LANGUAGE BangPatterns, MagicHash, Rank2Types #-}
2-- |
3-- Module      : Data.Text.Internal.Fusion.Common
4-- Copyright   : (c) Bryan O'Sullivan 2009, 2012
5--
6-- License     : BSD-style
7-- Maintainer  : bos@serpentine.com
8-- Stability   : experimental
9-- Portability : GHC
10--
11-- /Warning/: this is an internal module, and does not have a stable
12-- API or name. Functions in this module may not check or enforce
13-- preconditions expected by public modules. Use at your own risk!
14--
15-- Common stream fusion functionality for text.
16
17module Data.Text.Internal.Fusion.Common
18    (
19    -- * Creation and elimination
20      singleton
21    , streamList
22    , unstreamList
23    , streamCString#
24
25    -- * Basic interface
26    , cons
27    , snoc
28    , append
29    , head
30    , uncons
31    , last
32    , tail
33    , init
34    , null
35    , lengthI
36    , compareLengthI
37    , isSingleton
38
39    -- * Transformations
40    , map
41    , intercalate
42    , intersperse
43
44    -- ** Case conversion
45    -- $case
46    , toCaseFold
47    , toLower
48    , toTitle
49    , toUpper
50
51    -- ** Justification
52    , justifyLeftI
53
54    -- * Folds
55    , foldl
56    , foldl'
57    , foldl1
58    , foldl1'
59    , foldr
60    , foldr1
61
62    -- ** Special folds
63    , concat
64    , concatMap
65    , any
66    , all
67    , maximum
68    , minimum
69
70    -- * Construction
71    -- ** Scans
72    , scanl
73
74    -- ** Generation and unfolding
75    , replicateCharI
76    , replicateI
77    , unfoldr
78    , unfoldrNI
79
80    -- * Substrings
81    -- ** Breaking strings
82    , take
83    , drop
84    , takeWhile
85    , dropWhile
86
87    -- * Predicates
88    , isPrefixOf
89
90    -- * Searching
91    , elem
92    , filter
93
94    -- * Indexing
95    , findBy
96    , indexI
97    , findIndexI
98    , countCharI
99
100    -- * Zipping and unzipping
101    , zipWith
102    ) where
103
104import Prelude (Bool(..), Char, Eq(..), Int, Integral, Maybe(..),
105                Ord(..), Ordering(..), String, (.), ($), (+), (-), (*), (++),
106                (&&), fromIntegral, otherwise)
107import qualified Data.List as L
108import qualified Prelude as P
109import Data.Bits (shiftL)
110import Data.Char (isLetter, isSpace)
111import Data.Int (Int64)
112import Data.Text.Internal.Fusion.Types
113import Data.Text.Internal.Fusion.CaseMapping (foldMapping, lowerMapping, titleMapping,
114                                     upperMapping)
115import Data.Text.Internal.Fusion.Size
116import GHC.Prim (Addr#, chr#, indexCharOffAddr#, ord#)
117import GHC.Types (Char(..), Int(..))
118
119singleton :: Char -> Stream Char
120singleton c = Stream next False (codePointsSize 1)
121    where next False = Yield c True
122          next True  = Done
123{-# INLINE [0] singleton #-}
124
125streamList :: [a] -> Stream a
126{-# INLINE [0] streamList #-}
127streamList s  = Stream next s unknownSize
128    where next []       = Done
129          next (x:xs)   = Yield x xs
130
131unstreamList :: Stream a -> [a]
132unstreamList (Stream next s0 _len) = unfold s0
133    where unfold !s = case next s of
134                        Done       -> []
135                        Skip s'    -> unfold s'
136                        Yield x s' -> x : unfold s'
137{-# INLINE [0] unstreamList #-}
138
139{-# RULES "STREAM streamList/unstreamList fusion" forall s. streamList (unstreamList s) = s #-}
140
141-- | Stream the UTF-8-like packed encoding used by GHC to represent
142-- constant strings in generated code.
143--
144-- This encoding uses the byte sequence "\xc0\x80" to represent NUL,
145-- and the string is NUL-terminated.
146streamCString# :: Addr# -> Stream Char
147streamCString# addr = Stream step 0 unknownSize
148  where
149    step !i
150        | b == 0    = Done
151        | b <= 0x7f = Yield (C# b#) (i+1)
152        | b <= 0xdf = let !c = chr $ ((b-0xc0) `shiftL` 6) + next 1
153                      in Yield c (i+2)
154        | b <= 0xef = let !c = chr $ ((b-0xe0) `shiftL` 12) +
155                                      (next 1  `shiftL` 6) +
156                                       next 2
157                      in Yield c (i+3)
158        | otherwise = let !c = chr $ ((b-0xf0) `shiftL` 18) +
159                                      (next 1  `shiftL` 12) +
160                                      (next 2  `shiftL` 6) +
161                                       next 3
162                      in Yield c (i+4)
163      where b      = I# (ord# b#)
164            next n = I# (ord# (at# (i+n))) - 0x80
165            !b#    = at# i
166    at# (I# i#) = indexCharOffAddr# addr i#
167    chr (I# i#) = C# (chr# i#)
168{-# INLINE [0] streamCString# #-}
169
170-- ----------------------------------------------------------------------------
171-- * Basic stream functions
172
173data C s = C0 !s
174         | C1 !s
175
176-- | /O(n)/ Adds a character to the front of a Stream Char.
177cons :: Char -> Stream Char -> Stream Char
178cons !w (Stream next0 s0 len) = Stream next (C1 s0) (len + codePointsSize 1)
179    where
180      next (C1 s) = Yield w (C0 s)
181      next (C0 s) = case next0 s of
182                          Done -> Done
183                          Skip s' -> Skip (C0 s')
184                          Yield x s' -> Yield x (C0 s')
185{-# INLINE [0] cons #-}
186
187data Snoc a = N
188            | J !a
189
190-- | /O(n)/ Adds a character to the end of a stream.
191snoc :: Stream Char -> Char -> Stream Char
192snoc (Stream next0 xs0 len) w = Stream next (J xs0) (len + codePointsSize 1)
193  where
194    next (J xs) = case next0 xs of
195      Done        -> Yield w N
196      Skip xs'    -> Skip    (J xs')
197      Yield x xs' -> Yield x (J xs')
198    next N = Done
199{-# INLINE [0] snoc #-}
200
201data E l r = L !l
202           | R !r
203
204-- | /O(n)/ Appends one Stream to the other.
205append :: Stream Char -> Stream Char -> Stream Char
206append (Stream next0 s01 len1) (Stream next1 s02 len2) =
207    Stream next (L s01) (len1 + len2)
208    where
209      next (L s1) = case next0 s1 of
210                         Done        -> Skip    (R s02)
211                         Skip s1'    -> Skip    (L s1')
212                         Yield x s1' -> Yield x (L s1')
213      next (R s2) = case next1 s2 of
214                          Done        -> Done
215                          Skip s2'    -> Skip    (R s2')
216                          Yield x s2' -> Yield x (R s2')
217{-# INLINE [0] append #-}
218
219-- | /O(1)/ Returns the first character of a Text, which must be non-empty.
220-- Subject to array fusion.
221head :: Stream Char -> Char
222head (Stream next s0 _len) = loop_head s0
223    where
224      loop_head !s = case next s of
225                      Yield x _ -> x
226                      Skip s'   -> loop_head s'
227                      Done      -> head_empty
228{-# INLINE [0] head #-}
229
230head_empty :: a
231head_empty = streamError "head" "Empty stream"
232{-# NOINLINE head_empty #-}
233
234-- | /O(1)/ Returns the first character and remainder of a 'Stream
235-- Char', or 'Nothing' if empty.  Subject to array fusion.
236uncons :: Stream Char -> Maybe (Char, Stream Char)
237uncons (Stream next s0 len) = loop_uncons s0
238    where
239      loop_uncons !s = case next s of
240                         Yield x s1 -> Just (x, Stream next s1 (len - codePointsSize 1))
241                         Skip s'    -> loop_uncons s'
242                         Done       -> Nothing
243{-# INLINE [0] uncons #-}
244
245-- | /O(n)/ Returns the last character of a 'Stream Char', which must
246-- be non-empty.
247last :: Stream Char -> Char
248last (Stream next s0 _len) = loop0_last s0
249    where
250      loop0_last !s = case next s of
251                        Done       -> emptyError "last"
252                        Skip s'    -> loop0_last  s'
253                        Yield x s' -> loop_last x s'
254      loop_last !x !s = case next s of
255                         Done        -> x
256                         Skip s'     -> loop_last x  s'
257                         Yield x' s' -> loop_last x' s'
258{-# INLINE[0] last #-}
259
260-- | /O(1)/ Returns all characters after the head of a Stream Char, which must
261-- be non-empty.
262tail :: Stream Char -> Stream Char
263tail (Stream next0 s0 len) = Stream next (C0 s0) (len - codePointsSize 1)
264    where
265      next (C0 s) = case next0 s of
266                      Done       -> emptyError "tail"
267                      Skip s'    -> Skip (C0 s')
268                      Yield _ s' -> Skip (C1 s')
269      next (C1 s) = case next0 s of
270                      Done       -> Done
271                      Skip s'    -> Skip    (C1 s')
272                      Yield x s' -> Yield x (C1 s')
273{-# INLINE [0] tail #-}
274
275data Init s = Init0 !s
276            | Init1 {-# UNPACK #-} !Char !s
277
278-- | /O(1)/ Returns all but the last character of a Stream Char, which
279-- must be non-empty.
280init :: Stream Char -> Stream Char
281init (Stream next0 s0 len) = Stream next (Init0 s0) (len - codePointsSize 1)
282    where
283      next (Init0 s) = case next0 s of
284                         Done       -> emptyError "init"
285                         Skip s'    -> Skip (Init0 s')
286                         Yield x s' -> Skip (Init1 x s')
287      next (Init1 x s)  = case next0 s of
288                            Done        -> Done
289                            Skip s'     -> Skip    (Init1 x s')
290                            Yield x' s' -> Yield x (Init1 x' s')
291{-# INLINE [0] init #-}
292
293-- | /O(1)/ Tests whether a Stream Char is empty or not.
294null :: Stream Char -> Bool
295null (Stream next s0 _len) = loop_null s0
296    where
297      loop_null !s = case next s of
298                       Done      -> True
299                       Yield _ _ -> False
300                       Skip s'   -> loop_null s'
301{-# INLINE[0] null #-}
302
303-- | /O(n)/ Returns the number of characters in a string.
304lengthI :: Integral a => Stream Char -> a
305lengthI (Stream next s0 _len) = loop_length 0 s0
306    where
307      loop_length !z s  = case next s of
308                           Done       -> z
309                           Skip    s' -> loop_length z s'
310                           Yield _ s' -> loop_length (z + 1) s'
311{-# INLINE[0] lengthI #-}
312
313-- | /O(n)/ Compares the count of characters in a string to a number.
314-- Subject to fusion.
315--
316-- This function gives the same answer as comparing against the result
317-- of 'lengthI', but can short circuit if the count of characters is
318-- greater than the number or if the stream can't possibly be as long
319-- as the number supplied, and hence be more efficient.
320compareLengthI :: Integral a => Stream Char -> a -> Ordering
321compareLengthI (Stream next s0 len) n
322    -- Note that @len@ tracks code units whereas we want to compare the length
323    -- in code points. Specifically, a stream with hint @len@ may consist of
324    -- anywhere from @len/2@ to @len@ code points.
325  | Just r <- compareSize len n' = r
326  | otherwise = loop_cmp 0 s0
327    where
328      n' = codePointsSize $ fromIntegral n
329      loop_cmp !z s  = case next s of
330                         Done       -> compare z n
331                         Skip    s' -> loop_cmp z s'
332                         Yield _ s' | z > n     -> GT
333                                    | otherwise -> loop_cmp (z + 1) s'
334{-# INLINE[0] compareLengthI #-}
335
336-- | /O(n)/ Indicate whether a string contains exactly one element.
337isSingleton :: Stream Char -> Bool
338isSingleton (Stream next s0 _len) = loop 0 s0
339    where
340      loop !z s  = case next s of
341                     Done            -> z == (1::Int)
342                     Skip    s'      -> loop z s'
343                     Yield _ s'
344                         | z >= 1    -> False
345                         | otherwise -> loop (z+1) s'
346{-# INLINE[0] isSingleton #-}
347
348-- ----------------------------------------------------------------------------
349-- * Stream transformations
350
351-- | /O(n)/ 'map' @f @xs is the Stream Char obtained by applying @f@
352-- to each element of @xs@.
353map :: (Char -> Char) -> Stream Char -> Stream Char
354map f (Stream next0 s0 len) = Stream next s0 len
355    where
356      next !s = case next0 s of
357                  Done       -> Done
358                  Skip s'    -> Skip s'
359                  Yield x s' -> Yield (f x) s'
360{-# INLINE [0] map #-}
361
362{-#
363  RULES "STREAM map/map fusion" forall f g s.
364     map f (map g s) = map (\x -> f (g x)) s
365 #-}
366
367data I s = I1 !s
368         | I2 !s {-# UNPACK #-} !Char
369         | I3 !s
370
371-- | /O(n)/ Take a character and place it between each of the
372-- characters of a 'Stream Char'.
373intersperse :: Char -> Stream Char -> Stream Char
374intersperse c (Stream next0 s0 len) = Stream next (I1 s0) (len + unknownSize)
375    where
376      next (I1 s) = case next0 s of
377        Done       -> Done
378        Skip s'    -> Skip (I1 s')
379        Yield x s' -> Skip (I2 s' x)
380      next (I2 s x)  = Yield x (I3 s)
381      next (I3 s) = case next0 s of
382        Done       -> Done
383        Skip s'    -> Skip    (I3 s')
384        Yield x s' -> Yield c (I2 s' x)
385{-# INLINE [0] intersperse #-}
386
387-- ----------------------------------------------------------------------------
388-- ** Case conversions (folds)
389
390-- $case
391--
392-- With Unicode text, it is incorrect to use combinators like @map
393-- toUpper@ to case convert each character of a string individually.
394-- Instead, use the whole-string case conversion functions from this
395-- module.  For correctness in different writing systems, these
396-- functions may map one input character to two or three output
397-- characters.
398
399-- | Map a 'Stream' through the given case-mapping function.
400caseConvert :: (forall s. Char -> s -> Step (CC s) Char)
401            -> Stream Char -> Stream Char
402caseConvert remap (Stream next0 s0 len) =
403    Stream next (CC s0 '\0' '\0') (len `unionSize` (3*len))
404  where
405    next (CC s '\0' _) =
406        case next0 s of
407          Done       -> Done
408          Skip s'    -> Skip (CC s' '\0' '\0')
409          Yield c s' -> remap c s'
410    next (CC s a b)  =  Yield a (CC s b '\0')
411
412-- | /O(n)/ Convert a string to folded case.  This function is mainly
413-- useful for performing caseless (or case insensitive) string
414-- comparisons.
415--
416-- A string @x@ is a caseless match for a string @y@ if and only if:
417--
418-- @toCaseFold x == toCaseFold y@
419--
420-- The result string may be longer than the input string, and may
421-- differ from applying 'toLower' to the input string.  For instance,
422-- the Armenian small ligature men now (U+FB13) is case folded to the
423-- bigram men now (U+0574 U+0576), while the micro sign (U+00B5) is
424-- case folded to the Greek small letter letter mu (U+03BC) instead of
425-- itself.
426toCaseFold :: Stream Char -> Stream Char
427toCaseFold = caseConvert foldMapping
428{-# INLINE [0] toCaseFold #-}
429
430-- | /O(n)/ Convert a string to upper case, using simple case
431-- conversion.  The result string may be longer than the input string.
432-- For instance, the German eszett (U+00DF) maps to the two-letter
433-- sequence SS.
434toUpper :: Stream Char -> Stream Char
435toUpper = caseConvert upperMapping
436{-# INLINE [0] toUpper #-}
437
438-- | /O(n)/ Convert a string to lower case, using simple case
439-- conversion.  The result string may be longer than the input string.
440-- For instance, the Latin capital letter I with dot above (U+0130)
441-- maps to the sequence Latin small letter i (U+0069) followed by
442-- combining dot above (U+0307).
443toLower :: Stream Char -> Stream Char
444toLower = caseConvert lowerMapping
445{-# INLINE [0] toLower #-}
446
447-- | /O(n)/ Convert a string to title case, using simple case
448-- conversion.
449--
450-- The first letter of the input is converted to title case, as is
451-- every subsequent letter that immediately follows a non-letter.
452-- Every letter that immediately follows another letter is converted
453-- to lower case.
454--
455-- The result string may be longer than the input string. For example,
456-- the Latin small ligature &#xfb02; (U+FB02) is converted to the
457-- sequence Latin capital letter F (U+0046) followed by Latin small
458-- letter l (U+006C).
459--
460-- /Note/: this function does not take language or culture specific
461-- rules into account. For instance, in English, different style
462-- guides disagree on whether the book name \"The Hill of the Red
463-- Fox\" is correctly title cased&#x2014;but this function will
464-- capitalize /every/ word.
465toTitle :: Stream Char -> Stream Char
466toTitle (Stream next0 s0 len) = Stream next (CC (False :*: s0) '\0' '\0') (len + unknownSize)
467  where
468    next (CC (letter :*: s) '\0' _) =
469      case next0 s of
470        Done            -> Done
471        Skip s'         -> Skip (CC (letter :*: s') '\0' '\0')
472        Yield c s'
473          | nonSpace    -> if letter
474                           then lowerMapping c (nonSpace :*: s')
475                           else titleMapping c (letter' :*: s')
476          | otherwise   -> Yield c (CC (letter' :*: s') '\0' '\0')
477          where nonSpace = P.not (isSpace c)
478                letter'  = isLetter c
479    next (CC s a b)      = Yield a (CC s b '\0')
480{-# INLINE [0] toTitle #-}
481
482data Justify i s = Just1 !i !s
483                 | Just2 !i !s
484
485justifyLeftI :: Integral a => a -> Char -> Stream Char -> Stream Char
486justifyLeftI k c (Stream next0 s0 len) =
487    Stream next (Just1 0 s0) (larger (fromIntegral k * charSize c + len) len)
488  where
489    next (Just1 n s) =
490        case next0 s of
491          Done       -> next (Just2 n s)
492          Skip s'    -> Skip (Just1 n s')
493          Yield x s' -> Yield x (Just1 (n+1) s')
494    next (Just2 n s)
495        | n < k       = Yield c (Just2 (n+1) s)
496        | otherwise   = Done
497    {-# INLINE next #-}
498{-# INLINE [0] justifyLeftI #-}
499
500-- ----------------------------------------------------------------------------
501-- * Reducing Streams (folds)
502
503-- | foldl, applied to a binary operator, a starting value (typically the
504-- left-identity of the operator), and a Stream, reduces the Stream using the
505-- binary operator, from left to right.
506foldl :: (b -> Char -> b) -> b -> Stream Char -> b
507foldl f z0 (Stream next s0 _len) = loop_foldl z0 s0
508    where
509      loop_foldl z !s = case next s of
510                          Done -> z
511                          Skip s' -> loop_foldl z s'
512                          Yield x s' -> loop_foldl (f z x) s'
513{-# INLINE [0] foldl #-}
514
515-- | A strict version of foldl.
516foldl' :: (b -> Char -> b) -> b -> Stream Char -> b
517foldl' f z0 (Stream next s0 _len) = loop_foldl' z0 s0
518    where
519      loop_foldl' !z !s = case next s of
520                            Done -> z
521                            Skip s' -> loop_foldl' z s'
522                            Yield x s' -> loop_foldl' (f z x) s'
523{-# INLINE [0] foldl' #-}
524
525-- | foldl1 is a variant of foldl that has no starting value argument,
526-- and thus must be applied to non-empty Streams.
527foldl1 :: (Char -> Char -> Char) -> Stream Char -> Char
528foldl1 f (Stream next s0 _len) = loop0_foldl1 s0
529    where
530      loop0_foldl1 !s = case next s of
531                          Skip s' -> loop0_foldl1 s'
532                          Yield x s' -> loop_foldl1 x s'
533                          Done -> emptyError "foldl1"
534      loop_foldl1 z !s = case next s of
535                           Done -> z
536                           Skip s' -> loop_foldl1 z s'
537                           Yield x s' -> loop_foldl1 (f z x) s'
538{-# INLINE [0] foldl1 #-}
539
540-- | A strict version of foldl1.
541foldl1' :: (Char -> Char -> Char) -> Stream Char -> Char
542foldl1' f (Stream next s0 _len) = loop0_foldl1' s0
543    where
544      loop0_foldl1' !s = case next s of
545                           Skip s' -> loop0_foldl1' s'
546                           Yield x s' -> loop_foldl1' x s'
547                           Done -> emptyError "foldl1"
548      loop_foldl1' !z !s = case next s of
549                             Done -> z
550                             Skip s' -> loop_foldl1' z s'
551                             Yield x s' -> loop_foldl1' (f z x) s'
552{-# INLINE [0] foldl1' #-}
553
554-- | 'foldr', applied to a binary operator, a starting value (typically the
555-- right-identity of the operator), and a stream, reduces the stream using the
556-- binary operator, from right to left.
557foldr :: (Char -> b -> b) -> b -> Stream Char -> b
558foldr f z (Stream next s0 _len) = loop_foldr s0
559    where
560      loop_foldr !s = case next s of
561                        Done -> z
562                        Skip s' -> loop_foldr s'
563                        Yield x s' -> f x (loop_foldr s')
564{-# INLINE [0] foldr #-}
565
566-- | foldr1 is a variant of 'foldr' that has no starting value argument,
567-- and thus must be applied to non-empty streams.
568-- Subject to array fusion.
569foldr1 :: (Char -> Char -> Char) -> Stream Char -> Char
570foldr1 f (Stream next s0 _len) = loop0_foldr1 s0
571  where
572    loop0_foldr1 !s = case next s of
573      Done       -> emptyError "foldr1"
574      Skip    s' -> loop0_foldr1  s'
575      Yield x s' -> loop_foldr1 x s'
576
577    loop_foldr1 x !s = case next s of
578      Done        -> x
579      Skip     s' -> loop_foldr1 x s'
580      Yield x' s' -> f x (loop_foldr1 x' s')
581{-# INLINE [0] foldr1 #-}
582
583intercalate :: Stream Char -> [Stream Char] -> Stream Char
584intercalate s = concat . (L.intersperse s)
585{-# INLINE [0] intercalate #-}
586
587-- ----------------------------------------------------------------------------
588-- ** Special folds
589
590-- | /O(n)/ Concatenate a list of streams. Subject to array fusion.
591concat :: [Stream Char] -> Stream Char
592concat = L.foldr append empty
593{-# INLINE [0] concat #-}
594
595-- | Map a function over a stream that results in a stream and concatenate the
596-- results.
597concatMap :: (Char -> Stream Char) -> Stream Char -> Stream Char
598concatMap f = foldr (append . f) empty
599{-# INLINE [0] concatMap #-}
600
601-- | /O(n)/ any @p @xs determines if any character in the stream
602-- @xs@ satisfies the predicate @p@.
603any :: (Char -> Bool) -> Stream Char -> Bool
604any p (Stream next0 s0 _len) = loop_any s0
605    where
606      loop_any !s = case next0 s of
607                      Done                   -> False
608                      Skip s'                -> loop_any s'
609                      Yield x s' | p x       -> True
610                                 | otherwise -> loop_any s'
611{-# INLINE [0] any #-}
612
613-- | /O(n)/ all @p @xs determines if all characters in the 'Text'
614-- @xs@ satisfy the predicate @p@.
615all :: (Char -> Bool) -> Stream Char -> Bool
616all p (Stream next0 s0 _len) = loop_all s0
617    where
618      loop_all !s = case next0 s of
619                      Done                   -> True
620                      Skip s'                -> loop_all s'
621                      Yield x s' | p x       -> loop_all s'
622                                 | otherwise -> False
623{-# INLINE [0] all #-}
624
625-- | /O(n)/ maximum returns the maximum value from a stream, which must be
626-- non-empty.
627maximum :: Stream Char -> Char
628maximum (Stream next0 s0 _len) = loop0_maximum s0
629    where
630      loop0_maximum !s   = case next0 s of
631                             Done       -> emptyError "maximum"
632                             Skip s'    -> loop0_maximum s'
633                             Yield x s' -> loop_maximum x s'
634      loop_maximum !z !s = case next0 s of
635                             Done            -> z
636                             Skip s'         -> loop_maximum z s'
637                             Yield x s'
638                                 | x > z     -> loop_maximum x s'
639                                 | otherwise -> loop_maximum z s'
640{-# INLINE [0] maximum #-}
641
642-- | /O(n)/ minimum returns the minimum value from a 'Text', which must be
643-- non-empty.
644minimum :: Stream Char -> Char
645minimum (Stream next0 s0 _len) = loop0_minimum s0
646    where
647      loop0_minimum !s   = case next0 s of
648                             Done       -> emptyError "minimum"
649                             Skip s'    -> loop0_minimum s'
650                             Yield x s' -> loop_minimum x s'
651      loop_minimum !z !s = case next0 s of
652                             Done            -> z
653                             Skip s'         -> loop_minimum z s'
654                             Yield x s'
655                                 | x < z     -> loop_minimum x s'
656                                 | otherwise -> loop_minimum z s'
657{-# INLINE [0] minimum #-}
658
659-- -----------------------------------------------------------------------------
660-- * Building streams
661
662scanl :: (Char -> Char -> Char) -> Char -> Stream Char -> Stream Char
663scanl f z0 (Stream next0 s0 len) = Stream next (Scan1 z0 s0) (len+1) -- HINT maybe too low
664  where
665    {-# INLINE next #-}
666    next (Scan1 z s) = Yield z (Scan2 z s)
667    next (Scan2 z s) = case next0 s of
668                         Yield x s' -> let !x' = f z x
669                                       in Yield x' (Scan2 x' s')
670                         Skip s'    -> Skip (Scan2 z s')
671                         Done       -> Done
672{-# INLINE [0] scanl #-}
673
674-- -----------------------------------------------------------------------------
675-- ** Generating and unfolding streams
676
677replicateCharI :: Integral a => a -> Char -> Stream Char
678replicateCharI !n !c
679    | n < 0     = empty
680    | otherwise = Stream next 0 (fromIntegral n) -- HINT maybe too low
681  where
682    next !i | i >= n    = Done
683            | otherwise = Yield c (i + 1)
684{-# INLINE [0] replicateCharI #-}
685
686data RI s = RI !s {-# UNPACK #-} !Int64
687
688replicateI :: Int64 -> Stream Char -> Stream Char
689replicateI n (Stream next0 s0 len) =
690    Stream next (RI s0 0) (fromIntegral (max 0 n) * len)
691  where
692    next (RI s k)
693        | k >= n = Done
694        | otherwise = case next0 s of
695                        Done       -> Skip    (RI s0 (k+1))
696                        Skip s'    -> Skip    (RI s' k)
697                        Yield x s' -> Yield x (RI s' k)
698{-# INLINE [0] replicateI #-}
699
700-- | /O(n)/, where @n@ is the length of the result. The unfoldr function
701-- is analogous to the List 'unfoldr'. unfoldr builds a stream
702-- from a seed value. The function takes the element and returns
703-- Nothing if it is done producing the stream or returns Just
704-- (a,b), in which case, a is the next Char in the string, and b is
705-- the seed value for further production.
706unfoldr :: (a -> Maybe (Char,a)) -> a -> Stream Char
707unfoldr f s0 = Stream next s0 unknownSize
708    where
709      {-# INLINE next #-}
710      next !s = case f s of
711                 Nothing      -> Done
712                 Just (w, s') -> Yield w s'
713{-# INLINE [0] unfoldr #-}
714
715-- | /O(n)/ Like 'unfoldr', 'unfoldrNI' builds a stream from a seed
716-- value. However, the length of the result is limited by the
717-- first argument to 'unfoldrNI'. This function is more efficient than
718-- 'unfoldr' when the length of the result is known.
719unfoldrNI :: Integral a => a -> (b -> Maybe (Char,b)) -> b -> Stream Char
720unfoldrNI n f s0 | n <  0    = empty
721                 | otherwise = Stream next (0 :*: s0) (maxSize $ fromIntegral (n*2))
722    where
723      {-# INLINE next #-}
724      next (z :*: s) = case f s of
725          Nothing                  -> Done
726          Just (w, s') | z >= n    -> Done
727                       | otherwise -> Yield w ((z + 1) :*: s')
728{-# INLINE unfoldrNI #-}
729
730-------------------------------------------------------------------------------
731--  * Substreams
732
733-- | /O(n)/ @'take' n@, applied to a stream, returns the prefix of the
734-- stream of length @n@, or the stream itself if @n@ is greater than the
735-- length of the stream.
736take :: Integral a => a -> Stream Char -> Stream Char
737take n0 (Stream next0 s0 len) =
738    Stream next (n0' :*: s0) (smaller len (codePointsSize $ fromIntegral n0'))
739    where
740      n0' = max n0 0
741
742      {-# INLINE next #-}
743      next (n :*: s) | n <= 0    = Done
744                     | otherwise = case next0 s of
745                                     Done -> Done
746                                     Skip s' -> Skip (n :*: s')
747                                     Yield x s' -> Yield x ((n-1) :*: s')
748{-# INLINE [0] take #-}
749
750data Drop a s = NS !s
751              | JS !a !s
752
753-- | /O(n)/ @'drop' n@, applied to a stream, returns the suffix of the
754-- stream after the first @n@ characters, or the empty stream if @n@
755-- is greater than the length of the stream.
756drop :: Integral a => a -> Stream Char -> Stream Char
757drop n0 (Stream next0 s0 len) =
758    Stream next (JS n0' s0) (len - codePointsSize (fromIntegral n0'))
759  where
760    n0' = max n0 0
761
762    {-# INLINE next #-}
763    next (JS n s)
764      | n <= 0    = Skip (NS s)
765      | otherwise = case next0 s of
766          Done       -> Done
767          Skip    s' -> Skip (JS n    s')
768          Yield _ s' -> Skip (JS (n-1) s')
769    next (NS s) = case next0 s of
770      Done       -> Done
771      Skip    s' -> Skip    (NS s')
772      Yield x s' -> Yield x (NS s')
773{-# INLINE [0] drop #-}
774
775-- | 'takeWhile', applied to a predicate @p@ and a stream, returns the
776-- longest prefix (possibly empty) of elements that satisfy @p@.
777takeWhile :: (Char -> Bool) -> Stream Char -> Stream Char
778takeWhile p (Stream next0 s0 len) = Stream next s0 (len - unknownSize)
779    where
780      {-# INLINE next #-}
781      next !s = case next0 s of
782                  Done    -> Done
783                  Skip s' -> Skip s'
784                  Yield x s' | p x       -> Yield x s'
785                             | otherwise -> Done
786{-# INLINE [0] takeWhile #-}
787
788-- | @'dropWhile' p xs@ returns the suffix remaining after @'takeWhile' p xs@.
789dropWhile :: (Char -> Bool) -> Stream Char -> Stream Char
790dropWhile p (Stream next0 s0 len) = Stream next (L s0) (len - unknownSize)
791    where
792    {-# INLINE next #-}
793    next (L s)  = case next0 s of
794      Done                   -> Done
795      Skip    s'             -> Skip    (L s')
796      Yield x s' | p x       -> Skip    (L s')
797                 | otherwise -> Yield x (R s')
798    next (R s) = case next0 s of
799      Done       -> Done
800      Skip    s' -> Skip    (R s')
801      Yield x s' -> Yield x (R s')
802{-# INLINE [0] dropWhile #-}
803
804-- | /O(n)/ The 'isPrefixOf' function takes two 'Stream's and returns
805-- 'True' iff the first is a prefix of the second.
806isPrefixOf :: (Eq a) => Stream a -> Stream a -> Bool
807isPrefixOf (Stream next1 s1 _) (Stream next2 s2 _) = loop (next1 s1) (next2 s2)
808    where
809      loop Done      _ = True
810      loop _    Done = False
811      loop (Skip s1')     (Skip s2')     = loop (next1 s1') (next2 s2')
812      loop (Skip s1')     x2             = loop (next1 s1') x2
813      loop x1             (Skip s2')     = loop x1          (next2 s2')
814      loop (Yield x1 s1') (Yield x2 s2') = x1 == x2 &&
815                                           loop (next1 s1') (next2 s2')
816{-# INLINE [0] isPrefixOf #-}
817
818-- ----------------------------------------------------------------------------
819-- * Searching
820
821-------------------------------------------------------------------------------
822-- ** Searching by equality
823
824-- | /O(n)/ 'elem' is the stream membership predicate.
825elem :: Char -> Stream Char -> Bool
826elem w (Stream next s0 _len) = loop_elem s0
827    where
828      loop_elem !s = case next s of
829                       Done -> False
830                       Skip s' -> loop_elem s'
831                       Yield x s' | x == w -> True
832                                  | otherwise -> loop_elem s'
833{-# INLINE [0] elem #-}
834
835-------------------------------------------------------------------------------
836-- ** Searching with a predicate
837
838-- | /O(n)/ The 'findBy' function takes a predicate and a stream,
839-- and returns the first element in matching the predicate, or 'Nothing'
840-- if there is no such element.
841
842findBy :: (Char -> Bool) -> Stream Char -> Maybe Char
843findBy p (Stream next s0 _len) = loop_find s0
844    where
845      loop_find !s = case next s of
846                       Done -> Nothing
847                       Skip s' -> loop_find s'
848                       Yield x s' | p x -> Just x
849                                  | otherwise -> loop_find s'
850{-# INLINE [0] findBy #-}
851
852-- | /O(n)/ Stream index (subscript) operator, starting from 0.
853indexI :: Integral a => Stream Char -> a -> Char
854indexI (Stream next s0 _len) n0
855  | n0 < 0    = streamError "index" "Negative index"
856  | otherwise = loop_index n0 s0
857  where
858    loop_index !n !s = case next s of
859      Done                   -> streamError "index" "Index too large"
860      Skip    s'             -> loop_index  n    s'
861      Yield x s' | n == 0    -> x
862                 | otherwise -> loop_index (n-1) s'
863{-# INLINE [0] indexI #-}
864
865-- | /O(n)/ 'filter', applied to a predicate and a stream,
866-- returns a stream containing those characters that satisfy the
867-- predicate.
868filter :: (Char -> Bool) -> Stream Char -> Stream Char
869filter p (Stream next0 s0 len) =
870    Stream next s0 (len - unknownSize) -- HINT maybe too high
871  where
872    next !s = case next0 s of
873                Done                   -> Done
874                Skip    s'             -> Skip    s'
875                Yield x s' | p x       -> Yield x s'
876                           | otherwise -> Skip    s'
877{-# INLINE [0] filter #-}
878
879{-# RULES
880  "STREAM filter/filter fusion" forall p q s.
881  filter p (filter q s) = filter (\x -> q x && p x) s
882  #-}
883
884-- | The 'findIndexI' function takes a predicate and a stream and
885-- returns the index of the first element in the stream satisfying the
886-- predicate.
887findIndexI :: Integral a => (Char -> Bool) -> Stream Char -> Maybe a
888findIndexI p s = case findIndicesI p s of
889                  (i:_) -> Just i
890                  _     -> Nothing
891{-# INLINE [0] findIndexI #-}
892
893-- | The 'findIndicesI' function takes a predicate and a stream and
894-- returns all indices of the elements in the stream satisfying the
895-- predicate.
896findIndicesI :: Integral a => (Char -> Bool) -> Stream Char -> [a]
897findIndicesI p (Stream next s0 _len) = loop_findIndex 0 s0
898  where
899    loop_findIndex !i !s = case next s of
900      Done                   -> []
901      Skip    s'             -> loop_findIndex i     s' -- hmm. not caught by QC
902      Yield x s' | p x       -> i : loop_findIndex (i+1) s'
903                 | otherwise -> loop_findIndex (i+1) s'
904{-# INLINE [0] findIndicesI #-}
905
906-------------------------------------------------------------------------------
907-- * Zipping
908
909-- | Strict triple.
910data Zip a b m = Z1 !a !b
911               | Z2 !a !b !m
912
913-- | zipWith generalises 'zip' by zipping with the function given as
914-- the first argument, instead of a tupling function.
915zipWith :: (a -> a -> b) -> Stream a -> Stream a -> Stream b
916zipWith f (Stream next0 sa0 len1) (Stream next1 sb0 len2) =
917    Stream next (Z1 sa0 sb0) (smaller len1 len2)
918    where
919      next (Z1 sa sb) = case next0 sa of
920                          Done -> Done
921                          Skip sa' -> Skip (Z1 sa' sb)
922                          Yield a sa' -> Skip (Z2 sa' sb a)
923
924      next (Z2 sa' sb a) = case next1 sb of
925                             Done -> Done
926                             Skip sb' -> Skip (Z2 sa' sb' a)
927                             Yield b sb' -> Yield (f a b) (Z1 sa' sb')
928{-# INLINE [0] zipWith #-}
929
930-- | /O(n)/ The 'countCharI' function returns the number of times the
931-- query element appears in the given stream.
932countCharI :: Integral a => Char -> Stream Char -> a
933countCharI a (Stream next s0 _len) = loop 0 s0
934  where
935    loop !i !s = case next s of
936      Done                   -> i
937      Skip    s'             -> loop i s'
938      Yield x s' | a == x    -> loop (i+1) s'
939                 | otherwise -> loop i s'
940{-# INLINE [0] countCharI #-}
941
942streamError :: String -> String -> a
943streamError func msg = P.error $ "Data.Text.Internal.Fusion.Common." ++ func ++ ": " ++ msg
944
945emptyError :: String -> a
946emptyError func = internalError func "Empty input"
947
948internalError :: String -> a
949internalError func = streamError func "Internal error"
950