1{-# LANGUAGE BangPatterns, MagicHash, Rank2Types #-} 2-- | 3-- Module : Data.Text.Internal.Fusion.Common 4-- Copyright : (c) Bryan O'Sullivan 2009, 2012 5-- 6-- License : BSD-style 7-- Maintainer : bos@serpentine.com 8-- Stability : experimental 9-- Portability : GHC 10-- 11-- /Warning/: this is an internal module, and does not have a stable 12-- API or name. Functions in this module may not check or enforce 13-- preconditions expected by public modules. Use at your own risk! 14-- 15-- Common stream fusion functionality for text. 16 17module Data.Text.Internal.Fusion.Common 18 ( 19 -- * Creation and elimination 20 singleton 21 , streamList 22 , unstreamList 23 , streamCString# 24 25 -- * Basic interface 26 , cons 27 , snoc 28 , append 29 , head 30 , uncons 31 , last 32 , tail 33 , init 34 , null 35 , lengthI 36 , compareLengthI 37 , isSingleton 38 39 -- * Transformations 40 , map 41 , intercalate 42 , intersperse 43 44 -- ** Case conversion 45 -- $case 46 , toCaseFold 47 , toLower 48 , toTitle 49 , toUpper 50 51 -- ** Justification 52 , justifyLeftI 53 54 -- * Folds 55 , foldl 56 , foldl' 57 , foldl1 58 , foldl1' 59 , foldr 60 , foldr1 61 62 -- ** Special folds 63 , concat 64 , concatMap 65 , any 66 , all 67 , maximum 68 , minimum 69 70 -- * Construction 71 -- ** Scans 72 , scanl 73 74 -- ** Generation and unfolding 75 , replicateCharI 76 , replicateI 77 , unfoldr 78 , unfoldrNI 79 80 -- * Substrings 81 -- ** Breaking strings 82 , take 83 , drop 84 , takeWhile 85 , dropWhile 86 87 -- * Predicates 88 , isPrefixOf 89 90 -- * Searching 91 , elem 92 , filter 93 94 -- * Indexing 95 , findBy 96 , indexI 97 , findIndexI 98 , countCharI 99 100 -- * Zipping and unzipping 101 , zipWith 102 ) where 103 104import Prelude (Bool(..), Char, Eq(..), Int, Integral, Maybe(..), 105 Ord(..), Ordering(..), String, (.), ($), (+), (-), (*), (++), 106 (&&), fromIntegral, otherwise) 107import qualified Data.List as L 108import qualified Prelude as P 109import Data.Bits (shiftL) 110import Data.Char (isLetter, isSpace) 111import Data.Int (Int64) 112import Data.Text.Internal.Fusion.Types 113import Data.Text.Internal.Fusion.CaseMapping (foldMapping, lowerMapping, titleMapping, 114 upperMapping) 115import Data.Text.Internal.Fusion.Size 116import GHC.Prim (Addr#, chr#, indexCharOffAddr#, ord#) 117import GHC.Types (Char(..), Int(..)) 118 119singleton :: Char -> Stream Char 120singleton c = Stream next False (codePointsSize 1) 121 where next False = Yield c True 122 next True = Done 123{-# INLINE [0] singleton #-} 124 125streamList :: [a] -> Stream a 126{-# INLINE [0] streamList #-} 127streamList s = Stream next s unknownSize 128 where next [] = Done 129 next (x:xs) = Yield x xs 130 131unstreamList :: Stream a -> [a] 132unstreamList (Stream next s0 _len) = unfold s0 133 where unfold !s = case next s of 134 Done -> [] 135 Skip s' -> unfold s' 136 Yield x s' -> x : unfold s' 137{-# INLINE [0] unstreamList #-} 138 139{-# RULES "STREAM streamList/unstreamList fusion" forall s. streamList (unstreamList s) = s #-} 140 141-- | Stream the UTF-8-like packed encoding used by GHC to represent 142-- constant strings in generated code. 143-- 144-- This encoding uses the byte sequence "\xc0\x80" to represent NUL, 145-- and the string is NUL-terminated. 146streamCString# :: Addr# -> Stream Char 147streamCString# addr = Stream step 0 unknownSize 148 where 149 step !i 150 | b == 0 = Done 151 | b <= 0x7f = Yield (C# b#) (i+1) 152 | b <= 0xdf = let !c = chr $ ((b-0xc0) `shiftL` 6) + next 1 153 in Yield c (i+2) 154 | b <= 0xef = let !c = chr $ ((b-0xe0) `shiftL` 12) + 155 (next 1 `shiftL` 6) + 156 next 2 157 in Yield c (i+3) 158 | otherwise = let !c = chr $ ((b-0xf0) `shiftL` 18) + 159 (next 1 `shiftL` 12) + 160 (next 2 `shiftL` 6) + 161 next 3 162 in Yield c (i+4) 163 where b = I# (ord# b#) 164 next n = I# (ord# (at# (i+n))) - 0x80 165 !b# = at# i 166 at# (I# i#) = indexCharOffAddr# addr i# 167 chr (I# i#) = C# (chr# i#) 168{-# INLINE [0] streamCString# #-} 169 170-- ---------------------------------------------------------------------------- 171-- * Basic stream functions 172 173data C s = C0 !s 174 | C1 !s 175 176-- | /O(n)/ Adds a character to the front of a Stream Char. 177cons :: Char -> Stream Char -> Stream Char 178cons !w (Stream next0 s0 len) = Stream next (C1 s0) (len + codePointsSize 1) 179 where 180 next (C1 s) = Yield w (C0 s) 181 next (C0 s) = case next0 s of 182 Done -> Done 183 Skip s' -> Skip (C0 s') 184 Yield x s' -> Yield x (C0 s') 185{-# INLINE [0] cons #-} 186 187data Snoc a = N 188 | J !a 189 190-- | /O(n)/ Adds a character to the end of a stream. 191snoc :: Stream Char -> Char -> Stream Char 192snoc (Stream next0 xs0 len) w = Stream next (J xs0) (len + codePointsSize 1) 193 where 194 next (J xs) = case next0 xs of 195 Done -> Yield w N 196 Skip xs' -> Skip (J xs') 197 Yield x xs' -> Yield x (J xs') 198 next N = Done 199{-# INLINE [0] snoc #-} 200 201data E l r = L !l 202 | R !r 203 204-- | /O(n)/ Appends one Stream to the other. 205append :: Stream Char -> Stream Char -> Stream Char 206append (Stream next0 s01 len1) (Stream next1 s02 len2) = 207 Stream next (L s01) (len1 + len2) 208 where 209 next (L s1) = case next0 s1 of 210 Done -> Skip (R s02) 211 Skip s1' -> Skip (L s1') 212 Yield x s1' -> Yield x (L s1') 213 next (R s2) = case next1 s2 of 214 Done -> Done 215 Skip s2' -> Skip (R s2') 216 Yield x s2' -> Yield x (R s2') 217{-# INLINE [0] append #-} 218 219-- | /O(1)/ Returns the first character of a Text, which must be non-empty. 220-- Subject to array fusion. 221head :: Stream Char -> Char 222head (Stream next s0 _len) = loop_head s0 223 where 224 loop_head !s = case next s of 225 Yield x _ -> x 226 Skip s' -> loop_head s' 227 Done -> head_empty 228{-# INLINE [0] head #-} 229 230head_empty :: a 231head_empty = streamError "head" "Empty stream" 232{-# NOINLINE head_empty #-} 233 234-- | /O(1)/ Returns the first character and remainder of a 'Stream 235-- Char', or 'Nothing' if empty. Subject to array fusion. 236uncons :: Stream Char -> Maybe (Char, Stream Char) 237uncons (Stream next s0 len) = loop_uncons s0 238 where 239 loop_uncons !s = case next s of 240 Yield x s1 -> Just (x, Stream next s1 (len - codePointsSize 1)) 241 Skip s' -> loop_uncons s' 242 Done -> Nothing 243{-# INLINE [0] uncons #-} 244 245-- | /O(n)/ Returns the last character of a 'Stream Char', which must 246-- be non-empty. 247last :: Stream Char -> Char 248last (Stream next s0 _len) = loop0_last s0 249 where 250 loop0_last !s = case next s of 251 Done -> emptyError "last" 252 Skip s' -> loop0_last s' 253 Yield x s' -> loop_last x s' 254 loop_last !x !s = case next s of 255 Done -> x 256 Skip s' -> loop_last x s' 257 Yield x' s' -> loop_last x' s' 258{-# INLINE[0] last #-} 259 260-- | /O(1)/ Returns all characters after the head of a Stream Char, which must 261-- be non-empty. 262tail :: Stream Char -> Stream Char 263tail (Stream next0 s0 len) = Stream next (C0 s0) (len - codePointsSize 1) 264 where 265 next (C0 s) = case next0 s of 266 Done -> emptyError "tail" 267 Skip s' -> Skip (C0 s') 268 Yield _ s' -> Skip (C1 s') 269 next (C1 s) = case next0 s of 270 Done -> Done 271 Skip s' -> Skip (C1 s') 272 Yield x s' -> Yield x (C1 s') 273{-# INLINE [0] tail #-} 274 275data Init s = Init0 !s 276 | Init1 {-# UNPACK #-} !Char !s 277 278-- | /O(1)/ Returns all but the last character of a Stream Char, which 279-- must be non-empty. 280init :: Stream Char -> Stream Char 281init (Stream next0 s0 len) = Stream next (Init0 s0) (len - codePointsSize 1) 282 where 283 next (Init0 s) = case next0 s of 284 Done -> emptyError "init" 285 Skip s' -> Skip (Init0 s') 286 Yield x s' -> Skip (Init1 x s') 287 next (Init1 x s) = case next0 s of 288 Done -> Done 289 Skip s' -> Skip (Init1 x s') 290 Yield x' s' -> Yield x (Init1 x' s') 291{-# INLINE [0] init #-} 292 293-- | /O(1)/ Tests whether a Stream Char is empty or not. 294null :: Stream Char -> Bool 295null (Stream next s0 _len) = loop_null s0 296 where 297 loop_null !s = case next s of 298 Done -> True 299 Yield _ _ -> False 300 Skip s' -> loop_null s' 301{-# INLINE[0] null #-} 302 303-- | /O(n)/ Returns the number of characters in a string. 304lengthI :: Integral a => Stream Char -> a 305lengthI (Stream next s0 _len) = loop_length 0 s0 306 where 307 loop_length !z s = case next s of 308 Done -> z 309 Skip s' -> loop_length z s' 310 Yield _ s' -> loop_length (z + 1) s' 311{-# INLINE[0] lengthI #-} 312 313-- | /O(n)/ Compares the count of characters in a string to a number. 314-- Subject to fusion. 315-- 316-- This function gives the same answer as comparing against the result 317-- of 'lengthI', but can short circuit if the count of characters is 318-- greater than the number or if the stream can't possibly be as long 319-- as the number supplied, and hence be more efficient. 320compareLengthI :: Integral a => Stream Char -> a -> Ordering 321compareLengthI (Stream next s0 len) n 322 -- Note that @len@ tracks code units whereas we want to compare the length 323 -- in code points. Specifically, a stream with hint @len@ may consist of 324 -- anywhere from @len/2@ to @len@ code points. 325 | Just r <- compareSize len n' = r 326 | otherwise = loop_cmp 0 s0 327 where 328 n' = codePointsSize $ fromIntegral n 329 loop_cmp !z s = case next s of 330 Done -> compare z n 331 Skip s' -> loop_cmp z s' 332 Yield _ s' | z > n -> GT 333 | otherwise -> loop_cmp (z + 1) s' 334{-# INLINE[0] compareLengthI #-} 335 336-- | /O(n)/ Indicate whether a string contains exactly one element. 337isSingleton :: Stream Char -> Bool 338isSingleton (Stream next s0 _len) = loop 0 s0 339 where 340 loop !z s = case next s of 341 Done -> z == (1::Int) 342 Skip s' -> loop z s' 343 Yield _ s' 344 | z >= 1 -> False 345 | otherwise -> loop (z+1) s' 346{-# INLINE[0] isSingleton #-} 347 348-- ---------------------------------------------------------------------------- 349-- * Stream transformations 350 351-- | /O(n)/ 'map' @f @xs is the Stream Char obtained by applying @f@ 352-- to each element of @xs@. 353map :: (Char -> Char) -> Stream Char -> Stream Char 354map f (Stream next0 s0 len) = Stream next s0 len 355 where 356 next !s = case next0 s of 357 Done -> Done 358 Skip s' -> Skip s' 359 Yield x s' -> Yield (f x) s' 360{-# INLINE [0] map #-} 361 362{-# 363 RULES "STREAM map/map fusion" forall f g s. 364 map f (map g s) = map (\x -> f (g x)) s 365 #-} 366 367data I s = I1 !s 368 | I2 !s {-# UNPACK #-} !Char 369 | I3 !s 370 371-- | /O(n)/ Take a character and place it between each of the 372-- characters of a 'Stream Char'. 373intersperse :: Char -> Stream Char -> Stream Char 374intersperse c (Stream next0 s0 len) = Stream next (I1 s0) (len + unknownSize) 375 where 376 next (I1 s) = case next0 s of 377 Done -> Done 378 Skip s' -> Skip (I1 s') 379 Yield x s' -> Skip (I2 s' x) 380 next (I2 s x) = Yield x (I3 s) 381 next (I3 s) = case next0 s of 382 Done -> Done 383 Skip s' -> Skip (I3 s') 384 Yield x s' -> Yield c (I2 s' x) 385{-# INLINE [0] intersperse #-} 386 387-- ---------------------------------------------------------------------------- 388-- ** Case conversions (folds) 389 390-- $case 391-- 392-- With Unicode text, it is incorrect to use combinators like @map 393-- toUpper@ to case convert each character of a string individually. 394-- Instead, use the whole-string case conversion functions from this 395-- module. For correctness in different writing systems, these 396-- functions may map one input character to two or three output 397-- characters. 398 399-- | Map a 'Stream' through the given case-mapping function. 400caseConvert :: (forall s. Char -> s -> Step (CC s) Char) 401 -> Stream Char -> Stream Char 402caseConvert remap (Stream next0 s0 len) = 403 Stream next (CC s0 '\0' '\0') (len `unionSize` (3*len)) 404 where 405 next (CC s '\0' _) = 406 case next0 s of 407 Done -> Done 408 Skip s' -> Skip (CC s' '\0' '\0') 409 Yield c s' -> remap c s' 410 next (CC s a b) = Yield a (CC s b '\0') 411 412-- | /O(n)/ Convert a string to folded case. This function is mainly 413-- useful for performing caseless (or case insensitive) string 414-- comparisons. 415-- 416-- A string @x@ is a caseless match for a string @y@ if and only if: 417-- 418-- @toCaseFold x == toCaseFold y@ 419-- 420-- The result string may be longer than the input string, and may 421-- differ from applying 'toLower' to the input string. For instance, 422-- the Armenian small ligature men now (U+FB13) is case folded to the 423-- bigram men now (U+0574 U+0576), while the micro sign (U+00B5) is 424-- case folded to the Greek small letter letter mu (U+03BC) instead of 425-- itself. 426toCaseFold :: Stream Char -> Stream Char 427toCaseFold = caseConvert foldMapping 428{-# INLINE [0] toCaseFold #-} 429 430-- | /O(n)/ Convert a string to upper case, using simple case 431-- conversion. The result string may be longer than the input string. 432-- For instance, the German eszett (U+00DF) maps to the two-letter 433-- sequence SS. 434toUpper :: Stream Char -> Stream Char 435toUpper = caseConvert upperMapping 436{-# INLINE [0] toUpper #-} 437 438-- | /O(n)/ Convert a string to lower case, using simple case 439-- conversion. The result string may be longer than the input string. 440-- For instance, the Latin capital letter I with dot above (U+0130) 441-- maps to the sequence Latin small letter i (U+0069) followed by 442-- combining dot above (U+0307). 443toLower :: Stream Char -> Stream Char 444toLower = caseConvert lowerMapping 445{-# INLINE [0] toLower #-} 446 447-- | /O(n)/ Convert a string to title case, using simple case 448-- conversion. 449-- 450-- The first letter of the input is converted to title case, as is 451-- every subsequent letter that immediately follows a non-letter. 452-- Every letter that immediately follows another letter is converted 453-- to lower case. 454-- 455-- The result string may be longer than the input string. For example, 456-- the Latin small ligature fl (U+FB02) is converted to the 457-- sequence Latin capital letter F (U+0046) followed by Latin small 458-- letter l (U+006C). 459-- 460-- /Note/: this function does not take language or culture specific 461-- rules into account. For instance, in English, different style 462-- guides disagree on whether the book name \"The Hill of the Red 463-- Fox\" is correctly title cased—but this function will 464-- capitalize /every/ word. 465toTitle :: Stream Char -> Stream Char 466toTitle (Stream next0 s0 len) = Stream next (CC (False :*: s0) '\0' '\0') (len + unknownSize) 467 where 468 next (CC (letter :*: s) '\0' _) = 469 case next0 s of 470 Done -> Done 471 Skip s' -> Skip (CC (letter :*: s') '\0' '\0') 472 Yield c s' 473 | nonSpace -> if letter 474 then lowerMapping c (nonSpace :*: s') 475 else titleMapping c (letter' :*: s') 476 | otherwise -> Yield c (CC (letter' :*: s') '\0' '\0') 477 where nonSpace = P.not (isSpace c) 478 letter' = isLetter c 479 next (CC s a b) = Yield a (CC s b '\0') 480{-# INLINE [0] toTitle #-} 481 482data Justify i s = Just1 !i !s 483 | Just2 !i !s 484 485justifyLeftI :: Integral a => a -> Char -> Stream Char -> Stream Char 486justifyLeftI k c (Stream next0 s0 len) = 487 Stream next (Just1 0 s0) (larger (fromIntegral k * charSize c + len) len) 488 where 489 next (Just1 n s) = 490 case next0 s of 491 Done -> next (Just2 n s) 492 Skip s' -> Skip (Just1 n s') 493 Yield x s' -> Yield x (Just1 (n+1) s') 494 next (Just2 n s) 495 | n < k = Yield c (Just2 (n+1) s) 496 | otherwise = Done 497 {-# INLINE next #-} 498{-# INLINE [0] justifyLeftI #-} 499 500-- ---------------------------------------------------------------------------- 501-- * Reducing Streams (folds) 502 503-- | foldl, applied to a binary operator, a starting value (typically the 504-- left-identity of the operator), and a Stream, reduces the Stream using the 505-- binary operator, from left to right. 506foldl :: (b -> Char -> b) -> b -> Stream Char -> b 507foldl f z0 (Stream next s0 _len) = loop_foldl z0 s0 508 where 509 loop_foldl z !s = case next s of 510 Done -> z 511 Skip s' -> loop_foldl z s' 512 Yield x s' -> loop_foldl (f z x) s' 513{-# INLINE [0] foldl #-} 514 515-- | A strict version of foldl. 516foldl' :: (b -> Char -> b) -> b -> Stream Char -> b 517foldl' f z0 (Stream next s0 _len) = loop_foldl' z0 s0 518 where 519 loop_foldl' !z !s = case next s of 520 Done -> z 521 Skip s' -> loop_foldl' z s' 522 Yield x s' -> loop_foldl' (f z x) s' 523{-# INLINE [0] foldl' #-} 524 525-- | foldl1 is a variant of foldl that has no starting value argument, 526-- and thus must be applied to non-empty Streams. 527foldl1 :: (Char -> Char -> Char) -> Stream Char -> Char 528foldl1 f (Stream next s0 _len) = loop0_foldl1 s0 529 where 530 loop0_foldl1 !s = case next s of 531 Skip s' -> loop0_foldl1 s' 532 Yield x s' -> loop_foldl1 x s' 533 Done -> emptyError "foldl1" 534 loop_foldl1 z !s = case next s of 535 Done -> z 536 Skip s' -> loop_foldl1 z s' 537 Yield x s' -> loop_foldl1 (f z x) s' 538{-# INLINE [0] foldl1 #-} 539 540-- | A strict version of foldl1. 541foldl1' :: (Char -> Char -> Char) -> Stream Char -> Char 542foldl1' f (Stream next s0 _len) = loop0_foldl1' s0 543 where 544 loop0_foldl1' !s = case next s of 545 Skip s' -> loop0_foldl1' s' 546 Yield x s' -> loop_foldl1' x s' 547 Done -> emptyError "foldl1" 548 loop_foldl1' !z !s = case next s of 549 Done -> z 550 Skip s' -> loop_foldl1' z s' 551 Yield x s' -> loop_foldl1' (f z x) s' 552{-# INLINE [0] foldl1' #-} 553 554-- | 'foldr', applied to a binary operator, a starting value (typically the 555-- right-identity of the operator), and a stream, reduces the stream using the 556-- binary operator, from right to left. 557foldr :: (Char -> b -> b) -> b -> Stream Char -> b 558foldr f z (Stream next s0 _len) = loop_foldr s0 559 where 560 loop_foldr !s = case next s of 561 Done -> z 562 Skip s' -> loop_foldr s' 563 Yield x s' -> f x (loop_foldr s') 564{-# INLINE [0] foldr #-} 565 566-- | foldr1 is a variant of 'foldr' that has no starting value argument, 567-- and thus must be applied to non-empty streams. 568-- Subject to array fusion. 569foldr1 :: (Char -> Char -> Char) -> Stream Char -> Char 570foldr1 f (Stream next s0 _len) = loop0_foldr1 s0 571 where 572 loop0_foldr1 !s = case next s of 573 Done -> emptyError "foldr1" 574 Skip s' -> loop0_foldr1 s' 575 Yield x s' -> loop_foldr1 x s' 576 577 loop_foldr1 x !s = case next s of 578 Done -> x 579 Skip s' -> loop_foldr1 x s' 580 Yield x' s' -> f x (loop_foldr1 x' s') 581{-# INLINE [0] foldr1 #-} 582 583intercalate :: Stream Char -> [Stream Char] -> Stream Char 584intercalate s = concat . (L.intersperse s) 585{-# INLINE [0] intercalate #-} 586 587-- ---------------------------------------------------------------------------- 588-- ** Special folds 589 590-- | /O(n)/ Concatenate a list of streams. Subject to array fusion. 591concat :: [Stream Char] -> Stream Char 592concat = L.foldr append empty 593{-# INLINE [0] concat #-} 594 595-- | Map a function over a stream that results in a stream and concatenate the 596-- results. 597concatMap :: (Char -> Stream Char) -> Stream Char -> Stream Char 598concatMap f = foldr (append . f) empty 599{-# INLINE [0] concatMap #-} 600 601-- | /O(n)/ any @p @xs determines if any character in the stream 602-- @xs@ satisfies the predicate @p@. 603any :: (Char -> Bool) -> Stream Char -> Bool 604any p (Stream next0 s0 _len) = loop_any s0 605 where 606 loop_any !s = case next0 s of 607 Done -> False 608 Skip s' -> loop_any s' 609 Yield x s' | p x -> True 610 | otherwise -> loop_any s' 611{-# INLINE [0] any #-} 612 613-- | /O(n)/ all @p @xs determines if all characters in the 'Text' 614-- @xs@ satisfy the predicate @p@. 615all :: (Char -> Bool) -> Stream Char -> Bool 616all p (Stream next0 s0 _len) = loop_all s0 617 where 618 loop_all !s = case next0 s of 619 Done -> True 620 Skip s' -> loop_all s' 621 Yield x s' | p x -> loop_all s' 622 | otherwise -> False 623{-# INLINE [0] all #-} 624 625-- | /O(n)/ maximum returns the maximum value from a stream, which must be 626-- non-empty. 627maximum :: Stream Char -> Char 628maximum (Stream next0 s0 _len) = loop0_maximum s0 629 where 630 loop0_maximum !s = case next0 s of 631 Done -> emptyError "maximum" 632 Skip s' -> loop0_maximum s' 633 Yield x s' -> loop_maximum x s' 634 loop_maximum !z !s = case next0 s of 635 Done -> z 636 Skip s' -> loop_maximum z s' 637 Yield x s' 638 | x > z -> loop_maximum x s' 639 | otherwise -> loop_maximum z s' 640{-# INLINE [0] maximum #-} 641 642-- | /O(n)/ minimum returns the minimum value from a 'Text', which must be 643-- non-empty. 644minimum :: Stream Char -> Char 645minimum (Stream next0 s0 _len) = loop0_minimum s0 646 where 647 loop0_minimum !s = case next0 s of 648 Done -> emptyError "minimum" 649 Skip s' -> loop0_minimum s' 650 Yield x s' -> loop_minimum x s' 651 loop_minimum !z !s = case next0 s of 652 Done -> z 653 Skip s' -> loop_minimum z s' 654 Yield x s' 655 | x < z -> loop_minimum x s' 656 | otherwise -> loop_minimum z s' 657{-# INLINE [0] minimum #-} 658 659-- ----------------------------------------------------------------------------- 660-- * Building streams 661 662scanl :: (Char -> Char -> Char) -> Char -> Stream Char -> Stream Char 663scanl f z0 (Stream next0 s0 len) = Stream next (Scan1 z0 s0) (len+1) -- HINT maybe too low 664 where 665 {-# INLINE next #-} 666 next (Scan1 z s) = Yield z (Scan2 z s) 667 next (Scan2 z s) = case next0 s of 668 Yield x s' -> let !x' = f z x 669 in Yield x' (Scan2 x' s') 670 Skip s' -> Skip (Scan2 z s') 671 Done -> Done 672{-# INLINE [0] scanl #-} 673 674-- ----------------------------------------------------------------------------- 675-- ** Generating and unfolding streams 676 677replicateCharI :: Integral a => a -> Char -> Stream Char 678replicateCharI !n !c 679 | n < 0 = empty 680 | otherwise = Stream next 0 (fromIntegral n) -- HINT maybe too low 681 where 682 next !i | i >= n = Done 683 | otherwise = Yield c (i + 1) 684{-# INLINE [0] replicateCharI #-} 685 686data RI s = RI !s {-# UNPACK #-} !Int64 687 688replicateI :: Int64 -> Stream Char -> Stream Char 689replicateI n (Stream next0 s0 len) = 690 Stream next (RI s0 0) (fromIntegral (max 0 n) * len) 691 where 692 next (RI s k) 693 | k >= n = Done 694 | otherwise = case next0 s of 695 Done -> Skip (RI s0 (k+1)) 696 Skip s' -> Skip (RI s' k) 697 Yield x s' -> Yield x (RI s' k) 698{-# INLINE [0] replicateI #-} 699 700-- | /O(n)/, where @n@ is the length of the result. The unfoldr function 701-- is analogous to the List 'unfoldr'. unfoldr builds a stream 702-- from a seed value. The function takes the element and returns 703-- Nothing if it is done producing the stream or returns Just 704-- (a,b), in which case, a is the next Char in the string, and b is 705-- the seed value for further production. 706unfoldr :: (a -> Maybe (Char,a)) -> a -> Stream Char 707unfoldr f s0 = Stream next s0 unknownSize 708 where 709 {-# INLINE next #-} 710 next !s = case f s of 711 Nothing -> Done 712 Just (w, s') -> Yield w s' 713{-# INLINE [0] unfoldr #-} 714 715-- | /O(n)/ Like 'unfoldr', 'unfoldrNI' builds a stream from a seed 716-- value. However, the length of the result is limited by the 717-- first argument to 'unfoldrNI'. This function is more efficient than 718-- 'unfoldr' when the length of the result is known. 719unfoldrNI :: Integral a => a -> (b -> Maybe (Char,b)) -> b -> Stream Char 720unfoldrNI n f s0 | n < 0 = empty 721 | otherwise = Stream next (0 :*: s0) (maxSize $ fromIntegral (n*2)) 722 where 723 {-# INLINE next #-} 724 next (z :*: s) = case f s of 725 Nothing -> Done 726 Just (w, s') | z >= n -> Done 727 | otherwise -> Yield w ((z + 1) :*: s') 728{-# INLINE unfoldrNI #-} 729 730------------------------------------------------------------------------------- 731-- * Substreams 732 733-- | /O(n)/ @'take' n@, applied to a stream, returns the prefix of the 734-- stream of length @n@, or the stream itself if @n@ is greater than the 735-- length of the stream. 736take :: Integral a => a -> Stream Char -> Stream Char 737take n0 (Stream next0 s0 len) = 738 Stream next (n0' :*: s0) (smaller len (codePointsSize $ fromIntegral n0')) 739 where 740 n0' = max n0 0 741 742 {-# INLINE next #-} 743 next (n :*: s) | n <= 0 = Done 744 | otherwise = case next0 s of 745 Done -> Done 746 Skip s' -> Skip (n :*: s') 747 Yield x s' -> Yield x ((n-1) :*: s') 748{-# INLINE [0] take #-} 749 750data Drop a s = NS !s 751 | JS !a !s 752 753-- | /O(n)/ @'drop' n@, applied to a stream, returns the suffix of the 754-- stream after the first @n@ characters, or the empty stream if @n@ 755-- is greater than the length of the stream. 756drop :: Integral a => a -> Stream Char -> Stream Char 757drop n0 (Stream next0 s0 len) = 758 Stream next (JS n0' s0) (len - codePointsSize (fromIntegral n0')) 759 where 760 n0' = max n0 0 761 762 {-# INLINE next #-} 763 next (JS n s) 764 | n <= 0 = Skip (NS s) 765 | otherwise = case next0 s of 766 Done -> Done 767 Skip s' -> Skip (JS n s') 768 Yield _ s' -> Skip (JS (n-1) s') 769 next (NS s) = case next0 s of 770 Done -> Done 771 Skip s' -> Skip (NS s') 772 Yield x s' -> Yield x (NS s') 773{-# INLINE [0] drop #-} 774 775-- | 'takeWhile', applied to a predicate @p@ and a stream, returns the 776-- longest prefix (possibly empty) of elements that satisfy @p@. 777takeWhile :: (Char -> Bool) -> Stream Char -> Stream Char 778takeWhile p (Stream next0 s0 len) = Stream next s0 (len - unknownSize) 779 where 780 {-# INLINE next #-} 781 next !s = case next0 s of 782 Done -> Done 783 Skip s' -> Skip s' 784 Yield x s' | p x -> Yield x s' 785 | otherwise -> Done 786{-# INLINE [0] takeWhile #-} 787 788-- | @'dropWhile' p xs@ returns the suffix remaining after @'takeWhile' p xs@. 789dropWhile :: (Char -> Bool) -> Stream Char -> Stream Char 790dropWhile p (Stream next0 s0 len) = Stream next (L s0) (len - unknownSize) 791 where 792 {-# INLINE next #-} 793 next (L s) = case next0 s of 794 Done -> Done 795 Skip s' -> Skip (L s') 796 Yield x s' | p x -> Skip (L s') 797 | otherwise -> Yield x (R s') 798 next (R s) = case next0 s of 799 Done -> Done 800 Skip s' -> Skip (R s') 801 Yield x s' -> Yield x (R s') 802{-# INLINE [0] dropWhile #-} 803 804-- | /O(n)/ The 'isPrefixOf' function takes two 'Stream's and returns 805-- 'True' iff the first is a prefix of the second. 806isPrefixOf :: (Eq a) => Stream a -> Stream a -> Bool 807isPrefixOf (Stream next1 s1 _) (Stream next2 s2 _) = loop (next1 s1) (next2 s2) 808 where 809 loop Done _ = True 810 loop _ Done = False 811 loop (Skip s1') (Skip s2') = loop (next1 s1') (next2 s2') 812 loop (Skip s1') x2 = loop (next1 s1') x2 813 loop x1 (Skip s2') = loop x1 (next2 s2') 814 loop (Yield x1 s1') (Yield x2 s2') = x1 == x2 && 815 loop (next1 s1') (next2 s2') 816{-# INLINE [0] isPrefixOf #-} 817 818-- ---------------------------------------------------------------------------- 819-- * Searching 820 821------------------------------------------------------------------------------- 822-- ** Searching by equality 823 824-- | /O(n)/ 'elem' is the stream membership predicate. 825elem :: Char -> Stream Char -> Bool 826elem w (Stream next s0 _len) = loop_elem s0 827 where 828 loop_elem !s = case next s of 829 Done -> False 830 Skip s' -> loop_elem s' 831 Yield x s' | x == w -> True 832 | otherwise -> loop_elem s' 833{-# INLINE [0] elem #-} 834 835------------------------------------------------------------------------------- 836-- ** Searching with a predicate 837 838-- | /O(n)/ The 'findBy' function takes a predicate and a stream, 839-- and returns the first element in matching the predicate, or 'Nothing' 840-- if there is no such element. 841 842findBy :: (Char -> Bool) -> Stream Char -> Maybe Char 843findBy p (Stream next s0 _len) = loop_find s0 844 where 845 loop_find !s = case next s of 846 Done -> Nothing 847 Skip s' -> loop_find s' 848 Yield x s' | p x -> Just x 849 | otherwise -> loop_find s' 850{-# INLINE [0] findBy #-} 851 852-- | /O(n)/ Stream index (subscript) operator, starting from 0. 853indexI :: Integral a => Stream Char -> a -> Char 854indexI (Stream next s0 _len) n0 855 | n0 < 0 = streamError "index" "Negative index" 856 | otherwise = loop_index n0 s0 857 where 858 loop_index !n !s = case next s of 859 Done -> streamError "index" "Index too large" 860 Skip s' -> loop_index n s' 861 Yield x s' | n == 0 -> x 862 | otherwise -> loop_index (n-1) s' 863{-# INLINE [0] indexI #-} 864 865-- | /O(n)/ 'filter', applied to a predicate and a stream, 866-- returns a stream containing those characters that satisfy the 867-- predicate. 868filter :: (Char -> Bool) -> Stream Char -> Stream Char 869filter p (Stream next0 s0 len) = 870 Stream next s0 (len - unknownSize) -- HINT maybe too high 871 where 872 next !s = case next0 s of 873 Done -> Done 874 Skip s' -> Skip s' 875 Yield x s' | p x -> Yield x s' 876 | otherwise -> Skip s' 877{-# INLINE [0] filter #-} 878 879{-# RULES 880 "STREAM filter/filter fusion" forall p q s. 881 filter p (filter q s) = filter (\x -> q x && p x) s 882 #-} 883 884-- | The 'findIndexI' function takes a predicate and a stream and 885-- returns the index of the first element in the stream satisfying the 886-- predicate. 887findIndexI :: Integral a => (Char -> Bool) -> Stream Char -> Maybe a 888findIndexI p s = case findIndicesI p s of 889 (i:_) -> Just i 890 _ -> Nothing 891{-# INLINE [0] findIndexI #-} 892 893-- | The 'findIndicesI' function takes a predicate and a stream and 894-- returns all indices of the elements in the stream satisfying the 895-- predicate. 896findIndicesI :: Integral a => (Char -> Bool) -> Stream Char -> [a] 897findIndicesI p (Stream next s0 _len) = loop_findIndex 0 s0 898 where 899 loop_findIndex !i !s = case next s of 900 Done -> [] 901 Skip s' -> loop_findIndex i s' -- hmm. not caught by QC 902 Yield x s' | p x -> i : loop_findIndex (i+1) s' 903 | otherwise -> loop_findIndex (i+1) s' 904{-# INLINE [0] findIndicesI #-} 905 906------------------------------------------------------------------------------- 907-- * Zipping 908 909-- | Strict triple. 910data Zip a b m = Z1 !a !b 911 | Z2 !a !b !m 912 913-- | zipWith generalises 'zip' by zipping with the function given as 914-- the first argument, instead of a tupling function. 915zipWith :: (a -> a -> b) -> Stream a -> Stream a -> Stream b 916zipWith f (Stream next0 sa0 len1) (Stream next1 sb0 len2) = 917 Stream next (Z1 sa0 sb0) (smaller len1 len2) 918 where 919 next (Z1 sa sb) = case next0 sa of 920 Done -> Done 921 Skip sa' -> Skip (Z1 sa' sb) 922 Yield a sa' -> Skip (Z2 sa' sb a) 923 924 next (Z2 sa' sb a) = case next1 sb of 925 Done -> Done 926 Skip sb' -> Skip (Z2 sa' sb' a) 927 Yield b sb' -> Yield (f a b) (Z1 sa' sb') 928{-# INLINE [0] zipWith #-} 929 930-- | /O(n)/ The 'countCharI' function returns the number of times the 931-- query element appears in the given stream. 932countCharI :: Integral a => Char -> Stream Char -> a 933countCharI a (Stream next s0 _len) = loop 0 s0 934 where 935 loop !i !s = case next s of 936 Done -> i 937 Skip s' -> loop i s' 938 Yield x s' | a == x -> loop (i+1) s' 939 | otherwise -> loop i s' 940{-# INLINE [0] countCharI #-} 941 942streamError :: String -> String -> a 943streamError func msg = P.error $ "Data.Text.Internal.Fusion.Common." ++ func ++ ": " ++ msg 944 945emptyError :: String -> a 946emptyError func = internalError func "Empty input" 947 948internalError :: String -> a 949internalError func = streamError func "Internal error" 950