1{-# LANGUAGE CPP #-} 2{-# LANGUAGE MagicHash, UnboxedTuples, 3 NamedFieldPuns, BangPatterns #-} 4{-# OPTIONS_HADDOCK prune #-} 5#if __GLASGOW_HASKELL__ >= 701 6{-# LANGUAGE Trustworthy #-} 7#endif 8 9-- | 10-- Module : Data.ByteString 11-- Copyright : (c) The University of Glasgow 2001, 12-- (c) David Roundy 2003-2005, 13-- (c) Simon Marlow 2005, 14-- (c) Bjorn Bringert 2006, 15-- (c) Don Stewart 2005-2008, 16-- (c) Duncan Coutts 2006-2013 17-- License : BSD-style 18-- 19-- Maintainer : dons00@gmail.com, duncan@community.haskell.org 20-- Stability : stable 21-- Portability : portable 22-- 23-- A time- and space-efficient implementation of byte vectors using 24-- packed Word8 arrays, suitable for high performance use, both in terms 25-- of large data quantities and high speed requirements. Byte vectors 26-- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr', 27-- and can be passed between C and Haskell with little effort. 28-- 29-- The recomended way to assemble ByteStrings from smaller parts 30-- is to use the builder monoid from "Data.ByteString.Builder". 31-- 32-- This module is intended to be imported @qualified@, to avoid name 33-- clashes with "Prelude" functions. eg. 34-- 35-- > import qualified Data.ByteString as B 36-- 37-- Original GHC implementation by Bryan O\'Sullivan. 38-- Rewritten to use 'Data.Array.Unboxed.UArray' by Simon Marlow. 39-- Rewritten to support slices and use 'ForeignPtr' by David Roundy. 40-- Rewritten again and extended by Don Stewart and Duncan Coutts. 41-- 42 43module Data.ByteString ( 44 45 -- * The @ByteString@ type 46 ByteString, -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid 47 48 -- * Introducing and eliminating 'ByteString's 49 empty, -- :: ByteString 50 singleton, -- :: Word8 -> ByteString 51 pack, -- :: [Word8] -> ByteString 52 unpack, -- :: ByteString -> [Word8] 53 54 -- * Basic interface 55 cons, -- :: Word8 -> ByteString -> ByteString 56 snoc, -- :: ByteString -> Word8 -> ByteString 57 append, -- :: ByteString -> ByteString -> ByteString 58 head, -- :: ByteString -> Word8 59 uncons, -- :: ByteString -> Maybe (Word8, ByteString) 60 unsnoc, -- :: ByteString -> Maybe (ByteString, Word8) 61 last, -- :: ByteString -> Word8 62 tail, -- :: ByteString -> ByteString 63 init, -- :: ByteString -> ByteString 64 null, -- :: ByteString -> Bool 65 length, -- :: ByteString -> Int 66 67 -- * Transforming ByteStrings 68 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString 69 reverse, -- :: ByteString -> ByteString 70 intersperse, -- :: Word8 -> ByteString -> ByteString 71 intercalate, -- :: ByteString -> [ByteString] -> ByteString 72 transpose, -- :: [ByteString] -> [ByteString] 73 74 -- * Reducing 'ByteString's (folds) 75 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a 76 foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a 77 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 78 foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 79 80 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a 81 foldr', -- :: (Word8 -> a -> a) -> a -> ByteString -> a 82 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 83 foldr1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 84 85 -- ** Special folds 86 concat, -- :: [ByteString] -> ByteString 87 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString 88 any, -- :: (Word8 -> Bool) -> ByteString -> Bool 89 all, -- :: (Word8 -> Bool) -> ByteString -> Bool 90 maximum, -- :: ByteString -> Word8 91 minimum, -- :: ByteString -> Word8 92 93 -- * Building ByteStrings 94 -- ** Scans 95 scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString 96 scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString 97 scanr, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString 98 scanr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString 99 100 -- ** Accumulating maps 101 mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) 102 mapAccumR, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) 103 104 -- ** Generating and unfolding ByteStrings 105 replicate, -- :: Int -> Word8 -> ByteString 106 unfoldr, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString 107 unfoldrN, -- :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a) 108 109 -- * Substrings 110 111 -- ** Breaking strings 112 take, -- :: Int -> ByteString -> ByteString 113 drop, -- :: Int -> ByteString -> ByteString 114 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString) 115 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString 116 takeWhileEnd, -- :: (Word8 -> Bool) -> ByteString -> ByteString 117 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString 118 dropWhileEnd, -- :: (Word8 -> Bool) -> ByteString -> ByteString 119 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 120 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 121 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 122 breakEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 123 group, -- :: ByteString -> [ByteString] 124 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] 125 inits, -- :: ByteString -> [ByteString] 126 tails, -- :: ByteString -> [ByteString] 127 stripPrefix, -- :: ByteString -> ByteString -> Maybe ByteString 128 stripSuffix, -- :: ByteString -> ByteString -> Maybe ByteString 129 130 -- ** Breaking into many substrings 131 split, -- :: Word8 -> ByteString -> [ByteString] 132 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString] 133 134 -- * Predicates 135 isPrefixOf, -- :: ByteString -> ByteString -> Bool 136 isSuffixOf, -- :: ByteString -> ByteString -> Bool 137 isInfixOf, -- :: ByteString -> ByteString -> Bool 138 139 -- ** Search for arbitrary substrings 140 breakSubstring, -- :: ByteString -> ByteString -> (ByteString,ByteString) 141 findSubstring, -- :: ByteString -> ByteString -> Maybe Int 142 findSubstrings, -- :: ByteString -> ByteString -> [Int] 143 144 -- * Searching ByteStrings 145 146 -- ** Searching by equality 147 elem, -- :: Word8 -> ByteString -> Bool 148 notElem, -- :: Word8 -> ByteString -> Bool 149 150 -- ** Searching with a predicate 151 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8 152 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString 153 partition, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 154 155 -- * Indexing ByteStrings 156 index, -- :: ByteString -> Int -> Word8 157 elemIndex, -- :: Word8 -> ByteString -> Maybe Int 158 elemIndices, -- :: Word8 -> ByteString -> [Int] 159 elemIndexEnd, -- :: Word8 -> ByteString -> Maybe Int 160 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int 161 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int] 162 findIndexEnd, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int 163 count, -- :: Word8 -> ByteString -> Int 164 165 -- * Zipping and unzipping ByteStrings 166 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)] 167 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c] 168 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString) 169 170 -- * Ordered ByteStrings 171 sort, -- :: ByteString -> ByteString 172 173 -- * Low level conversions 174 -- ** Copying ByteStrings 175 copy, -- :: ByteString -> ByteString 176 177 -- ** Packing 'CString's and pointers 178 packCString, -- :: CString -> IO ByteString 179 packCStringLen, -- :: CStringLen -> IO ByteString 180 181 -- ** Using ByteStrings as 'CString's 182 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a 183 useAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a 184 185 -- * I\/O with 'ByteString's 186 187 -- ** Standard input and output 188 getLine, -- :: IO ByteString 189 getContents, -- :: IO ByteString 190 putStr, -- :: ByteString -> IO () 191 putStrLn, -- :: ByteString -> IO () 192 interact, -- :: (ByteString -> ByteString) -> IO () 193 194 -- ** Files 195 readFile, -- :: FilePath -> IO ByteString 196 writeFile, -- :: FilePath -> ByteString -> IO () 197 appendFile, -- :: FilePath -> ByteString -> IO () 198 199 -- ** I\/O with Handles 200 hGetLine, -- :: Handle -> IO ByteString 201 hGetContents, -- :: Handle -> IO ByteString 202 hGet, -- :: Handle -> Int -> IO ByteString 203 hGetSome, -- :: Handle -> Int -> IO ByteString 204 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString 205 hPut, -- :: Handle -> ByteString -> IO () 206 hPutNonBlocking, -- :: Handle -> ByteString -> IO ByteString 207 hPutStr, -- :: Handle -> ByteString -> IO () 208 hPutStrLn, -- :: Handle -> ByteString -> IO () 209 210 breakByte 211 212 ) where 213 214import qualified Prelude as P 215import Prelude hiding (reverse,head,tail,last,init,null 216 ,length,map,lines,foldl,foldr,unlines 217 ,concat,any,take,drop,splitAt,takeWhile 218 ,dropWhile,span,break,elem,filter,maximum 219 ,minimum,all,concatMap,foldl1,foldr1 220 ,scanl,scanl1,scanr,scanr1 221 ,readFile,writeFile,appendFile,replicate 222 ,getContents,getLine,putStr,putStrLn,interact 223 ,zip,zipWith,unzip,notElem 224#if !MIN_VERSION_base(4,6,0) 225 ,catch 226#endif 227 ) 228 229#if MIN_VERSION_base(4,7,0) 230import Data.Bits (finiteBitSize, shiftL, (.|.), (.&.)) 231#else 232import Data.Bits (bitSize, shiftL, (.|.), (.&.)) 233#endif 234 235import Data.ByteString.Internal 236import Data.ByteString.Unsafe 237 238import qualified Data.List as List 239 240import Data.Word (Word8) 241import Data.Maybe (isJust) 242 243import Control.Exception (IOException, catch, finally, assert, throwIO) 244import Control.Monad (when) 245 246import Foreign.C.String (CString, CStringLen) 247import Foreign.C.Types (CSize) 248import Foreign.ForeignPtr (ForeignPtr, withForeignPtr, touchForeignPtr) 249#if MIN_VERSION_base(4,5,0) 250import Foreign.ForeignPtr.Unsafe(unsafeForeignPtrToPtr) 251#else 252import Foreign.ForeignPtr (unsafeForeignPtrToPtr) 253#endif 254import Foreign.Marshal.Alloc (allocaBytes) 255import Foreign.Marshal.Array (allocaArray) 256import Foreign.Ptr 257import Foreign.Storable (Storable(..)) 258 259-- hGetBuf and hPutBuf not available in yhc or nhc 260import System.IO (stdin,stdout,hClose,hFileSize 261 ,hGetBuf,hPutBuf,hGetBufNonBlocking 262 ,hPutBufNonBlocking,withBinaryFile 263 ,IOMode(..)) 264import System.IO.Error (mkIOError, illegalOperationErrorType) 265 266#if !(MIN_VERSION_base(4,8,0)) 267import Data.Monoid (Monoid(..)) 268#endif 269 270#if MIN_VERSION_base(4,3,0) 271import System.IO (hGetBufSome) 272#else 273import System.IO (hWaitForInput, hIsEOF) 274#endif 275 276import Data.IORef 277import GHC.IO.Handle.Internals 278import GHC.IO.Handle.Types 279import GHC.IO.Buffer 280import GHC.IO.BufferedIO as Buffered 281import GHC.IO (unsafePerformIO, unsafeDupablePerformIO) 282import Data.Char (ord) 283import Foreign.Marshal.Utils (copyBytes) 284 285import GHC.Prim (Word#) 286import GHC.Base (build) 287import GHC.Word hiding (Word8) 288 289#if !(MIN_VERSION_base(4,7,0)) 290finiteBitSize = bitSize 291#endif 292 293-- ----------------------------------------------------------------------------- 294-- Introducing and eliminating 'ByteString's 295 296-- | /O(1)/ The empty 'ByteString' 297empty :: ByteString 298empty = PS nullForeignPtr 0 0 299 300-- | /O(1)/ Convert a 'Word8' into a 'ByteString' 301singleton :: Word8 -> ByteString 302singleton c = unsafeCreate 1 $ \p -> poke p c 303{-# INLINE [1] singleton #-} 304 305-- Inline [1] for intercalate rule 306 307-- 308-- XXX The use of unsafePerformIO in allocating functions (unsafeCreate) is critical! 309-- 310-- Otherwise: 311-- 312-- singleton 255 `compare` singleton 127 313-- 314-- is compiled to: 315-- 316-- case mallocByteString 2 of 317-- ForeignPtr f internals -> 318-- case writeWord8OffAddr# f 0 255 of _ -> 319-- case writeWord8OffAddr# f 0 127 of _ -> 320-- case eqAddr# f f of 321-- False -> case compare (GHC.Prim.plusAddr# f 0) 322-- (GHC.Prim.plusAddr# f 0) 323-- 324-- 325 326-- | /O(n)/ Convert a @['Word8']@ into a 'ByteString'. 327-- 328-- For applications with large numbers of string literals, 'pack' can be a 329-- bottleneck. In such cases, consider using 'unsafePackAddress' (GHC only). 330pack :: [Word8] -> ByteString 331pack = packBytes 332 333-- | /O(n)/ Converts a 'ByteString' to a @['Word8']@. 334unpack :: ByteString -> [Word8] 335unpack bs = build (unpackFoldr bs) 336{-# INLINE unpack #-} 337 338-- 339-- Have unpack fuse with good list consumers 340-- 341unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a 342unpackFoldr bs k z = foldr k z bs 343{-# INLINE [0] unpackFoldr #-} 344 345{-# RULES 346"ByteString unpack-list" [1] forall bs . 347 unpackFoldr bs (:) [] = unpackBytes bs 348 #-} 349 350-- --------------------------------------------------------------------- 351-- Basic interface 352 353-- | /O(1)/ Test whether a ByteString is empty. 354null :: ByteString -> Bool 355null (PS _ _ l) = assert (l >= 0) $ l <= 0 356{-# INLINE null #-} 357 358-- --------------------------------------------------------------------- 359-- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'. 360length :: ByteString -> Int 361length (PS _ _ l) = assert (l >= 0) l 362{-# INLINE length #-} 363 364------------------------------------------------------------------------ 365 366infixr 5 `cons` --same as list (:) 367infixl 5 `snoc` 368 369-- | /O(n)/ 'cons' is analogous to (:) for lists, but of different 370-- complexity, as it requires making a copy. 371cons :: Word8 -> ByteString -> ByteString 372cons c (PS x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do 373 poke p c 374 memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l) 375{-# INLINE cons #-} 376 377-- | /O(n)/ Append a byte to the end of a 'ByteString' 378snoc :: ByteString -> Word8 -> ByteString 379snoc (PS x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do 380 memcpy p (f `plusPtr` s) (fromIntegral l) 381 poke (p `plusPtr` l) c 382{-# INLINE snoc #-} 383 384-- todo fuse 385 386-- | /O(1)/ Extract the first element of a ByteString, which must be non-empty. 387-- An exception will be thrown in the case of an empty ByteString. 388head :: ByteString -> Word8 389head (PS x s l) 390 | l <= 0 = errorEmptyList "head" 391 | otherwise = accursedUnutterablePerformIO $ withForeignPtr x $ \p -> peekByteOff p s 392{-# INLINE head #-} 393 394-- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty. 395-- An exception will be thrown in the case of an empty ByteString. 396tail :: ByteString -> ByteString 397tail (PS p s l) 398 | l <= 0 = errorEmptyList "tail" 399 | otherwise = PS p (s+1) (l-1) 400{-# INLINE tail #-} 401 402-- | /O(1)/ Extract the head and tail of a ByteString, returning Nothing 403-- if it is empty. 404uncons :: ByteString -> Maybe (Word8, ByteString) 405uncons (PS x s l) 406 | l <= 0 = Nothing 407 | otherwise = Just (accursedUnutterablePerformIO $ withForeignPtr x 408 $ \p -> peekByteOff p s, 409 PS x (s+1) (l-1)) 410{-# INLINE uncons #-} 411 412-- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty. 413-- An exception will be thrown in the case of an empty ByteString. 414last :: ByteString -> Word8 415last ps@(PS x s l) 416 | null ps = errorEmptyList "last" 417 | otherwise = accursedUnutterablePerformIO $ 418 withForeignPtr x $ \p -> peekByteOff p (s+l-1) 419{-# INLINE last #-} 420 421-- | /O(1)/ Return all the elements of a 'ByteString' except the last one. 422-- An exception will be thrown in the case of an empty ByteString. 423init :: ByteString -> ByteString 424init ps@(PS p s l) 425 | null ps = errorEmptyList "init" 426 | otherwise = PS p s (l-1) 427{-# INLINE init #-} 428 429-- | /O(1)/ Extract the 'init' and 'last' of a ByteString, returning Nothing 430-- if it is empty. 431unsnoc :: ByteString -> Maybe (ByteString, Word8) 432unsnoc (PS x s l) 433 | l <= 0 = Nothing 434 | otherwise = Just (PS x s (l-1), 435 accursedUnutterablePerformIO $ 436 withForeignPtr x $ \p -> peekByteOff p (s+l-1)) 437{-# INLINE unsnoc #-} 438 439-- | /O(n)/ Append two ByteStrings 440append :: ByteString -> ByteString -> ByteString 441append = mappend 442{-# INLINE append #-} 443 444-- --------------------------------------------------------------------- 445-- Transformations 446 447-- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each 448-- element of @xs@. 449map :: (Word8 -> Word8) -> ByteString -> ByteString 450map f (PS fp s len) = unsafeDupablePerformIO $ withForeignPtr fp $ \a -> 451 create len $ map_ 0 (a `plusPtr` s) 452 where 453 map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO () 454 map_ !n !p1 !p2 455 | n >= len = return () 456 | otherwise = do 457 x <- peekByteOff p1 n 458 pokeByteOff p2 n (f x) 459 map_ (n+1) p1 p2 460{-# INLINE map #-} 461 462-- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order. 463reverse :: ByteString -> ByteString 464reverse (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> 465 c_reverse p (f `plusPtr` s) (fromIntegral l) 466 467-- | /O(n)/ The 'intersperse' function takes a 'Word8' and a 468-- 'ByteString' and \`intersperses\' that byte between the elements of 469-- the 'ByteString'. It is analogous to the intersperse function on 470-- Lists. 471intersperse :: Word8 -> ByteString -> ByteString 472intersperse c ps@(PS x s l) 473 | length ps < 2 = ps 474 | otherwise = unsafeCreate (2*l-1) $ \p -> withForeignPtr x $ \f -> 475 c_intersperse p (f `plusPtr` s) (fromIntegral l) c 476 477-- | The 'transpose' function transposes the rows and columns of its 478-- 'ByteString' argument. 479transpose :: [ByteString] -> [ByteString] 480transpose ps = P.map pack . List.transpose . P.map unpack $ ps 481 482-- --------------------------------------------------------------------- 483-- Reducing 'ByteString's 484 485-- | 'foldl', applied to a binary operator, a starting value (typically 486-- the left-identity of the operator), and a ByteString, reduces the 487-- ByteString using the binary operator, from left to right. 488-- 489foldl :: (a -> Word8 -> a) -> a -> ByteString -> a 490foldl f z (PS fp off len) = 491 let p = unsafeForeignPtrToPtr fp 492 in go (p `plusPtr` (off+len-1)) (p `plusPtr` (off-1)) 493 where 494 -- not tail recursive; traverses array right to left 495 go !p !q | p == q = z 496 | otherwise = let !x = accursedUnutterablePerformIO $ do 497 x' <- peek p 498 touchForeignPtr fp 499 return x' 500 in f (go (p `plusPtr` (-1)) q) x 501{-# INLINE foldl #-} 502 503-- | 'foldl'' is like 'foldl', but strict in the accumulator. 504-- 505foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a 506foldl' f v (PS fp off len) = 507 accursedUnutterablePerformIO $ withForeignPtr fp $ \p -> 508 go v (p `plusPtr` off) (p `plusPtr` (off+len)) 509 where 510 -- tail recursive; traverses array left to right 511 go !z !p !q | p == q = return z 512 | otherwise = do x <- peek p 513 go (f z x) (p `plusPtr` 1) q 514{-# INLINE foldl' #-} 515 516-- | 'foldr', applied to a binary operator, a starting value 517-- (typically the right-identity of the operator), and a ByteString, 518-- reduces the ByteString using the binary operator, from right to left. 519foldr :: (Word8 -> a -> a) -> a -> ByteString -> a 520foldr k z (PS fp off len) = 521 let p = unsafeForeignPtrToPtr fp 522 in go (p `plusPtr` off) (p `plusPtr` (off+len)) 523 where 524 -- not tail recursive; traverses array left to right 525 go !p !q | p == q = z 526 | otherwise = let !x = accursedUnutterablePerformIO $ do 527 x' <- peek p 528 touchForeignPtr fp 529 return x' 530 in k x (go (p `plusPtr` 1) q) 531{-# INLINE foldr #-} 532 533-- | 'foldr'' is like 'foldr', but strict in the accumulator. 534foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a 535foldr' k v (PS fp off len) = 536 accursedUnutterablePerformIO $ withForeignPtr fp $ \p -> 537 go v (p `plusPtr` (off+len-1)) (p `plusPtr` (off-1)) 538 where 539 -- tail recursive; traverses array right to left 540 go !z !p !q | p == q = return z 541 | otherwise = do x <- peek p 542 go (k x z) (p `plusPtr` (-1)) q 543{-# INLINE foldr' #-} 544 545-- | 'foldl1' is a variant of 'foldl' that has no starting value 546-- argument, and thus must be applied to non-empty 'ByteString's. 547-- An exception will be thrown in the case of an empty ByteString. 548foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 549foldl1 f ps 550 | null ps = errorEmptyList "foldl1" 551 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps) 552{-# INLINE foldl1 #-} 553 554-- | 'foldl1'' is like 'foldl1', but strict in the accumulator. 555-- An exception will be thrown in the case of an empty ByteString. 556foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 557foldl1' f ps 558 | null ps = errorEmptyList "foldl1'" 559 | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps) 560{-# INLINE foldl1' #-} 561 562-- | 'foldr1' is a variant of 'foldr' that has no starting value argument, 563-- and thus must be applied to non-empty 'ByteString's 564-- An exception will be thrown in the case of an empty ByteString. 565foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 566foldr1 f ps 567 | null ps = errorEmptyList "foldr1" 568 | otherwise = foldr f (unsafeLast ps) (unsafeInit ps) 569{-# INLINE foldr1 #-} 570 571-- | 'foldr1'' is a variant of 'foldr1', but is strict in the 572-- accumulator. 573foldr1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 574foldr1' f ps 575 | null ps = errorEmptyList "foldr1" 576 | otherwise = foldr' f (unsafeLast ps) (unsafeInit ps) 577{-# INLINE foldr1' #-} 578 579-- --------------------------------------------------------------------- 580-- Special folds 581 582-- | /O(n)/ Concatenate a list of ByteStrings. 583concat :: [ByteString] -> ByteString 584concat = mconcat 585 586-- | Map a function over a 'ByteString' and concatenate the results 587concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString 588concatMap f = concat . foldr ((:) . f) [] 589 590-- foldr (append . f) empty 591 592-- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if 593-- any element of the 'ByteString' satisfies the predicate. 594any :: (Word8 -> Bool) -> ByteString -> Bool 595any _ (PS _ _ 0) = False 596any f (PS x s l) = accursedUnutterablePerformIO $ withForeignPtr x $ \ptr -> 597 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l)) 598 where 599 go !p !q | p == q = return False 600 | otherwise = do c <- peek p 601 if f c then return True 602 else go (p `plusPtr` 1) q 603{-# INLINE any #-} 604 605-- todo fuse 606 607-- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines 608-- if all elements of the 'ByteString' satisfy the predicate. 609all :: (Word8 -> Bool) -> ByteString -> Bool 610all _ (PS _ _ 0) = True 611all f (PS x s l) = accursedUnutterablePerformIO $ withForeignPtr x $ \ptr -> 612 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l)) 613 where 614 go !p !q | p == q = return True -- end of list 615 | otherwise = do c <- peek p 616 if f c 617 then go (p `plusPtr` 1) q 618 else return False 619{-# INLINE all #-} 620 621------------------------------------------------------------------------ 622 623-- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString' 624-- This function will fuse. 625-- An exception will be thrown in the case of an empty ByteString. 626maximum :: ByteString -> Word8 627maximum xs@(PS x s l) 628 | null xs = errorEmptyList "maximum" 629 | otherwise = accursedUnutterablePerformIO $ withForeignPtr x $ \p -> 630 c_maximum (p `plusPtr` s) (fromIntegral l) 631{-# INLINE maximum #-} 632 633-- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString' 634-- This function will fuse. 635-- An exception will be thrown in the case of an empty ByteString. 636minimum :: ByteString -> Word8 637minimum xs@(PS x s l) 638 | null xs = errorEmptyList "minimum" 639 | otherwise = accursedUnutterablePerformIO $ withForeignPtr x $ \p -> 640 c_minimum (p `plusPtr` s) (fromIntegral l) 641{-# INLINE minimum #-} 642 643------------------------------------------------------------------------ 644 645-- | The 'mapAccumL' function behaves like a combination of 'map' and 646-- 'foldl'; it applies a function to each element of a ByteString, 647-- passing an accumulating parameter from left to right, and returning a 648-- final value of this accumulator together with the new list. 649mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) 650mapAccumL f acc (PS fp o len) = unsafeDupablePerformIO $ withForeignPtr fp $ \a -> do 651 gp <- mallocByteString len 652 acc' <- withForeignPtr gp $ \p -> mapAccumL_ acc 0 (a `plusPtr` o) p 653 return (acc', PS gp 0 len) 654 where 655 mapAccumL_ !s !n !p1 !p2 656 | n >= len = return s 657 | otherwise = do 658 x <- peekByteOff p1 n 659 let (s', y) = f s x 660 pokeByteOff p2 n y 661 mapAccumL_ s' (n+1) p1 p2 662{-# INLINE mapAccumL #-} 663 664-- | The 'mapAccumR' function behaves like a combination of 'map' and 665-- 'foldr'; it applies a function to each element of a ByteString, 666-- passing an accumulating parameter from right to left, and returning a 667-- final value of this accumulator together with the new ByteString. 668mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) 669mapAccumR f acc (PS fp o len) = unsafeDupablePerformIO $ withForeignPtr fp $ \a -> do 670 gp <- mallocByteString len 671 acc' <- withForeignPtr gp $ \p -> mapAccumR_ acc (len-1) (a `plusPtr` o) p 672 return $! (acc', PS gp 0 len) 673 where 674 mapAccumR_ !s !n !p !q 675 | n < 0 = return s 676 | otherwise = do 677 x <- peekByteOff p n 678 let (s', y) = f s x 679 pokeByteOff q n y 680 mapAccumR_ s' (n-1) p q 681{-# INLINE mapAccumR #-} 682 683-- --------------------------------------------------------------------- 684-- Building ByteStrings 685 686-- | 'scanl' is similar to 'foldl', but returns a list of successive 687-- reduced values from the left. This function will fuse. 688-- 689-- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...] 690-- 691-- Note that 692-- 693-- > last (scanl f z xs) == foldl f z xs. 694-- 695scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString 696 697scanl f v (PS fp s len) = unsafeDupablePerformIO $ withForeignPtr fp $ \a -> 698 create (len+1) $ \q -> do 699 poke q v 700 scanl_ v 0 (a `plusPtr` s) (q `plusPtr` 1) 701 where 702 scanl_ !z !n !p !q 703 | n >= len = return () 704 | otherwise = do 705 x <- peekByteOff p n 706 let z' = f z x 707 pokeByteOff q n z' 708 scanl_ z' (n+1) p q 709{-# INLINE scanl #-} 710 711 -- n.b. haskell's List scan returns a list one bigger than the 712 -- input, so we need to snoc here to get some extra space, however, 713 -- it breaks map/up fusion (i.e. scanl . map no longer fuses) 714 715-- | 'scanl1' is a variant of 'scanl' that has no starting value argument. 716-- This function will fuse. 717-- 718-- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...] 719scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString 720scanl1 f ps 721 | null ps = empty 722 | otherwise = scanl f (unsafeHead ps) (unsafeTail ps) 723{-# INLINE scanl1 #-} 724 725-- | scanr is the right-to-left dual of scanl. 726scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString 727scanr f v (PS fp s len) = unsafeDupablePerformIO $ withForeignPtr fp $ \a -> 728 create (len+1) $ \q -> do 729 poke (q `plusPtr` len) v 730 scanr_ v (len-1) (a `plusPtr` s) q 731 where 732 scanr_ !z !n !p !q 733 | n < 0 = return () 734 | otherwise = do 735 x <- peekByteOff p n 736 let z' = f x z 737 pokeByteOff q n z' 738 scanr_ z' (n-1) p q 739{-# INLINE scanr #-} 740 741-- | 'scanr1' is a variant of 'scanr' that has no starting value argument. 742scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString 743scanr1 f ps 744 | null ps = empty 745 | otherwise = scanr f (unsafeLast ps) (unsafeInit ps) 746{-# INLINE scanr1 #-} 747 748-- --------------------------------------------------------------------- 749-- Unfolds and replicates 750 751-- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@ 752-- the value of every element. The following holds: 753-- 754-- > replicate w c = unfoldr w (\u -> Just (u,u)) c 755-- 756-- This implemenation uses @memset(3)@ 757replicate :: Int -> Word8 -> ByteString 758replicate w c 759 | w <= 0 = empty 760 | otherwise = unsafeCreate w $ \ptr -> 761 memset ptr c (fromIntegral w) >> return () 762 763-- | /O(n)/, where /n/ is the length of the result. The 'unfoldr' 764-- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a 765-- ByteString from a seed value. The function takes the element and 766-- returns 'Nothing' if it is done producing the ByteString or returns 767-- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string, 768-- and @b@ is the seed value for further production. 769-- 770-- Examples: 771-- 772-- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0 773-- > == pack [0, 1, 2, 3, 4, 5] 774-- 775unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString 776unfoldr f = concat . unfoldChunk 32 64 777 where unfoldChunk n n' x = 778 case unfoldrN n f x of 779 (s, Nothing) -> s : [] 780 (s, Just x') -> s : unfoldChunk n' (n+n') x' 781{-# INLINE unfoldr #-} 782 783-- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed 784-- value. However, the length of the result is limited by the first 785-- argument to 'unfoldrN'. This function is more efficient than 'unfoldr' 786-- when the maximum length of the result is known. 787-- 788-- The following equation relates 'unfoldrN' and 'unfoldr': 789-- 790-- > fst (unfoldrN n f s) == take n (unfoldr f s) 791-- 792unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a) 793unfoldrN i f x0 794 | i < 0 = (empty, Just x0) 795 | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0 796 where 797 go !p !x !n 798 | n == i = return (0, n, Just x) 799 | otherwise = case f x of 800 Nothing -> return (0, n, Nothing) 801 Just (w,x') -> do poke p w 802 go (p `plusPtr` 1) x' (n+1) 803{-# INLINE unfoldrN #-} 804 805-- --------------------------------------------------------------------- 806-- Substrings 807 808-- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix 809-- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@. 810take :: Int -> ByteString -> ByteString 811take n ps@(PS x s l) 812 | n <= 0 = empty 813 | n >= l = ps 814 | otherwise = PS x s n 815{-# INLINE take #-} 816 817-- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@ 818-- elements, or @[]@ if @n > 'length' xs@. 819drop :: Int -> ByteString -> ByteString 820drop n ps@(PS x s l) 821 | n <= 0 = ps 822 | n >= l = empty 823 | otherwise = PS x (s+n) (l-n) 824{-# INLINE drop #-} 825 826-- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@. 827splitAt :: Int -> ByteString -> (ByteString, ByteString) 828splitAt n ps@(PS x s l) 829 | n <= 0 = (empty, ps) 830 | n >= l = (ps, empty) 831 | otherwise = (PS x s n, PS x (s+n) (l-n)) 832{-# INLINE splitAt #-} 833 834-- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@, 835-- returns the longest prefix (possibly empty) of @xs@ of elements that 836-- satisfy @p@. 837takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString 838takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps 839{-# INLINE takeWhile #-} 840 841-- | 'takeWhileEnd', applied to a predicate @p@ and a ByteString @xs@, returns 842-- the longest suffix (possibly empty) of @xs@ of elements that satisfy @p@. 843-- 844-- @since 0.10.12.0 845takeWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString 846takeWhileEnd f ps = unsafeDrop (findFromEndUntil (not . f) ps) ps 847{-# INLINE takeWhileEnd #-} 848 849-- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@. 850dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString 851dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps 852{-# INLINE dropWhile #-} 853 854-- | 'dropWhileEnd' @p xs@ returns the prefix remaining after 'takeWhileEnd' @p 855-- xs@. 856-- 857-- @since 0.10.12.0 858dropWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString 859dropWhileEnd f ps = unsafeTake (findFromEndUntil (not . f) ps) ps 860{-# INLINE dropWhileEnd #-} 861 862-- instead of findIndexOrEnd, we could use memchr here. 863 864-- | 'break' @p@ is equivalent to @'span' ('not' . p)@. 865-- 866-- Under GHC, a rewrite rule will transform break (==) into a 867-- call to the specialised breakByte: 868-- 869-- > break ((==) x) = breakByte x 870-- > break (==x) = breakByte x 871-- 872break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 873break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps) 874{-# INLINE [1] break #-} 875 876-- See bytestring #70 877#if MIN_VERSION_base(4,9,0) 878{-# RULES 879"ByteString specialise break (x ==)" forall x. 880 break (x `eqWord8`) = breakByte x 881"ByteString specialise break (== x)" forall x. 882 break (`eqWord8` x) = breakByte x 883 #-} 884#else 885{-# RULES 886"ByteString specialise break (x ==)" forall x. 887 break (x ==) = breakByte x 888"ByteString specialise break (== x)" forall x. 889 break (== x) = breakByte x 890 #-} 891#endif 892 893-- INTERNAL: 894 895-- | 'breakByte' breaks its ByteString argument at the first occurence 896-- of the specified byte. It is more efficient than 'break' as it is 897-- implemented with @memchr(3)@. I.e. 898-- 899-- > break (==99) "abcd" == breakByte 99 "abcd" -- fromEnum 'c' == 99 900-- 901breakByte :: Word8 -> ByteString -> (ByteString, ByteString) 902breakByte c p = case elemIndex c p of 903 Nothing -> (p,empty) 904 Just n -> (unsafeTake n p, unsafeDrop n p) 905{-# INLINE breakByte #-} 906{-# DEPRECATED breakByte "It is an internal function and should never have been exported. Use 'break (== x)' instead. (There are rewrite rules that handle this special case of 'break'.)" #-} 907 908-- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString' 909-- 910-- breakEnd p == spanEnd (not.p) 911breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 912breakEnd p ps = splitAt (findFromEndUntil p ps) ps 913 914-- | 'span' @p xs@ breaks the ByteString into two segments. It is 915-- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@ 916span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 917span p ps = break (not . p) ps 918{-# INLINE [1] span #-} 919 920-- | 'spanByte' breaks its ByteString argument at the first 921-- occurence of a byte other than its argument. It is more efficient 922-- than 'span (==)' 923-- 924-- > span (==99) "abcd" == spanByte 99 "abcd" -- fromEnum 'c' == 99 925-- 926spanByte :: Word8 -> ByteString -> (ByteString, ByteString) 927spanByte c ps@(PS x s l) = 928 accursedUnutterablePerformIO $ 929 withForeignPtr x $ \p -> 930 go (p `plusPtr` s) 0 931 where 932 go !p !i | i >= l = return (ps, empty) 933 | otherwise = do c' <- peekByteOff p i 934 if c /= c' 935 then return (unsafeTake i ps, unsafeDrop i ps) 936 else go p (i+1) 937{-# INLINE spanByte #-} 938 939-- See bytestring #70 940#if MIN_VERSION_base(4,9,0) 941{-# RULES 942"ByteString specialise span (x ==)" forall x. 943 span (x `eqWord8`) = spanByte x 944"ByteString specialise span (== x)" forall x. 945 span (`eqWord8` x) = spanByte x 946 #-} 947#else 948{-# RULES 949"ByteString specialise span (x ==)" forall x. 950 span (x ==) = spanByte x 951"ByteString specialise span (== x)" forall x. 952 span (== x) = spanByte x 953 #-} 954#endif 955 956-- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'. 957-- We have 958-- 959-- > spanEnd (not.isSpace) "x y z" == ("x y ","z") 960-- 961-- and 962-- 963-- > spanEnd (not . isSpace) ps 964-- > == 965-- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x) 966-- 967spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 968spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps 969 970-- | /O(n)/ Splits a 'ByteString' into components delimited by 971-- separators, where the predicate returns True for a separator element. 972-- The resulting components do not contain the separators. Two adjacent 973-- separators result in an empty component in the output. eg. 974-- 975-- > splitWith (==97) "aabbaca" == ["","","bb","c",""] -- fromEnum 'a' == 97 976-- > splitWith (==97) [] == [] 977-- 978splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString] 979splitWith _pred (PS _ _ 0) = [] 980splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp 981 where pred# c# = pred_ (W8# c#) 982 983 splitWith0 !pred' !off' !len' !fp' = 984 accursedUnutterablePerformIO $ 985 withForeignPtr fp $ \p -> 986 splitLoop pred' p 0 off' len' fp' 987 988 splitLoop :: (Word# -> Bool) 989 -> Ptr Word8 990 -> Int -> Int -> Int 991 -> ForeignPtr Word8 992 -> IO [ByteString] 993 994 splitLoop pred' p idx' off' len' fp' 995 | idx' >= len' = return [PS fp' off' idx'] 996 | otherwise = do 997 w <- peekElemOff p (off'+idx') 998 if pred' (case w of W8# w# -> w#) 999 then return (PS fp' off' idx' : 1000 splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp') 1001 else splitLoop pred' p (idx'+1) off' len' fp' 1002{-# INLINE splitWith #-} 1003 1004-- | /O(n)/ Break a 'ByteString' into pieces separated by the byte 1005-- argument, consuming the delimiter. I.e. 1006-- 1007-- > split 10 "a\nb\nd\ne" == ["a","b","d","e"] -- fromEnum '\n' == 10 1008-- > split 97 "aXaXaXa" == ["","X","X","X",""] -- fromEnum 'a' == 97 1009-- > split 120 "x" == ["",""] -- fromEnum 'x' == 120 1010-- 1011-- and 1012-- 1013-- > intercalate [c] . split c == id 1014-- > split == splitWith . (==) 1015-- 1016-- As for all splitting functions in this library, this function does 1017-- not copy the substrings, it just constructs new 'ByteString's that 1018-- are slices of the original. 1019-- 1020split :: Word8 -> ByteString -> [ByteString] 1021split _ (PS _ _ 0) = [] 1022split w (PS x s l) = loop 0 1023 where 1024 loop !n = 1025 let q = accursedUnutterablePerformIO $ withForeignPtr x $ \p -> 1026 memchr (p `plusPtr` (s+n)) 1027 w (fromIntegral (l-n)) 1028 in if q == nullPtr 1029 then [PS x (s+n) (l-n)] 1030 else let i = accursedUnutterablePerformIO $ withForeignPtr x $ \p -> 1031 return (q `minusPtr` (p `plusPtr` s)) 1032 in PS x (s+n) (i-n) : loop (i+1) 1033 1034{-# INLINE split #-} 1035 1036 1037-- | The 'group' function takes a ByteString and returns a list of 1038-- ByteStrings such that the concatenation of the result is equal to the 1039-- argument. Moreover, each sublist in the result contains only equal 1040-- elements. For example, 1041-- 1042-- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"] 1043-- 1044-- It is a special case of 'groupBy', which allows the programmer to 1045-- supply their own equality test. It is about 40% faster than 1046-- /groupBy (==)/ 1047group :: ByteString -> [ByteString] 1048group xs 1049 | null xs = [] 1050 | otherwise = ys : group zs 1051 where 1052 (ys, zs) = spanByte (unsafeHead xs) xs 1053 1054-- | The 'groupBy' function is the non-overloaded version of 'group'. 1055groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] 1056groupBy k xs 1057 | null xs = [] 1058 | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs) 1059 where 1060 n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs) 1061 1062-- | /O(n)/ The 'intercalate' function takes a 'ByteString' and a list of 1063-- 'ByteString's and concatenates the list after interspersing the first 1064-- argument between each element of the list. 1065intercalate :: ByteString -> [ByteString] -> ByteString 1066intercalate s = concat . List.intersperse s 1067{-# INLINE [1] intercalate #-} 1068 1069{-# RULES 1070"ByteString specialise intercalate c -> intercalateByte" forall c s1 s2 . 1071 intercalate (singleton c) [s1, s2] = intercalateWithByte c s1 s2 1072 #-} 1073 1074-- | /O(n)/ intercalateWithByte. An efficient way to join to two ByteStrings 1075-- with a char. Around 4 times faster than the generalised join. 1076-- 1077intercalateWithByte :: Word8 -> ByteString -> ByteString -> ByteString 1078intercalateWithByte c f@(PS ffp s l) g@(PS fgp t m) = unsafeCreate len $ \ptr -> 1079 withForeignPtr ffp $ \fp -> 1080 withForeignPtr fgp $ \gp -> do 1081 memcpy ptr (fp `plusPtr` s) (fromIntegral l) 1082 poke (ptr `plusPtr` l) c 1083 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m) 1084 where 1085 len = length f + length g + 1 1086{-# INLINE intercalateWithByte #-} 1087 1088-- --------------------------------------------------------------------- 1089-- Indexing ByteStrings 1090 1091-- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0. 1092index :: ByteString -> Int -> Word8 1093index ps n 1094 | n < 0 = moduleError "index" ("negative index: " ++ show n) 1095 | n >= length ps = moduleError "index" ("index too large: " ++ show n 1096 ++ ", length = " ++ show (length ps)) 1097 | otherwise = ps `unsafeIndex` n 1098{-# INLINE index #-} 1099 1100-- | /O(n)/ The 'elemIndex' function returns the index of the first 1101-- element in the given 'ByteString' which is equal to the query 1102-- element, or 'Nothing' if there is no such element. 1103-- This implementation uses memchr(3). 1104elemIndex :: Word8 -> ByteString -> Maybe Int 1105elemIndex c (PS x s l) = accursedUnutterablePerformIO $ withForeignPtr x $ \p -> do 1106 let p' = p `plusPtr` s 1107 q <- memchr p' c (fromIntegral l) 1108 return $! if q == nullPtr then Nothing else Just $! q `minusPtr` p' 1109{-# INLINE elemIndex #-} 1110 1111-- | /O(n)/ The 'elemIndexEnd' function returns the last index of the 1112-- element in the given 'ByteString' which is equal to the query 1113-- element, or 'Nothing' if there is no such element. The following 1114-- holds: 1115-- 1116-- > elemIndexEnd c xs == 1117-- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs) 1118-- 1119elemIndexEnd :: Word8 -> ByteString -> Maybe Int 1120elemIndexEnd = findIndexEnd . (==) 1121{-# INLINE elemIndexEnd #-} 1122 1123-- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning 1124-- the indices of all elements equal to the query element, in ascending order. 1125-- This implementation uses memchr(3). 1126elemIndices :: Word8 -> ByteString -> [Int] 1127elemIndices w (PS x s l) = loop 0 1128 where 1129 loop !n = let q = accursedUnutterablePerformIO $ withForeignPtr x $ \p -> 1130 memchr (p `plusPtr` (n+s)) 1131 w (fromIntegral (l - n)) 1132 in if q == nullPtr 1133 then [] 1134 else let i = accursedUnutterablePerformIO $ withForeignPtr x $ \p -> 1135 return (q `minusPtr` (p `plusPtr` s)) 1136 in i : loop (i+1) 1137{-# INLINE elemIndices #-} 1138 1139-- | count returns the number of times its argument appears in the ByteString 1140-- 1141-- > count = length . elemIndices 1142-- 1143-- But more efficiently than using length on the intermediate list. 1144count :: Word8 -> ByteString -> Int 1145count w (PS x s m) = accursedUnutterablePerformIO $ withForeignPtr x $ \p -> 1146 fmap fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w 1147{-# INLINE count #-} 1148 1149-- | /O(n)/ The 'findIndex' function takes a predicate and a 'ByteString' and 1150-- returns the index of the first element in the ByteString 1151-- satisfying the predicate. 1152findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int 1153findIndex k (PS x s l) = accursedUnutterablePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0 1154 where 1155 go !ptr !n | n >= l = return Nothing 1156 | otherwise = do w <- peek ptr 1157 if k w 1158 then return (Just n) 1159 else go (ptr `plusPtr` 1) (n+1) 1160{-# INLINE findIndex #-} 1161 1162-- | /O(n)/ The 'findIndexEnd' function takes a predicate and a 'ByteString' and 1163-- returns the index of the last element in the ByteString 1164-- satisfying the predicate. 1165-- 1166-- @since 0.10.12.0 1167findIndexEnd :: (Word8 -> Bool) -> ByteString -> Maybe Int 1168findIndexEnd k (PS x s l) = accursedUnutterablePerformIO $ withForeignPtr x $ \ f -> go (f `plusPtr` s) (l-1) 1169 where 1170 go !ptr !n | n < 0 = return Nothing 1171 | otherwise = do w <- peekByteOff ptr n 1172 if k w 1173 then return (Just n) 1174 else go ptr (n-1) 1175{-# INLINE findIndexEnd #-} 1176 1177-- | /O(n)/ The 'findIndices' function extends 'findIndex', by returning the 1178-- indices of all elements satisfying the predicate, in ascending order. 1179findIndices :: (Word8 -> Bool) -> ByteString -> [Int] 1180findIndices p ps = loop 0 ps 1181 where 1182 loop !n !qs | null qs = [] 1183 | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs) 1184 | otherwise = loop (n+1) (unsafeTail qs) 1185 1186-- --------------------------------------------------------------------- 1187-- Searching ByteStrings 1188 1189-- | /O(n)/ 'elem' is the 'ByteString' membership predicate. 1190elem :: Word8 -> ByteString -> Bool 1191elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True 1192{-# INLINE elem #-} 1193 1194-- | /O(n)/ 'notElem' is the inverse of 'elem' 1195notElem :: Word8 -> ByteString -> Bool 1196notElem c ps = not (elem c ps) 1197{-# INLINE notElem #-} 1198 1199-- | /O(n)/ 'filter', applied to a predicate and a ByteString, 1200-- returns a ByteString containing those characters that satisfy the 1201-- predicate. 1202filter :: (Word8 -> Bool) -> ByteString -> ByteString 1203filter k ps@(PS x s l) 1204 | null ps = ps 1205 | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> do 1206 t <- go (f `plusPtr` s) p (f `plusPtr` (s + l)) 1207 return $! t `minusPtr` p -- actual length 1208 where 1209 go !f !t !end | f == end = return t 1210 | otherwise = do 1211 w <- peek f 1212 if k w 1213 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end 1214 else go (f `plusPtr` 1) t end 1215{-# INLINE filter #-} 1216 1217{- 1218-- 1219-- | /O(n)/ A first order equivalent of /filter . (==)/, for the common 1220-- case of filtering a single byte. It is more efficient to use 1221-- /filterByte/ in this case. 1222-- 1223-- > filterByte == filter . (==) 1224-- 1225-- filterByte is around 10x faster, and uses much less space, than its 1226-- filter equivalent 1227-- 1228filterByte :: Word8 -> ByteString -> ByteString 1229filterByte w ps = replicate (count w ps) w 1230{-# INLINE filterByte #-} 1231 1232{-# RULES 1233"ByteString specialise filter (== x)" forall x. 1234 filter ((==) x) = filterByte x 1235"ByteString specialise filter (== x)" forall x. 1236 filter (== x) = filterByte x 1237 #-} 1238-} 1239 1240-- | /O(n)/ The 'find' function takes a predicate and a ByteString, 1241-- and returns the first element in matching the predicate, or 'Nothing' 1242-- if there is no such element. 1243-- 1244-- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing 1245-- 1246find :: (Word8 -> Bool) -> ByteString -> Maybe Word8 1247find f p = case findIndex f p of 1248 Just n -> Just (p `unsafeIndex` n) 1249 _ -> Nothing 1250{-# INLINE find #-} 1251 1252-- | /O(n)/ The 'partition' function takes a predicate a ByteString and returns 1253-- the pair of ByteStrings with elements which do and do not satisfy the 1254-- predicate, respectively; i.e., 1255-- 1256-- > partition p bs == (filter p xs, filter (not . p) xs) 1257-- 1258partition :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 1259partition f s = unsafeDupablePerformIO $ 1260 do fp' <- mallocByteString len 1261 withForeignPtr fp' $ \p -> 1262 do let end = p `plusPtr` (len - 1) 1263 mid <- sep 0 p end 1264 rev mid end 1265 let i = mid `minusPtr` p 1266 return (PS fp' 0 i, 1267 PS fp' i (len - i)) 1268 where 1269 len = length s 1270 incr = (`plusPtr` 1) 1271 decr = (`plusPtr` (-1)) 1272 1273 sep !i !p1 !p2 1274 | i == len = return p1 1275 | f w = do poke p1 w 1276 sep (i + 1) (incr p1) p2 1277 | otherwise = do poke p2 w 1278 sep (i + 1) p1 (decr p2) 1279 where 1280 w = s `unsafeIndex` i 1281 1282 rev !p1 !p2 1283 | p1 >= p2 = return () 1284 | otherwise = do a <- peek p1 1285 b <- peek p2 1286 poke p1 b 1287 poke p2 a 1288 rev (incr p1) (decr p2) 1289 1290-- -------------------------------------------------------------------- 1291-- Sarching for substrings 1292 1293-- |/O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True' 1294-- if the first is a prefix of the second. 1295isPrefixOf :: ByteString -> ByteString -> Bool 1296isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2) 1297 | l1 == 0 = True 1298 | l2 < l1 = False 1299 | otherwise = accursedUnutterablePerformIO $ withForeignPtr x1 $ \p1 -> 1300 withForeignPtr x2 $ \p2 -> do 1301 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1) 1302 return $! i == 0 1303 1304-- | /O(n)/ The 'stripPrefix' function takes two ByteStrings and returns 'Just' 1305-- the remainder of the second iff the first is its prefix, and otherwise 1306-- 'Nothing'. 1307-- 1308-- @since 0.10.8.0 1309stripPrefix :: ByteString -> ByteString -> Maybe ByteString 1310stripPrefix bs1@(PS _ _ l1) bs2 1311 | bs1 `isPrefixOf` bs2 = Just (unsafeDrop l1 bs2) 1312 | otherwise = Nothing 1313 1314-- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True' 1315-- iff the first is a suffix of the second. 1316-- 1317-- The following holds: 1318-- 1319-- > isSuffixOf x y == reverse x `isPrefixOf` reverse y 1320-- 1321-- However, the real implemenation uses memcmp to compare the end of the 1322-- string only, with no reverse required.. 1323isSuffixOf :: ByteString -> ByteString -> Bool 1324isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2) 1325 | l1 == 0 = True 1326 | l2 < l1 = False 1327 | otherwise = accursedUnutterablePerformIO $ withForeignPtr x1 $ \p1 -> 1328 withForeignPtr x2 $ \p2 -> do 1329 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1) 1330 return $! i == 0 1331 1332-- | /O(n)/ The 'stripSuffix' function takes two ByteStrings and returns 'Just' 1333-- the remainder of the second iff the first is its suffix, and otherwise 1334-- 'Nothing'. 1335stripSuffix :: ByteString -> ByteString -> Maybe ByteString 1336stripSuffix bs1@(PS _ _ l1) bs2@(PS _ _ l2) 1337 | bs1 `isSuffixOf` bs2 = Just (unsafeTake (l2 - l1) bs2) 1338 | otherwise = Nothing 1339 1340-- | Check whether one string is a substring of another. @isInfixOf 1341-- p s@ is equivalent to @not (null (findSubstrings p s))@. 1342isInfixOf :: ByteString -> ByteString -> Bool 1343isInfixOf p s = isJust (findSubstring p s) 1344 1345-- | Break a string on a substring, returning a pair of the part of the 1346-- string prior to the match, and the rest of the string. 1347-- 1348-- The following relationships hold: 1349-- 1350-- > break (== c) l == breakSubstring (singleton c) l 1351-- 1352-- and: 1353-- 1354-- > findSubstring s l == 1355-- > if null s then Just 0 1356-- > else case breakSubstring s l of 1357-- > (x,y) | null y -> Nothing 1358-- > | otherwise -> Just (length x) 1359-- 1360-- For example, to tokenise a string, dropping delimiters: 1361-- 1362-- > tokenise x y = h : if null t then [] else tokenise x (drop (length x) t) 1363-- > where (h,t) = breakSubstring x y 1364-- 1365-- To skip to the first occurence of a string: 1366-- 1367-- > snd (breakSubstring x y) 1368-- 1369-- To take the parts of a string before a delimiter: 1370-- 1371-- > fst (breakSubstring x y) 1372-- 1373-- Note that calling `breakSubstring x` does some preprocessing work, so 1374-- you should avoid unnecessarily duplicating breakSubstring calls with the same 1375-- pattern. 1376-- 1377breakSubstring :: ByteString -- ^ String to search for 1378 -> ByteString -- ^ String to search in 1379 -> (ByteString,ByteString) -- ^ Head and tail of string broken at substring 1380breakSubstring pat = 1381 case lp of 1382 0 -> \src -> (empty,src) 1383 1 -> breakByte (unsafeHead pat) 1384 _ -> if lp * 8 <= finiteBitSize (0 :: Word) 1385 then shift 1386 else karpRabin 1387 where 1388 unsafeSplitAt i s = (unsafeTake i s, unsafeDrop i s) 1389 lp = length pat 1390 karpRabin :: ByteString -> (ByteString, ByteString) 1391 karpRabin src 1392 | length src < lp = (src,empty) 1393 | otherwise = search (rollingHash $ unsafeTake lp src) lp 1394 where 1395 k = 2891336453 :: Word32 1396 rollingHash = foldl' (\h b -> h * k + fromIntegral b) 0 1397 hp = rollingHash pat 1398 m = k ^ lp 1399 get = fromIntegral . unsafeIndex src 1400 search !hs !i 1401 | hp == hs && pat == unsafeTake lp b = u 1402 | length src <= i = (src,empty) -- not found 1403 | otherwise = search hs' (i + 1) 1404 where 1405 u@(_, b) = unsafeSplitAt (i - lp) src 1406 hs' = hs * k + 1407 get i - 1408 m * get (i - lp) 1409 {-# INLINE karpRabin #-} 1410 1411 shift :: ByteString -> (ByteString, ByteString) 1412 shift !src 1413 | length src < lp = (src,empty) 1414 | otherwise = search (intoWord $ unsafeTake lp src) lp 1415 where 1416 intoWord :: ByteString -> Word 1417 intoWord = foldl' (\w b -> (w `shiftL` 8) .|. fromIntegral b) 0 1418 wp = intoWord pat 1419 mask = (1 `shiftL` (8 * lp)) - 1 1420 search !w !i 1421 | w == wp = unsafeSplitAt (i - lp) src 1422 | length src <= i = (src, empty) 1423 | otherwise = search w' (i + 1) 1424 where 1425 b = fromIntegral (unsafeIndex src i) 1426 w' = mask .&. ((w `shiftL` 8) .|. b) 1427 {-# INLINE shift #-} 1428 1429-- | Get the first index of a substring in another string, 1430-- or 'Nothing' if the string is not found. 1431-- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@. 1432findSubstring :: ByteString -- ^ String to search for. 1433 -> ByteString -- ^ String to seach in. 1434 -> Maybe Int 1435findSubstring pat src 1436 | null pat && null src = Just 0 1437 | null b = Nothing 1438 | otherwise = Just (length a) 1439 where (a, b) = breakSubstring pat src 1440 1441{-# DEPRECATED findSubstring "findSubstring is deprecated in favour of breakSubstring." #-} 1442 1443-- | Find the indices of all non-overlapping occurences of a substring in a 1444-- string. 1445-- 1446-- Note, prior to @0.10.6.0@ this function returned the indices of all 1447-- possibly-overlapping matches. 1448findSubstrings :: ByteString -- ^ String to search for. 1449 -> ByteString -- ^ String to seach in. 1450 -> [Int] 1451findSubstrings pat src 1452 | null pat = [0 .. ls] 1453 | otherwise = search 0 1454 where 1455 lp = length pat 1456 ls = length src 1457 search !n 1458 | (n > ls - lp) || null b = [] 1459 | otherwise = let k = n + length a 1460 in k : search (k + lp) 1461 where 1462 (a, b) = breakSubstring pat (unsafeDrop n src) 1463 1464-- In 1465-- [0.10.6.0](<https://github.com/haskell/bytestring/commit/2160e091e215fecc9177d55a37cd50fc253ba86a?w=1>) 1466-- 'findSubstrings' was refactored to call an improved 'breakString' 1467-- implementation, but the refactored code no longer matches overlapping 1468-- strings. The behaviour change appears to be inadvertent, but the function 1469-- had already been deprecated for more than seven years. At this time 1470-- (@0.10.10.1@), the deprecation was twelve years in the past. 1471-- 1472{-# DEPRECATED findSubstrings "findSubstrings is deprecated in favour of breakSubstring." #-} 1473 1474-- --------------------------------------------------------------------- 1475-- Zipping 1476 1477-- | /O(n)/ 'zip' takes two ByteStrings and returns a list of 1478-- corresponding pairs of bytes. If one input ByteString is short, 1479-- excess elements of the longer ByteString are discarded. This is 1480-- equivalent to a pair of 'unpack' operations. 1481zip :: ByteString -> ByteString -> [(Word8,Word8)] 1482zip ps qs 1483 | null ps || null qs = [] 1484 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs) 1485 1486-- | 'zipWith' generalises 'zip' by zipping with the function given as 1487-- the first argument, instead of a tupling function. For example, 1488-- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of 1489-- corresponding sums. 1490zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a] 1491zipWith f ps qs 1492 | null ps || null qs = [] 1493 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs) 1494{-# NOINLINE [1] zipWith #-} 1495 1496-- 1497-- | A specialised version of zipWith for the common case of a 1498-- simultaneous map over two bytestrings, to build a 3rd. Rewrite rules 1499-- are used to automatically covert zipWith into zipWith' when a pack is 1500-- performed on the result of zipWith. 1501-- 1502zipWith' :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString 1503zipWith' f (PS fp s l) (PS fq t m) = unsafeDupablePerformIO $ 1504 withForeignPtr fp $ \a -> 1505 withForeignPtr fq $ \b -> 1506 create len $ zipWith_ 0 (a `plusPtr` s) (b `plusPtr` t) 1507 where 1508 zipWith_ :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO () 1509 zipWith_ !n !p1 !p2 !r 1510 | n >= len = return () 1511 | otherwise = do 1512 x <- peekByteOff p1 n 1513 y <- peekByteOff p2 n 1514 pokeByteOff r n (f x y) 1515 zipWith_ (n+1) p1 p2 r 1516 1517 len = min l m 1518{-# INLINE zipWith' #-} 1519 1520{-# RULES 1521"ByteString specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q . 1522 zipWith f p q = unpack (zipWith' f p q) 1523 #-} 1524 1525-- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of 1526-- ByteStrings. Note that this performs two 'pack' operations. 1527unzip :: [(Word8,Word8)] -> (ByteString,ByteString) 1528unzip ls = (pack (P.map fst ls), pack (P.map snd ls)) 1529{-# INLINE unzip #-} 1530 1531-- --------------------------------------------------------------------- 1532-- Special lists 1533 1534-- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first. 1535inits :: ByteString -> [ByteString] 1536inits (PS x s l) = [PS x s n | n <- [0..l]] 1537 1538-- | /O(n)/ Return all final segments of the given 'ByteString', longest first. 1539tails :: ByteString -> [ByteString] 1540tails p | null p = [empty] 1541 | otherwise = p : tails (unsafeTail p) 1542 1543-- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]] 1544 1545-- --------------------------------------------------------------------- 1546-- ** Ordered 'ByteString's 1547 1548-- | /O(n)/ Sort a ByteString efficiently, using counting sort. 1549sort :: ByteString -> ByteString 1550sort (PS input s l) = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do 1551 1552 _ <- memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize))) 1553 withForeignPtr input (\x -> countOccurrences arr (x `plusPtr` s) l) 1554 1555 let go 256 !_ = return () 1556 go i !ptr = do n <- peekElemOff arr i 1557 when (n /= 0) $ memset ptr (fromIntegral i) n >> return () 1558 go (i + 1) (ptr `plusPtr` fromIntegral n) 1559 go 0 p 1560 where 1561 -- | Count the number of occurrences of each byte. 1562 -- Used by 'sort' 1563 -- 1564 countOccurrences :: Ptr CSize -> Ptr Word8 -> Int -> IO () 1565 countOccurrences !counts !str !len = go 0 1566 where 1567 go !i | i == len = return () 1568 | otherwise = do k <- fromIntegral `fmap` peekElemOff str i 1569 x <- peekElemOff counts k 1570 pokeElemOff counts k (x + 1) 1571 go (i + 1) 1572 1573 1574-- --------------------------------------------------------------------- 1575-- Low level constructors 1576 1577-- | /O(n) construction/ Use a @ByteString@ with a function requiring a 1578-- null-terminated @CString@. The @CString@ is a copy and will be freed 1579-- automatically; it must not be stored or used after the 1580-- subcomputation finishes. 1581useAsCString :: ByteString -> (CString -> IO a) -> IO a 1582useAsCString (PS fp o l) action = 1583 allocaBytes (l+1) $ \buf -> 1584 withForeignPtr fp $ \p -> do 1585 memcpy buf (p `plusPtr` o) (fromIntegral l) 1586 pokeByteOff buf l (0::Word8) 1587 action (castPtr buf) 1588 1589-- | /O(n) construction/ Use a @ByteString@ with a function requiring a @CStringLen@. 1590-- As for @useAsCString@ this function makes a copy of the original @ByteString@. 1591-- It must not be stored or used after the subcomputation finishes. 1592useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a 1593useAsCStringLen p@(PS _ _ l) f = useAsCString p $ \cstr -> f (cstr,l) 1594 1595------------------------------------------------------------------------ 1596 1597-- | /O(n)./ Construct a new @ByteString@ from a @CString@. The 1598-- resulting @ByteString@ is an immutable copy of the original 1599-- @CString@, and is managed on the Haskell heap. The original 1600-- @CString@ must be null terminated. 1601packCString :: CString -> IO ByteString 1602packCString cstr = do 1603 len <- c_strlen cstr 1604 packCStringLen (cstr, fromIntegral len) 1605 1606-- | /O(n)./ Construct a new @ByteString@ from a @CStringLen@. The 1607-- resulting @ByteString@ is an immutable copy of the original @CStringLen@. 1608-- The @ByteString@ is a normal Haskell value and will be managed on the 1609-- Haskell heap. 1610packCStringLen :: CStringLen -> IO ByteString 1611packCStringLen (cstr, len) | len >= 0 = create len $ \p -> 1612 memcpy p (castPtr cstr) (fromIntegral len) 1613packCStringLen (_, len) = 1614 moduleErrorIO "packCStringLen" ("negative length: " ++ show len) 1615 1616------------------------------------------------------------------------ 1617 1618-- | /O(n)/ Make a copy of the 'ByteString' with its own storage. 1619-- This is mainly useful to allow the rest of the data pointed 1620-- to by the 'ByteString' to be garbage collected, for example 1621-- if a large string has been read in, and only a small part of it 1622-- is needed in the rest of the program. 1623-- 1624copy :: ByteString -> ByteString 1625copy (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> 1626 memcpy p (f `plusPtr` s) (fromIntegral l) 1627 1628-- --------------------------------------------------------------------- 1629-- Line IO 1630 1631-- | Read a line from stdin. 1632getLine :: IO ByteString 1633getLine = hGetLine stdin 1634 1635-- | Read a line from a handle 1636 1637hGetLine :: Handle -> IO ByteString 1638hGetLine h = 1639 wantReadableHandle_ "Data.ByteString.hGetLine" h $ 1640 \ h_@Handle__{haByteBuffer} -> do 1641 flushCharReadBuffer h_ 1642 buf <- readIORef haByteBuffer 1643 if isEmptyBuffer buf 1644 then fill h_ buf 0 [] 1645 else haveBuf h_ buf 0 [] 1646 where 1647 1648 fill h_@Handle__{haByteBuffer,haDevice} buf !len xss = do 1649 (r,buf') <- Buffered.fillReadBuffer haDevice buf 1650 if r == 0 1651 then do writeIORef haByteBuffer buf{ bufR=0, bufL=0 } 1652 if len > 0 1653 then mkBigPS len xss 1654 else ioe_EOF 1655 else haveBuf h_ buf' len xss 1656 1657 haveBuf h_@Handle__{haByteBuffer} 1658 buf@Buffer{ bufRaw=raw, bufR=w, bufL=r } 1659 len xss = 1660 do 1661 off <- findEOL r w raw 1662 let new_len = len + off - r 1663 xs <- mkPS raw r off 1664 1665 -- if eol == True, then off is the offset of the '\n' 1666 -- otherwise off == w and the buffer is now empty. 1667 if off /= w 1668 then do if w == off + 1 1669 then writeIORef haByteBuffer buf{ bufL=0, bufR=0 } 1670 else writeIORef haByteBuffer buf{ bufL = off + 1 } 1671 mkBigPS new_len (xs:xss) 1672 else fill h_ buf{ bufL=0, bufR=0 } new_len (xs:xss) 1673 1674 -- find the end-of-line character, if there is one 1675 findEOL r w raw 1676 | r == w = return w 1677 | otherwise = do 1678 c <- readWord8Buf raw r 1679 if c == fromIntegral (ord '\n') 1680 then return r -- NB. not r+1: don't include the '\n' 1681 else findEOL (r+1) w raw 1682 1683mkPS :: RawBuffer Word8 -> Int -> Int -> IO ByteString 1684mkPS buf start end = 1685 create len $ \p -> 1686 withRawBuffer buf $ \pbuf -> copyBytes p (pbuf `plusPtr` start) len 1687 where 1688 len = end - start 1689 1690mkBigPS :: Int -> [ByteString] -> IO ByteString 1691mkBigPS _ [ps] = return ps 1692mkBigPS _ pss = return $! concat (P.reverse pss) 1693 1694-- --------------------------------------------------------------------- 1695-- Block IO 1696 1697-- | Outputs a 'ByteString' to the specified 'Handle'. 1698hPut :: Handle -> ByteString -> IO () 1699hPut _ (PS _ _ 0) = return () 1700hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l 1701 1702-- | Similar to 'hPut' except that it will never block. Instead it returns 1703-- any tail that did not get written. This tail may be 'empty' in the case that 1704-- the whole string was written, or the whole original string if nothing was 1705-- written. Partial writes are also possible. 1706-- 1707-- Note: on Windows and with Haskell implementation other than GHC, this 1708-- function does not work correctly; it behaves identically to 'hPut'. 1709-- 1710hPutNonBlocking :: Handle -> ByteString -> IO ByteString 1711hPutNonBlocking h bs@(PS ps s l) = do 1712 bytesWritten <- withForeignPtr ps $ \p-> hPutBufNonBlocking h (p `plusPtr` s) l 1713 return $! drop bytesWritten bs 1714 1715-- | A synonym for @hPut@, for compatibility 1716hPutStr :: Handle -> ByteString -> IO () 1717hPutStr = hPut 1718 1719-- | Write a ByteString to a handle, appending a newline byte 1720hPutStrLn :: Handle -> ByteString -> IO () 1721hPutStrLn h ps 1722 | length ps < 1024 = hPut h (ps `snoc` 0x0a) 1723 | otherwise = hPut h ps >> hPut h (singleton (0x0a)) -- don't copy 1724 1725-- | Write a ByteString to stdout 1726putStr :: ByteString -> IO () 1727putStr = hPut stdout 1728 1729-- | Write a ByteString to stdout, appending a newline byte 1730putStrLn :: ByteString -> IO () 1731putStrLn = hPutStrLn stdout 1732 1733{-# DEPRECATED hPutStrLn 1734 "Use Data.ByteString.Char8.hPutStrLn instead. (Functions that rely on ASCII encodings belong in Data.ByteString.Char8)" 1735 #-} 1736{-# DEPRECATED putStrLn 1737 "Use Data.ByteString.Char8.putStrLn instead. (Functions that rely on ASCII encodings belong in Data.ByteString.Char8)" 1738 #-} 1739 1740------------------------------------------------------------------------ 1741-- Low level IO 1742 1743-- | Read a 'ByteString' directly from the specified 'Handle'. This 1744-- is far more efficient than reading the characters into a 'String' 1745-- and then using 'pack'. First argument is the Handle to read from, 1746-- and the second is the number of bytes to read. It returns the bytes 1747-- read, up to n, or 'empty' if EOF has been reached. 1748-- 1749-- 'hGet' is implemented in terms of 'hGetBuf'. 1750-- 1751-- If the handle is a pipe or socket, and the writing end 1752-- is closed, 'hGet' will behave as if EOF was reached. 1753-- 1754hGet :: Handle -> Int -> IO ByteString 1755hGet h i 1756 | i > 0 = createAndTrim i $ \p -> hGetBuf h p i 1757 | i == 0 = return empty 1758 | otherwise = illegalBufferSize h "hGet" i 1759 1760-- | hGetNonBlocking is similar to 'hGet', except that it will never block 1761-- waiting for data to become available, instead it returns only whatever data 1762-- is available. If there is no data available to be read, 'hGetNonBlocking' 1763-- returns 'empty'. 1764-- 1765-- Note: on Windows and with Haskell implementation other than GHC, this 1766-- function does not work correctly; it behaves identically to 'hGet'. 1767-- 1768hGetNonBlocking :: Handle -> Int -> IO ByteString 1769hGetNonBlocking h i 1770 | i > 0 = createAndTrim i $ \p -> hGetBufNonBlocking h p i 1771 | i == 0 = return empty 1772 | otherwise = illegalBufferSize h "hGetNonBlocking" i 1773 1774-- | Like 'hGet', except that a shorter 'ByteString' may be returned 1775-- if there are not enough bytes immediately available to satisfy the 1776-- whole request. 'hGetSome' only blocks if there is no data 1777-- available, and EOF has not yet been reached. 1778-- 1779hGetSome :: Handle -> Int -> IO ByteString 1780hGetSome hh i 1781#if MIN_VERSION_base(4,3,0) 1782 | i > 0 = createAndTrim i $ \p -> hGetBufSome hh p i 1783#else 1784 | i > 0 = let 1785 loop = do 1786 s <- hGetNonBlocking hh i 1787 if not (null s) 1788 then return s 1789 else do eof <- hIsEOF hh 1790 if eof then return s 1791 else hWaitForInput hh (-1) >> loop 1792 -- for this to work correctly, the 1793 -- Handle should be in binary mode 1794 -- (see GHC ticket #3808) 1795 in loop 1796#endif 1797 | i == 0 = return empty 1798 | otherwise = illegalBufferSize hh "hGetSome" i 1799 1800illegalBufferSize :: Handle -> String -> Int -> IO a 1801illegalBufferSize handle fn sz = 1802 ioError (mkIOError illegalOperationErrorType msg (Just handle) Nothing) 1803 --TODO: System.IO uses InvalidArgument here, but it's not exported :-( 1804 where 1805 msg = fn ++ ": illegal ByteString size " ++ showsPrec 9 sz [] 1806 1807 1808-- | Read a handle's entire contents strictly into a 'ByteString'. 1809-- 1810-- This function reads chunks at a time, increasing the chunk size on each 1811-- read. The final string is then reallocated to the appropriate size. For 1812-- files > half of available memory, this may lead to memory exhaustion. 1813-- Consider using 'readFile' in this case. 1814-- 1815-- The Handle is closed once the contents have been read, 1816-- or if an exception is thrown. 1817-- 1818hGetContents :: Handle -> IO ByteString 1819hGetContents hnd = do 1820 bs <- hGetContentsSizeHint hnd 1024 2048 1821 `finally` hClose hnd 1822 -- don't waste too much space for small files: 1823 if length bs < 900 1824 then return $! copy bs 1825 else return bs 1826 1827hGetContentsSizeHint :: Handle 1828 -> Int -- ^ first read size 1829 -> Int -- ^ initial buffer size increment 1830 -> IO ByteString 1831hGetContentsSizeHint hnd = 1832 readChunks [] 1833 where 1834 readChunks chunks sz sz' = do 1835 fp <- mallocByteString sz 1836 readcount <- withForeignPtr fp $ \buf -> hGetBuf hnd buf sz 1837 let chunk = PS fp 0 readcount 1838 -- We rely on the hGetBuf behaviour (not hGetBufSome) where it reads up 1839 -- to the size we ask for, or EOF. So short reads indicate EOF. 1840 if readcount < sz && sz > 0 1841 then return $! concat (P.reverse (chunk : chunks)) 1842 else readChunks (chunk : chunks) sz' ((sz+sz') `min` 32752) 1843 -- we grow the buffer sizes, but not too huge 1844 -- we concatenate in the end anyway 1845 1846-- | getContents. Read stdin strictly. Equivalent to hGetContents stdin 1847-- The 'Handle' is closed after the contents have been read. 1848-- 1849getContents :: IO ByteString 1850getContents = hGetContents stdin 1851 1852-- | The interact function takes a function of type @ByteString -> ByteString@ 1853-- as its argument. The entire input from the standard input device is passed 1854-- to this function as its argument, and the resulting string is output on the 1855-- standard output device. 1856-- 1857interact :: (ByteString -> ByteString) -> IO () 1858interact transformer = putStr . transformer =<< getContents 1859 1860-- | Read an entire file strictly into a 'ByteString'. 1861-- 1862readFile :: FilePath -> IO ByteString 1863readFile f = 1864 withBinaryFile f ReadMode $ \h -> do 1865 -- hFileSize fails if file is not regular file (like 1866 -- /dev/null). Catch exception and try reading anyway. 1867 filesz <- catch (hFileSize h) useZeroIfNotRegularFile 1868 let readsz = (fromIntegral filesz `max` 0) + 1 1869 hGetContentsSizeHint h readsz (readsz `max` 255) 1870 -- Our initial size is one bigger than the file size so that in the 1871 -- typical case we will read the whole file in one go and not have 1872 -- to allocate any more chunks. We'll still do the right thing if the 1873 -- file size is 0 or is changed before we do the read. 1874 where 1875 useZeroIfNotRegularFile :: IOException -> IO Integer 1876 useZeroIfNotRegularFile _ = return 0 1877 1878modifyFile :: IOMode -> FilePath -> ByteString -> IO () 1879modifyFile mode f txt = withBinaryFile f mode (`hPut` txt) 1880 1881-- | Write a 'ByteString' to a file. 1882writeFile :: FilePath -> ByteString -> IO () 1883writeFile = modifyFile WriteMode 1884 1885-- | Append a 'ByteString' to a file. 1886appendFile :: FilePath -> ByteString -> IO () 1887appendFile = modifyFile AppendMode 1888 1889-- --------------------------------------------------------------------- 1890-- Internal utilities 1891 1892-- | 'findIndexOrEnd' is a variant of findIndex, that returns the length 1893-- of the string if no element is found, rather than Nothing. 1894findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int 1895findIndexOrEnd k (PS x s l) = 1896 accursedUnutterablePerformIO $ 1897 withForeignPtr x $ \f -> 1898 go (f `plusPtr` s) 0 1899 where 1900 go !ptr !n | n >= l = return l 1901 | otherwise = do w <- peek ptr 1902 if k w 1903 then return n 1904 else go (ptr `plusPtr` 1) (n+1) 1905{-# INLINE findIndexOrEnd #-} 1906 1907-- Common up near identical calls to `error' to reduce the number 1908-- constant strings created when compiled: 1909errorEmptyList :: String -> a 1910errorEmptyList fun = moduleError fun "empty ByteString" 1911{-# NOINLINE errorEmptyList #-} 1912 1913moduleError :: String -> String -> a 1914moduleError fun msg = error (moduleErrorMsg fun msg) 1915{-# NOINLINE moduleError #-} 1916 1917moduleErrorIO :: String -> String -> IO a 1918moduleErrorIO fun msg = throwIO . userError $ moduleErrorMsg fun msg 1919{-# NOINLINE moduleErrorIO #-} 1920 1921moduleErrorMsg :: String -> String -> String 1922moduleErrorMsg fun msg = "Data.ByteString." ++ fun ++ ':':' ':msg 1923 1924-- Find from the end of the string using predicate 1925findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int 1926findFromEndUntil f ps@(PS x s l) 1927 | null ps = 0 1928 | f (unsafeLast ps) = l 1929 | otherwise = findFromEndUntil f (PS x s (l - 1)) 1930