1{-# LANGUAGE CPP #-}
2{-# LANGUAGE MagicHash, UnboxedTuples,
3            NamedFieldPuns, BangPatterns #-}
4{-# OPTIONS_HADDOCK prune #-}
5#if __GLASGOW_HASKELL__ >= 701
6{-# LANGUAGE Trustworthy #-}
7#endif
8
9-- |
10-- Module      : Data.ByteString
11-- Copyright   : (c) The University of Glasgow 2001,
12--               (c) David Roundy 2003-2005,
13--               (c) Simon Marlow 2005,
14--               (c) Bjorn Bringert 2006,
15--               (c) Don Stewart 2005-2008,
16--               (c) Duncan Coutts 2006-2013
17-- License     : BSD-style
18--
19-- Maintainer  : dons00@gmail.com, duncan@community.haskell.org
20-- Stability   : stable
21-- Portability : portable
22--
23-- A time- and space-efficient implementation of byte vectors using
24-- packed Word8 arrays, suitable for high performance use, both in terms
25-- of large data quantities and high speed requirements. Byte vectors
26-- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr',
27-- and can be passed between C and Haskell with little effort.
28--
29-- The recomended way to assemble ByteStrings from smaller parts
30-- is to use the builder monoid from "Data.ByteString.Builder".
31--
32-- This module is intended to be imported @qualified@, to avoid name
33-- clashes with "Prelude" functions.  eg.
34--
35-- > import qualified Data.ByteString as B
36--
37-- Original GHC implementation by Bryan O\'Sullivan.
38-- Rewritten to use 'Data.Array.Unboxed.UArray' by Simon Marlow.
39-- Rewritten to support slices and use 'ForeignPtr' by David Roundy.
40-- Rewritten again and extended by Don Stewart and Duncan Coutts.
41--
42
43module Data.ByteString (
44
45        -- * The @ByteString@ type
46        ByteString,             -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid
47
48        -- * Introducing and eliminating 'ByteString's
49        empty,                  -- :: ByteString
50        singleton,              -- :: Word8   -> ByteString
51        pack,                   -- :: [Word8] -> ByteString
52        unpack,                 -- :: ByteString -> [Word8]
53
54        -- * Basic interface
55        cons,                   -- :: Word8 -> ByteString -> ByteString
56        snoc,                   -- :: ByteString -> Word8 -> ByteString
57        append,                 -- :: ByteString -> ByteString -> ByteString
58        head,                   -- :: ByteString -> Word8
59        uncons,                 -- :: ByteString -> Maybe (Word8, ByteString)
60        unsnoc,                 -- :: ByteString -> Maybe (ByteString, Word8)
61        last,                   -- :: ByteString -> Word8
62        tail,                   -- :: ByteString -> ByteString
63        init,                   -- :: ByteString -> ByteString
64        null,                   -- :: ByteString -> Bool
65        length,                 -- :: ByteString -> Int
66
67        -- * Transforming ByteStrings
68        map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
69        reverse,                -- :: ByteString -> ByteString
70        intersperse,            -- :: Word8 -> ByteString -> ByteString
71        intercalate,            -- :: ByteString -> [ByteString] -> ByteString
72        transpose,              -- :: [ByteString] -> [ByteString]
73
74        -- * Reducing 'ByteString's (folds)
75        foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
76        foldl',                 -- :: (a -> Word8 -> a) -> a -> ByteString -> a
77        foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
78        foldl1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
79
80        foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
81        foldr',                 -- :: (Word8 -> a -> a) -> a -> ByteString -> a
82        foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
83        foldr1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
84
85        -- ** Special folds
86        concat,                 -- :: [ByteString] -> ByteString
87        concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
88        any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
89        all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
90        maximum,                -- :: ByteString -> Word8
91        minimum,                -- :: ByteString -> Word8
92
93        -- * Building ByteStrings
94        -- ** Scans
95        scanl,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
96        scanl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
97        scanr,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
98        scanr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
99
100        -- ** Accumulating maps
101        mapAccumL,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
102        mapAccumR,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
103
104        -- ** Generating and unfolding ByteStrings
105        replicate,              -- :: Int -> Word8 -> ByteString
106        unfoldr,                -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
107        unfoldrN,               -- :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
108
109        -- * Substrings
110
111        -- ** Breaking strings
112        take,                   -- :: Int -> ByteString -> ByteString
113        drop,                   -- :: Int -> ByteString -> ByteString
114        splitAt,                -- :: Int -> ByteString -> (ByteString, ByteString)
115        takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
116        takeWhileEnd,           -- :: (Word8 -> Bool) -> ByteString -> ByteString
117        dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
118        dropWhileEnd,           -- :: (Word8 -> Bool) -> ByteString -> ByteString
119        span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
120        spanEnd,                -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
121        break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
122        breakEnd,               -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
123        group,                  -- :: ByteString -> [ByteString]
124        groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
125        inits,                  -- :: ByteString -> [ByteString]
126        tails,                  -- :: ByteString -> [ByteString]
127        stripPrefix,            -- :: ByteString -> ByteString -> Maybe ByteString
128        stripSuffix,            -- :: ByteString -> ByteString -> Maybe ByteString
129
130        -- ** Breaking into many substrings
131        split,                  -- :: Word8 -> ByteString -> [ByteString]
132        splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
133
134        -- * Predicates
135        isPrefixOf,             -- :: ByteString -> ByteString -> Bool
136        isSuffixOf,             -- :: ByteString -> ByteString -> Bool
137        isInfixOf,              -- :: ByteString -> ByteString -> Bool
138
139        -- ** Search for arbitrary substrings
140        breakSubstring,         -- :: ByteString -> ByteString -> (ByteString,ByteString)
141        findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
142        findSubstrings,         -- :: ByteString -> ByteString -> [Int]
143
144        -- * Searching ByteStrings
145
146        -- ** Searching by equality
147        elem,                   -- :: Word8 -> ByteString -> Bool
148        notElem,                -- :: Word8 -> ByteString -> Bool
149
150        -- ** Searching with a predicate
151        find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
152        filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
153        partition,              -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
154
155        -- * Indexing ByteStrings
156        index,                  -- :: ByteString -> Int -> Word8
157        elemIndex,              -- :: Word8 -> ByteString -> Maybe Int
158        elemIndices,            -- :: Word8 -> ByteString -> [Int]
159        elemIndexEnd,           -- :: Word8 -> ByteString -> Maybe Int
160        findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
161        findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int]
162        findIndexEnd,           -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
163        count,                  -- :: Word8 -> ByteString -> Int
164
165        -- * Zipping and unzipping ByteStrings
166        zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
167        zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
168        unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
169
170        -- * Ordered ByteStrings
171        sort,                   -- :: ByteString -> ByteString
172
173        -- * Low level conversions
174        -- ** Copying ByteStrings
175        copy,                   -- :: ByteString -> ByteString
176
177        -- ** Packing 'CString's and pointers
178        packCString,            -- :: CString -> IO ByteString
179        packCStringLen,         -- :: CStringLen -> IO ByteString
180
181        -- ** Using ByteStrings as 'CString's
182        useAsCString,           -- :: ByteString -> (CString    -> IO a) -> IO a
183        useAsCStringLen,        -- :: ByteString -> (CStringLen -> IO a) -> IO a
184
185        -- * I\/O with 'ByteString's
186
187        -- ** Standard input and output
188        getLine,                -- :: IO ByteString
189        getContents,            -- :: IO ByteString
190        putStr,                 -- :: ByteString -> IO ()
191        putStrLn,               -- :: ByteString -> IO ()
192        interact,               -- :: (ByteString -> ByteString) -> IO ()
193
194        -- ** Files
195        readFile,               -- :: FilePath -> IO ByteString
196        writeFile,              -- :: FilePath -> ByteString -> IO ()
197        appendFile,             -- :: FilePath -> ByteString -> IO ()
198
199        -- ** I\/O with Handles
200        hGetLine,               -- :: Handle -> IO ByteString
201        hGetContents,           -- :: Handle -> IO ByteString
202        hGet,                   -- :: Handle -> Int -> IO ByteString
203        hGetSome,               -- :: Handle -> Int -> IO ByteString
204        hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
205        hPut,                   -- :: Handle -> ByteString -> IO ()
206        hPutNonBlocking,        -- :: Handle -> ByteString -> IO ByteString
207        hPutStr,                -- :: Handle -> ByteString -> IO ()
208        hPutStrLn,              -- :: Handle -> ByteString -> IO ()
209
210        breakByte
211
212  ) where
213
214import qualified Prelude as P
215import Prelude hiding           (reverse,head,tail,last,init,null
216                                ,length,map,lines,foldl,foldr,unlines
217                                ,concat,any,take,drop,splitAt,takeWhile
218                                ,dropWhile,span,break,elem,filter,maximum
219                                ,minimum,all,concatMap,foldl1,foldr1
220                                ,scanl,scanl1,scanr,scanr1
221                                ,readFile,writeFile,appendFile,replicate
222                                ,getContents,getLine,putStr,putStrLn,interact
223                                ,zip,zipWith,unzip,notElem
224#if !MIN_VERSION_base(4,6,0)
225                                ,catch
226#endif
227                                )
228
229#if MIN_VERSION_base(4,7,0)
230import Data.Bits                (finiteBitSize, shiftL, (.|.), (.&.))
231#else
232import Data.Bits                (bitSize, shiftL, (.|.), (.&.))
233#endif
234
235import Data.ByteString.Internal
236import Data.ByteString.Unsafe
237
238import qualified Data.List as List
239
240import Data.Word                (Word8)
241import Data.Maybe               (isJust)
242
243import Control.Exception        (IOException, catch, finally, assert, throwIO)
244import Control.Monad            (when)
245
246import Foreign.C.String         (CString, CStringLen)
247import Foreign.C.Types          (CSize)
248import Foreign.ForeignPtr       (ForeignPtr, withForeignPtr, touchForeignPtr)
249#if MIN_VERSION_base(4,5,0)
250import Foreign.ForeignPtr.Unsafe(unsafeForeignPtrToPtr)
251#else
252import Foreign.ForeignPtr       (unsafeForeignPtrToPtr)
253#endif
254import Foreign.Marshal.Alloc    (allocaBytes)
255import Foreign.Marshal.Array    (allocaArray)
256import Foreign.Ptr
257import Foreign.Storable         (Storable(..))
258
259-- hGetBuf and hPutBuf not available in yhc or nhc
260import System.IO                (stdin,stdout,hClose,hFileSize
261                                ,hGetBuf,hPutBuf,hGetBufNonBlocking
262                                ,hPutBufNonBlocking,withBinaryFile
263                                ,IOMode(..))
264import System.IO.Error          (mkIOError, illegalOperationErrorType)
265
266#if !(MIN_VERSION_base(4,8,0))
267import Data.Monoid              (Monoid(..))
268#endif
269
270#if MIN_VERSION_base(4,3,0)
271import System.IO                (hGetBufSome)
272#else
273import System.IO                (hWaitForInput, hIsEOF)
274#endif
275
276import Data.IORef
277import GHC.IO.Handle.Internals
278import GHC.IO.Handle.Types
279import GHC.IO.Buffer
280import GHC.IO.BufferedIO as Buffered
281import GHC.IO                   (unsafePerformIO, unsafeDupablePerformIO)
282import Data.Char                (ord)
283import Foreign.Marshal.Utils    (copyBytes)
284
285import GHC.Prim                 (Word#)
286import GHC.Base                 (build)
287import GHC.Word hiding (Word8)
288
289#if !(MIN_VERSION_base(4,7,0))
290finiteBitSize = bitSize
291#endif
292
293-- -----------------------------------------------------------------------------
294-- Introducing and eliminating 'ByteString's
295
296-- | /O(1)/ The empty 'ByteString'
297empty :: ByteString
298empty = PS nullForeignPtr 0 0
299
300-- | /O(1)/ Convert a 'Word8' into a 'ByteString'
301singleton :: Word8 -> ByteString
302singleton c = unsafeCreate 1 $ \p -> poke p c
303{-# INLINE [1] singleton #-}
304
305-- Inline [1] for intercalate rule
306
307--
308-- XXX The use of unsafePerformIO in allocating functions (unsafeCreate) is critical!
309--
310-- Otherwise:
311--
312--  singleton 255 `compare` singleton 127
313--
314-- is compiled to:
315--
316--  case mallocByteString 2 of
317--      ForeignPtr f internals ->
318--           case writeWord8OffAddr# f 0 255 of _ ->
319--           case writeWord8OffAddr# f 0 127 of _ ->
320--           case eqAddr# f f of
321--                  False -> case compare (GHC.Prim.plusAddr# f 0)
322--                                        (GHC.Prim.plusAddr# f 0)
323--
324--
325
326-- | /O(n)/ Convert a @['Word8']@ into a 'ByteString'.
327--
328-- For applications with large numbers of string literals, 'pack' can be a
329-- bottleneck. In such cases, consider using 'unsafePackAddress' (GHC only).
330pack :: [Word8] -> ByteString
331pack = packBytes
332
333-- | /O(n)/ Converts a 'ByteString' to a @['Word8']@.
334unpack :: ByteString -> [Word8]
335unpack bs = build (unpackFoldr bs)
336{-# INLINE unpack #-}
337
338--
339-- Have unpack fuse with good list consumers
340--
341unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
342unpackFoldr bs k z = foldr k z bs
343{-# INLINE [0] unpackFoldr #-}
344
345{-# RULES
346"ByteString unpack-list" [1]  forall bs .
347    unpackFoldr bs (:) [] = unpackBytes bs
348 #-}
349
350-- ---------------------------------------------------------------------
351-- Basic interface
352
353-- | /O(1)/ Test whether a ByteString is empty.
354null :: ByteString -> Bool
355null (PS _ _ l) = assert (l >= 0) $ l <= 0
356{-# INLINE null #-}
357
358-- ---------------------------------------------------------------------
359-- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
360length :: ByteString -> Int
361length (PS _ _ l) = assert (l >= 0) l
362{-# INLINE length #-}
363
364------------------------------------------------------------------------
365
366infixr 5 `cons` --same as list (:)
367infixl 5 `snoc`
368
369-- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
370-- complexity, as it requires making a copy.
371cons :: Word8 -> ByteString -> ByteString
372cons c (PS x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
373        poke p c
374        memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
375{-# INLINE cons #-}
376
377-- | /O(n)/ Append a byte to the end of a 'ByteString'
378snoc :: ByteString -> Word8 -> ByteString
379snoc (PS x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
380        memcpy p (f `plusPtr` s) (fromIntegral l)
381        poke (p `plusPtr` l) c
382{-# INLINE snoc #-}
383
384-- todo fuse
385
386-- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
387-- An exception will be thrown in the case of an empty ByteString.
388head :: ByteString -> Word8
389head (PS x s l)
390    | l <= 0    = errorEmptyList "head"
391    | otherwise = accursedUnutterablePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
392{-# INLINE head #-}
393
394-- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
395-- An exception will be thrown in the case of an empty ByteString.
396tail :: ByteString -> ByteString
397tail (PS p s l)
398    | l <= 0    = errorEmptyList "tail"
399    | otherwise = PS p (s+1) (l-1)
400{-# INLINE tail #-}
401
402-- | /O(1)/ Extract the head and tail of a ByteString, returning Nothing
403-- if it is empty.
404uncons :: ByteString -> Maybe (Word8, ByteString)
405uncons (PS x s l)
406    | l <= 0    = Nothing
407    | otherwise = Just (accursedUnutterablePerformIO $ withForeignPtr x
408                                                     $ \p -> peekByteOff p s,
409                        PS x (s+1) (l-1))
410{-# INLINE uncons #-}
411
412-- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
413-- An exception will be thrown in the case of an empty ByteString.
414last :: ByteString -> Word8
415last ps@(PS x s l)
416    | null ps   = errorEmptyList "last"
417    | otherwise = accursedUnutterablePerformIO $
418                    withForeignPtr x $ \p -> peekByteOff p (s+l-1)
419{-# INLINE last #-}
420
421-- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
422-- An exception will be thrown in the case of an empty ByteString.
423init :: ByteString -> ByteString
424init ps@(PS p s l)
425    | null ps   = errorEmptyList "init"
426    | otherwise = PS p s (l-1)
427{-# INLINE init #-}
428
429-- | /O(1)/ Extract the 'init' and 'last' of a ByteString, returning Nothing
430-- if it is empty.
431unsnoc :: ByteString -> Maybe (ByteString, Word8)
432unsnoc (PS x s l)
433    | l <= 0    = Nothing
434    | otherwise = Just (PS x s (l-1),
435                        accursedUnutterablePerformIO $
436                          withForeignPtr x $ \p -> peekByteOff p (s+l-1))
437{-# INLINE unsnoc #-}
438
439-- | /O(n)/ Append two ByteStrings
440append :: ByteString -> ByteString -> ByteString
441append = mappend
442{-# INLINE append #-}
443
444-- ---------------------------------------------------------------------
445-- Transformations
446
447-- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
448-- element of @xs@.
449map :: (Word8 -> Word8) -> ByteString -> ByteString
450map f (PS fp s len) = unsafeDupablePerformIO $ withForeignPtr fp $ \a ->
451    create len $ map_ 0 (a `plusPtr` s)
452  where
453    map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
454    map_ !n !p1 !p2
455       | n >= len = return ()
456       | otherwise = do
457            x <- peekByteOff p1 n
458            pokeByteOff p2 n (f x)
459            map_ (n+1) p1 p2
460{-# INLINE map #-}
461
462-- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
463reverse :: ByteString -> ByteString
464reverse (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
465        c_reverse p (f `plusPtr` s) (fromIntegral l)
466
467-- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
468-- 'ByteString' and \`intersperses\' that byte between the elements of
469-- the 'ByteString'.  It is analogous to the intersperse function on
470-- Lists.
471intersperse :: Word8 -> ByteString -> ByteString
472intersperse c ps@(PS x s l)
473    | length ps < 2  = ps
474    | otherwise      = unsafeCreate (2*l-1) $ \p -> withForeignPtr x $ \f ->
475        c_intersperse p (f `plusPtr` s) (fromIntegral l) c
476
477-- | The 'transpose' function transposes the rows and columns of its
478-- 'ByteString' argument.
479transpose :: [ByteString] -> [ByteString]
480transpose ps = P.map pack . List.transpose . P.map unpack $ ps
481
482-- ---------------------------------------------------------------------
483-- Reducing 'ByteString's
484
485-- | 'foldl', applied to a binary operator, a starting value (typically
486-- the left-identity of the operator), and a ByteString, reduces the
487-- ByteString using the binary operator, from left to right.
488--
489foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
490foldl f z (PS fp off len) =
491      let p = unsafeForeignPtrToPtr fp
492       in go (p `plusPtr` (off+len-1)) (p `plusPtr` (off-1))
493    where
494      -- not tail recursive; traverses array right to left
495      go !p !q | p == q    = z
496               | otherwise = let !x = accursedUnutterablePerformIO $ do
497                                        x' <- peek p
498                                        touchForeignPtr fp
499                                        return x'
500                             in f (go (p `plusPtr` (-1)) q) x
501{-# INLINE foldl #-}
502
503-- | 'foldl'' is like 'foldl', but strict in the accumulator.
504--
505foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
506foldl' f v (PS fp off len) =
507      accursedUnutterablePerformIO $ withForeignPtr fp $ \p ->
508        go v (p `plusPtr` off) (p `plusPtr` (off+len))
509    where
510      -- tail recursive; traverses array left to right
511      go !z !p !q | p == q    = return z
512                  | otherwise = do x <- peek p
513                                   go (f z x) (p `plusPtr` 1) q
514{-# INLINE foldl' #-}
515
516-- | 'foldr', applied to a binary operator, a starting value
517-- (typically the right-identity of the operator), and a ByteString,
518-- reduces the ByteString using the binary operator, from right to left.
519foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
520foldr k z (PS fp off len) =
521      let p = unsafeForeignPtrToPtr fp
522       in go (p `plusPtr` off) (p `plusPtr` (off+len))
523    where
524      -- not tail recursive; traverses array left to right
525      go !p !q | p == q    = z
526               | otherwise = let !x = accursedUnutterablePerformIO $ do
527                                        x' <- peek p
528                                        touchForeignPtr fp
529                                        return x'
530                              in k x (go (p `plusPtr` 1) q)
531{-# INLINE foldr #-}
532
533-- | 'foldr'' is like 'foldr', but strict in the accumulator.
534foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a
535foldr' k v (PS fp off len) =
536      accursedUnutterablePerformIO $ withForeignPtr fp $ \p ->
537        go v (p `plusPtr` (off+len-1)) (p `plusPtr` (off-1))
538    where
539      -- tail recursive; traverses array right to left
540      go !z !p !q | p == q    = return z
541                  | otherwise = do x <- peek p
542                                   go (k x z) (p `plusPtr` (-1)) q
543{-# INLINE foldr' #-}
544
545-- | 'foldl1' is a variant of 'foldl' that has no starting value
546-- argument, and thus must be applied to non-empty 'ByteString's.
547-- An exception will be thrown in the case of an empty ByteString.
548foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
549foldl1 f ps
550    | null ps   = errorEmptyList "foldl1"
551    | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
552{-# INLINE foldl1 #-}
553
554-- | 'foldl1'' is like 'foldl1', but strict in the accumulator.
555-- An exception will be thrown in the case of an empty ByteString.
556foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
557foldl1' f ps
558    | null ps   = errorEmptyList "foldl1'"
559    | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps)
560{-# INLINE foldl1' #-}
561
562-- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
563-- and thus must be applied to non-empty 'ByteString's
564-- An exception will be thrown in the case of an empty ByteString.
565foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
566foldr1 f ps
567    | null ps        = errorEmptyList "foldr1"
568    | otherwise      = foldr f (unsafeLast ps) (unsafeInit ps)
569{-# INLINE foldr1 #-}
570
571-- | 'foldr1'' is a variant of 'foldr1', but is strict in the
572-- accumulator.
573foldr1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
574foldr1' f ps
575    | null ps        = errorEmptyList "foldr1"
576    | otherwise      = foldr' f (unsafeLast ps) (unsafeInit ps)
577{-# INLINE foldr1' #-}
578
579-- ---------------------------------------------------------------------
580-- Special folds
581
582-- | /O(n)/ Concatenate a list of ByteStrings.
583concat :: [ByteString] -> ByteString
584concat = mconcat
585
586-- | Map a function over a 'ByteString' and concatenate the results
587concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
588concatMap f = concat . foldr ((:) . f) []
589
590-- foldr (append . f) empty
591
592-- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
593-- any element of the 'ByteString' satisfies the predicate.
594any :: (Word8 -> Bool) -> ByteString -> Bool
595any _ (PS _ _ 0) = False
596any f (PS x s l) = accursedUnutterablePerformIO $ withForeignPtr x $ \ptr ->
597        go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
598    where
599        go !p !q | p == q    = return False
600                 | otherwise = do c <- peek p
601                                  if f c then return True
602                                         else go (p `plusPtr` 1) q
603{-# INLINE any #-}
604
605-- todo fuse
606
607-- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
608-- if all elements of the 'ByteString' satisfy the predicate.
609all :: (Word8 -> Bool) -> ByteString -> Bool
610all _ (PS _ _ 0) = True
611all f (PS x s l) = accursedUnutterablePerformIO $ withForeignPtr x $ \ptr ->
612        go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
613    where
614        go !p !q | p == q     = return True  -- end of list
615                 | otherwise  = do c <- peek p
616                                   if f c
617                                      then go (p `plusPtr` 1) q
618                                      else return False
619{-# INLINE all #-}
620
621------------------------------------------------------------------------
622
623-- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
624-- This function will fuse.
625-- An exception will be thrown in the case of an empty ByteString.
626maximum :: ByteString -> Word8
627maximum xs@(PS x s l)
628    | null xs   = errorEmptyList "maximum"
629    | otherwise = accursedUnutterablePerformIO $ withForeignPtr x $ \p ->
630                      c_maximum (p `plusPtr` s) (fromIntegral l)
631{-# INLINE maximum #-}
632
633-- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
634-- This function will fuse.
635-- An exception will be thrown in the case of an empty ByteString.
636minimum :: ByteString -> Word8
637minimum xs@(PS x s l)
638    | null xs   = errorEmptyList "minimum"
639    | otherwise = accursedUnutterablePerformIO $ withForeignPtr x $ \p ->
640                      c_minimum (p `plusPtr` s) (fromIntegral l)
641{-# INLINE minimum #-}
642
643------------------------------------------------------------------------
644
645-- | The 'mapAccumL' function behaves like a combination of 'map' and
646-- 'foldl'; it applies a function to each element of a ByteString,
647-- passing an accumulating parameter from left to right, and returning a
648-- final value of this accumulator together with the new list.
649mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
650mapAccumL f acc (PS fp o len) = unsafeDupablePerformIO $ withForeignPtr fp $ \a -> do
651    gp   <- mallocByteString len
652    acc' <- withForeignPtr gp $ \p -> mapAccumL_ acc 0 (a `plusPtr` o) p
653    return (acc', PS gp 0 len)
654  where
655    mapAccumL_ !s !n !p1 !p2
656       | n >= len = return s
657       | otherwise = do
658            x <- peekByteOff p1 n
659            let (s', y) = f s x
660            pokeByteOff p2 n y
661            mapAccumL_ s' (n+1) p1 p2
662{-# INLINE mapAccumL #-}
663
664-- | The 'mapAccumR' function behaves like a combination of 'map' and
665-- 'foldr'; it applies a function to each element of a ByteString,
666-- passing an accumulating parameter from right to left, and returning a
667-- final value of this accumulator together with the new ByteString.
668mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
669mapAccumR f acc (PS fp o len) = unsafeDupablePerformIO $ withForeignPtr fp $ \a -> do
670    gp   <- mallocByteString len
671    acc' <- withForeignPtr gp $ \p -> mapAccumR_ acc (len-1) (a `plusPtr` o) p
672    return $! (acc', PS gp 0 len)
673  where
674    mapAccumR_ !s !n !p !q
675       | n <  0    = return s
676       | otherwise = do
677            x  <- peekByteOff p n
678            let (s', y) = f s x
679            pokeByteOff q n y
680            mapAccumR_ s' (n-1) p q
681{-# INLINE mapAccumR #-}
682
683-- ---------------------------------------------------------------------
684-- Building ByteStrings
685
686-- | 'scanl' is similar to 'foldl', but returns a list of successive
687-- reduced values from the left. This function will fuse.
688--
689-- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
690--
691-- Note that
692--
693-- > last (scanl f z xs) == foldl f z xs.
694--
695scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
696
697scanl f v (PS fp s len) = unsafeDupablePerformIO $ withForeignPtr fp $ \a ->
698    create (len+1) $ \q -> do
699        poke q v
700        scanl_ v 0 (a `plusPtr` s) (q `plusPtr` 1)
701  where
702    scanl_ !z !n !p !q
703        | n >= len  = return ()
704        | otherwise = do
705            x <- peekByteOff p n
706            let z' = f z x
707            pokeByteOff q n z'
708            scanl_ z' (n+1) p q
709{-# INLINE scanl #-}
710
711    -- n.b. haskell's List scan returns a list one bigger than the
712    -- input, so we need to snoc here to get some extra space, however,
713    -- it breaks map/up fusion (i.e. scanl . map no longer fuses)
714
715-- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
716-- This function will fuse.
717--
718-- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
719scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
720scanl1 f ps
721    | null ps   = empty
722    | otherwise = scanl f (unsafeHead ps) (unsafeTail ps)
723{-# INLINE scanl1 #-}
724
725-- | scanr is the right-to-left dual of scanl.
726scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
727scanr f v (PS fp s len) = unsafeDupablePerformIO $ withForeignPtr fp $ \a ->
728    create (len+1) $ \q -> do
729        poke (q `plusPtr` len) v
730        scanr_ v (len-1) (a `plusPtr` s) q
731  where
732    scanr_ !z !n !p !q
733        | n <  0    = return ()
734        | otherwise = do
735            x <- peekByteOff p n
736            let z' = f x z
737            pokeByteOff q n z'
738            scanr_ z' (n-1) p q
739{-# INLINE scanr #-}
740
741-- | 'scanr1' is a variant of 'scanr' that has no starting value argument.
742scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
743scanr1 f ps
744    | null ps   = empty
745    | otherwise = scanr f (unsafeLast ps) (unsafeInit ps)
746{-# INLINE scanr1 #-}
747
748-- ---------------------------------------------------------------------
749-- Unfolds and replicates
750
751-- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
752-- the value of every element. The following holds:
753--
754-- > replicate w c = unfoldr w (\u -> Just (u,u)) c
755--
756-- This implemenation uses @memset(3)@
757replicate :: Int -> Word8 -> ByteString
758replicate w c
759    | w <= 0    = empty
760    | otherwise = unsafeCreate w $ \ptr ->
761                      memset ptr c (fromIntegral w) >> return ()
762
763-- | /O(n)/, where /n/ is the length of the result.  The 'unfoldr'
764-- function is analogous to the List \'unfoldr\'.  'unfoldr' builds a
765-- ByteString from a seed value.  The function takes the element and
766-- returns 'Nothing' if it is done producing the ByteString or returns
767-- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string,
768-- and @b@ is the seed value for further production.
769--
770-- Examples:
771--
772-- >    unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0
773-- > == pack [0, 1, 2, 3, 4, 5]
774--
775unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
776unfoldr f = concat . unfoldChunk 32 64
777  where unfoldChunk n n' x =
778          case unfoldrN n f x of
779            (s, Nothing) -> s : []
780            (s, Just x') -> s : unfoldChunk n' (n+n') x'
781{-# INLINE unfoldr #-}
782
783-- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed
784-- value.  However, the length of the result is limited by the first
785-- argument to 'unfoldrN'.  This function is more efficient than 'unfoldr'
786-- when the maximum length of the result is known.
787--
788-- The following equation relates 'unfoldrN' and 'unfoldr':
789--
790-- > fst (unfoldrN n f s) == take n (unfoldr f s)
791--
792unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
793unfoldrN i f x0
794    | i < 0     = (empty, Just x0)
795    | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0
796  where
797    go !p !x !n
798      | n == i    = return (0, n, Just x)
799      | otherwise = case f x of
800                      Nothing     -> return (0, n, Nothing)
801                      Just (w,x') -> do poke p w
802                                        go (p `plusPtr` 1) x' (n+1)
803{-# INLINE unfoldrN #-}
804
805-- ---------------------------------------------------------------------
806-- Substrings
807
808-- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
809-- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
810take :: Int -> ByteString -> ByteString
811take n ps@(PS x s l)
812    | n <= 0    = empty
813    | n >= l    = ps
814    | otherwise = PS x s n
815{-# INLINE take #-}
816
817-- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
818-- elements, or @[]@ if @n > 'length' xs@.
819drop  :: Int -> ByteString -> ByteString
820drop n ps@(PS x s l)
821    | n <= 0    = ps
822    | n >= l    = empty
823    | otherwise = PS x (s+n) (l-n)
824{-# INLINE drop #-}
825
826-- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
827splitAt :: Int -> ByteString -> (ByteString, ByteString)
828splitAt n ps@(PS x s l)
829    | n <= 0    = (empty, ps)
830    | n >= l    = (ps, empty)
831    | otherwise = (PS x s n, PS x (s+n) (l-n))
832{-# INLINE splitAt #-}
833
834-- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
835-- returns the longest prefix (possibly empty) of @xs@ of elements that
836-- satisfy @p@.
837takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
838takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps
839{-# INLINE takeWhile #-}
840
841-- | 'takeWhileEnd', applied to a predicate @p@ and a ByteString @xs@, returns
842-- the longest suffix (possibly empty) of @xs@ of elements that satisfy @p@.
843--
844-- @since 0.10.12.0
845takeWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString
846takeWhileEnd f ps = unsafeDrop (findFromEndUntil (not . f) ps) ps
847{-# INLINE takeWhileEnd #-}
848
849-- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
850dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
851dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps
852{-# INLINE dropWhile #-}
853
854-- | 'dropWhileEnd' @p xs@ returns the prefix remaining after 'takeWhileEnd' @p
855-- xs@.
856--
857-- @since 0.10.12.0
858dropWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString
859dropWhileEnd f ps = unsafeTake (findFromEndUntil (not . f) ps) ps
860{-# INLINE dropWhileEnd #-}
861
862-- instead of findIndexOrEnd, we could use memchr here.
863
864-- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
865--
866-- Under GHC, a rewrite rule will transform break (==) into a
867-- call to the specialised breakByte:
868--
869-- > break ((==) x) = breakByte x
870-- > break (==x) = breakByte x
871--
872break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
873break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps)
874{-# INLINE [1] break #-}
875
876-- See bytestring #70
877#if MIN_VERSION_base(4,9,0)
878{-# RULES
879"ByteString specialise break (x ==)" forall x.
880    break (x `eqWord8`) = breakByte x
881"ByteString specialise break (== x)" forall x.
882    break (`eqWord8` x) = breakByte x
883  #-}
884#else
885{-# RULES
886"ByteString specialise break (x ==)" forall x.
887    break (x ==) = breakByte x
888"ByteString specialise break (== x)" forall x.
889    break (== x) = breakByte x
890  #-}
891#endif
892
893-- INTERNAL:
894
895-- | 'breakByte' breaks its ByteString argument at the first occurence
896-- of the specified byte. It is more efficient than 'break' as it is
897-- implemented with @memchr(3)@. I.e.
898--
899-- > break (==99) "abcd" == breakByte 99 "abcd" -- fromEnum 'c' == 99
900--
901breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
902breakByte c p = case elemIndex c p of
903    Nothing -> (p,empty)
904    Just n  -> (unsafeTake n p, unsafeDrop n p)
905{-# INLINE breakByte #-}
906{-# DEPRECATED breakByte "It is an internal function and should never have been exported. Use 'break (== x)' instead. (There are rewrite rules that handle this special case of 'break'.)" #-}
907
908-- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString'
909--
910-- breakEnd p == spanEnd (not.p)
911breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
912breakEnd  p ps = splitAt (findFromEndUntil p ps) ps
913
914-- | 'span' @p xs@ breaks the ByteString into two segments. It is
915-- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
916span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
917span p ps = break (not . p) ps
918{-# INLINE [1] span #-}
919
920-- | 'spanByte' breaks its ByteString argument at the first
921-- occurence of a byte other than its argument. It is more efficient
922-- than 'span (==)'
923--
924-- > span  (==99) "abcd" == spanByte 99 "abcd" -- fromEnum 'c' == 99
925--
926spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
927spanByte c ps@(PS x s l) =
928    accursedUnutterablePerformIO $
929      withForeignPtr x $ \p ->
930        go (p `plusPtr` s) 0
931  where
932    go !p !i | i >= l    = return (ps, empty)
933             | otherwise = do c' <- peekByteOff p i
934                              if c /= c'
935                                  then return (unsafeTake i ps, unsafeDrop i ps)
936                                  else go p (i+1)
937{-# INLINE spanByte #-}
938
939-- See bytestring #70
940#if MIN_VERSION_base(4,9,0)
941{-# RULES
942"ByteString specialise span (x ==)" forall x.
943    span (x `eqWord8`) = spanByte x
944"ByteString specialise span (== x)" forall x.
945    span (`eqWord8` x) = spanByte x
946  #-}
947#else
948{-# RULES
949"ByteString specialise span (x ==)" forall x.
950    span (x ==) = spanByte x
951"ByteString specialise span (== x)" forall x.
952    span (== x) = spanByte x
953  #-}
954#endif
955
956-- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
957-- We have
958--
959-- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
960--
961-- and
962--
963-- > spanEnd (not . isSpace) ps
964-- >    ==
965-- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
966--
967spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
968spanEnd  p ps = splitAt (findFromEndUntil (not.p) ps) ps
969
970-- | /O(n)/ Splits a 'ByteString' into components delimited by
971-- separators, where the predicate returns True for a separator element.
972-- The resulting components do not contain the separators.  Two adjacent
973-- separators result in an empty component in the output.  eg.
974--
975-- > splitWith (==97) "aabbaca" == ["","","bb","c",""] -- fromEnum 'a' == 97
976-- > splitWith (==97) []        == []
977--
978splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
979splitWith _pred (PS _  _   0) = []
980splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
981  where pred# c# = pred_ (W8# c#)
982
983        splitWith0 !pred' !off' !len' !fp' =
984          accursedUnutterablePerformIO $
985            withForeignPtr fp $ \p ->
986              splitLoop pred' p 0 off' len' fp'
987
988        splitLoop :: (Word# -> Bool)
989                  -> Ptr Word8
990                  -> Int -> Int -> Int
991                  -> ForeignPtr Word8
992                  -> IO [ByteString]
993
994        splitLoop pred' p idx' off' len' fp'
995            | idx' >= len'  = return [PS fp' off' idx']
996            | otherwise = do
997                w <- peekElemOff p (off'+idx')
998                if pred' (case w of W8# w# -> w#)
999                   then return (PS fp' off' idx' :
1000                              splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1001                   else splitLoop pred' p (idx'+1) off' len' fp'
1002{-# INLINE splitWith #-}
1003
1004-- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1005-- argument, consuming the delimiter. I.e.
1006--
1007-- > split 10  "a\nb\nd\ne" == ["a","b","d","e"]   -- fromEnum '\n' == 10
1008-- > split 97  "aXaXaXa"    == ["","X","X","X",""] -- fromEnum 'a' == 97
1009-- > split 120 "x"          == ["",""]             -- fromEnum 'x' == 120
1010--
1011-- and
1012--
1013-- > intercalate [c] . split c == id
1014-- > split == splitWith . (==)
1015--
1016-- As for all splitting functions in this library, this function does
1017-- not copy the substrings, it just constructs new 'ByteString's that
1018-- are slices of the original.
1019--
1020split :: Word8 -> ByteString -> [ByteString]
1021split _ (PS _ _ 0) = []
1022split w (PS x s l) = loop 0
1023    where
1024        loop !n =
1025            let q = accursedUnutterablePerformIO $ withForeignPtr x $ \p ->
1026                      memchr (p `plusPtr` (s+n))
1027                             w (fromIntegral (l-n))
1028            in if q == nullPtr
1029                then [PS x (s+n) (l-n)]
1030                else let i = accursedUnutterablePerformIO $ withForeignPtr x $ \p ->
1031                               return (q `minusPtr` (p `plusPtr` s))
1032                      in PS x (s+n) (i-n) : loop (i+1)
1033
1034{-# INLINE split #-}
1035
1036
1037-- | The 'group' function takes a ByteString and returns a list of
1038-- ByteStrings such that the concatenation of the result is equal to the
1039-- argument.  Moreover, each sublist in the result contains only equal
1040-- elements.  For example,
1041--
1042-- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1043--
1044-- It is a special case of 'groupBy', which allows the programmer to
1045-- supply their own equality test. It is about 40% faster than
1046-- /groupBy (==)/
1047group :: ByteString -> [ByteString]
1048group xs
1049    | null xs   = []
1050    | otherwise = ys : group zs
1051    where
1052        (ys, zs) = spanByte (unsafeHead xs) xs
1053
1054-- | The 'groupBy' function is the non-overloaded version of 'group'.
1055groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1056groupBy k xs
1057    | null xs   = []
1058    | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs)
1059    where
1060        n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1061
1062-- | /O(n)/ The 'intercalate' function takes a 'ByteString' and a list of
1063-- 'ByteString's and concatenates the list after interspersing the first
1064-- argument between each element of the list.
1065intercalate :: ByteString -> [ByteString] -> ByteString
1066intercalate s = concat . List.intersperse s
1067{-# INLINE [1] intercalate #-}
1068
1069{-# RULES
1070"ByteString specialise intercalate c -> intercalateByte" forall c s1 s2 .
1071    intercalate (singleton c) [s1, s2] = intercalateWithByte c s1 s2
1072  #-}
1073
1074-- | /O(n)/ intercalateWithByte. An efficient way to join to two ByteStrings
1075-- with a char. Around 4 times faster than the generalised join.
1076--
1077intercalateWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1078intercalateWithByte c f@(PS ffp s l) g@(PS fgp t m) = unsafeCreate len $ \ptr ->
1079    withForeignPtr ffp $ \fp ->
1080    withForeignPtr fgp $ \gp -> do
1081        memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1082        poke (ptr `plusPtr` l) c
1083        memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1084    where
1085      len = length f + length g + 1
1086{-# INLINE intercalateWithByte #-}
1087
1088-- ---------------------------------------------------------------------
1089-- Indexing ByteStrings
1090
1091-- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1092index :: ByteString -> Int -> Word8
1093index ps n
1094    | n < 0          = moduleError "index" ("negative index: " ++ show n)
1095    | n >= length ps = moduleError "index" ("index too large: " ++ show n
1096                                         ++ ", length = " ++ show (length ps))
1097    | otherwise      = ps `unsafeIndex` n
1098{-# INLINE index #-}
1099
1100-- | /O(n)/ The 'elemIndex' function returns the index of the first
1101-- element in the given 'ByteString' which is equal to the query
1102-- element, or 'Nothing' if there is no such element.
1103-- This implementation uses memchr(3).
1104elemIndex :: Word8 -> ByteString -> Maybe Int
1105elemIndex c (PS x s l) = accursedUnutterablePerformIO $ withForeignPtr x $ \p -> do
1106    let p' = p `plusPtr` s
1107    q <- memchr p' c (fromIntegral l)
1108    return $! if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1109{-# INLINE elemIndex #-}
1110
1111-- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
1112-- element in the given 'ByteString' which is equal to the query
1113-- element, or 'Nothing' if there is no such element. The following
1114-- holds:
1115--
1116-- > elemIndexEnd c xs ==
1117-- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1118--
1119elemIndexEnd :: Word8 -> ByteString -> Maybe Int
1120elemIndexEnd = findIndexEnd . (==)
1121{-# INLINE elemIndexEnd #-}
1122
1123-- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1124-- the indices of all elements equal to the query element, in ascending order.
1125-- This implementation uses memchr(3).
1126elemIndices :: Word8 -> ByteString -> [Int]
1127elemIndices w (PS x s l) = loop 0
1128    where
1129        loop !n = let q = accursedUnutterablePerformIO $ withForeignPtr x $ \p ->
1130                           memchr (p `plusPtr` (n+s))
1131                                                w (fromIntegral (l - n))
1132                  in if q == nullPtr
1133                        then []
1134                        else let i = accursedUnutterablePerformIO $ withForeignPtr x $ \p ->
1135                                       return (q `minusPtr` (p `plusPtr` s))
1136                             in i : loop (i+1)
1137{-# INLINE elemIndices #-}
1138
1139-- | count returns the number of times its argument appears in the ByteString
1140--
1141-- > count = length . elemIndices
1142--
1143-- But more efficiently than using length on the intermediate list.
1144count :: Word8 -> ByteString -> Int
1145count w (PS x s m) = accursedUnutterablePerformIO $ withForeignPtr x $ \p ->
1146    fmap fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
1147{-# INLINE count #-}
1148
1149-- | /O(n)/ The 'findIndex' function takes a predicate and a 'ByteString' and
1150-- returns the index of the first element in the ByteString
1151-- satisfying the predicate.
1152findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1153findIndex k (PS x s l) = accursedUnutterablePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1154  where
1155    go !ptr !n | n >= l    = return Nothing
1156               | otherwise = do w <- peek ptr
1157                                if k w
1158                                  then return (Just n)
1159                                  else go (ptr `plusPtr` 1) (n+1)
1160{-# INLINE findIndex #-}
1161
1162-- | /O(n)/ The 'findIndexEnd' function takes a predicate and a 'ByteString' and
1163-- returns the index of the last element in the ByteString
1164-- satisfying the predicate.
1165--
1166-- @since 0.10.12.0
1167findIndexEnd :: (Word8 -> Bool) -> ByteString -> Maybe Int
1168findIndexEnd k (PS x s l) = accursedUnutterablePerformIO $ withForeignPtr x $ \ f -> go (f `plusPtr` s) (l-1)
1169  where
1170    go !ptr !n | n < 0     = return Nothing
1171               | otherwise = do w <- peekByteOff ptr n
1172                                if k w
1173                                  then return (Just n)
1174                                  else go ptr (n-1)
1175{-# INLINE findIndexEnd #-}
1176
1177-- | /O(n)/ The 'findIndices' function extends 'findIndex', by returning the
1178-- indices of all elements satisfying the predicate, in ascending order.
1179findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1180findIndices p ps = loop 0 ps
1181   where
1182     loop !n !qs | null qs           = []
1183                 | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1184                 | otherwise         =     loop (n+1) (unsafeTail qs)
1185
1186-- ---------------------------------------------------------------------
1187-- Searching ByteStrings
1188
1189-- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1190elem :: Word8 -> ByteString -> Bool
1191elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1192{-# INLINE elem #-}
1193
1194-- | /O(n)/ 'notElem' is the inverse of 'elem'
1195notElem :: Word8 -> ByteString -> Bool
1196notElem c ps = not (elem c ps)
1197{-# INLINE notElem #-}
1198
1199-- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1200-- returns a ByteString containing those characters that satisfy the
1201-- predicate.
1202filter :: (Word8 -> Bool) -> ByteString -> ByteString
1203filter k ps@(PS x s l)
1204    | null ps   = ps
1205    | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> do
1206        t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1207        return $! t `minusPtr` p -- actual length
1208    where
1209        go !f !t !end | f == end  = return t
1210                      | otherwise = do
1211                          w <- peek f
1212                          if k w
1213                            then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1214                            else             go (f `plusPtr` 1) t               end
1215{-# INLINE filter #-}
1216
1217{-
1218--
1219-- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1220-- case of filtering a single byte. It is more efficient to use
1221-- /filterByte/ in this case.
1222--
1223-- > filterByte == filter . (==)
1224--
1225-- filterByte is around 10x faster, and uses much less space, than its
1226-- filter equivalent
1227--
1228filterByte :: Word8 -> ByteString -> ByteString
1229filterByte w ps = replicate (count w ps) w
1230{-# INLINE filterByte #-}
1231
1232{-# RULES
1233"ByteString specialise filter (== x)" forall x.
1234    filter ((==) x) = filterByte x
1235"ByteString specialise filter (== x)" forall x.
1236    filter (== x) = filterByte x
1237  #-}
1238-}
1239
1240-- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1241-- and returns the first element in matching the predicate, or 'Nothing'
1242-- if there is no such element.
1243--
1244-- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1245--
1246find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1247find f p = case findIndex f p of
1248                    Just n -> Just (p `unsafeIndex` n)
1249                    _      -> Nothing
1250{-# INLINE find #-}
1251
1252-- | /O(n)/ The 'partition' function takes a predicate a ByteString and returns
1253-- the pair of ByteStrings with elements which do and do not satisfy the
1254-- predicate, respectively; i.e.,
1255--
1256-- > partition p bs == (filter p xs, filter (not . p) xs)
1257--
1258partition :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1259partition f s = unsafeDupablePerformIO $
1260    do fp' <- mallocByteString len
1261       withForeignPtr fp' $ \p ->
1262           do let end = p `plusPtr` (len - 1)
1263              mid <- sep 0 p end
1264              rev mid end
1265              let i = mid `minusPtr` p
1266              return (PS fp' 0 i,
1267                      PS fp' i (len - i))
1268  where
1269    len  = length s
1270    incr = (`plusPtr` 1)
1271    decr = (`plusPtr` (-1))
1272
1273    sep !i !p1 !p2
1274       | i == len  = return p1
1275       | f w       = do poke p1 w
1276                        sep (i + 1) (incr p1) p2
1277       | otherwise = do poke p2 w
1278                        sep (i + 1) p1 (decr p2)
1279      where
1280        w = s `unsafeIndex` i
1281
1282    rev !p1 !p2
1283      | p1 >= p2  = return ()
1284      | otherwise = do a <- peek p1
1285                       b <- peek p2
1286                       poke p1 b
1287                       poke p2 a
1288                       rev (incr p1) (decr p2)
1289
1290-- --------------------------------------------------------------------
1291-- Sarching for substrings
1292
1293-- |/O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1294-- if the first is a prefix of the second.
1295isPrefixOf :: ByteString -> ByteString -> Bool
1296isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1297    | l1 == 0   = True
1298    | l2 < l1   = False
1299    | otherwise = accursedUnutterablePerformIO $ withForeignPtr x1 $ \p1 ->
1300        withForeignPtr x2 $ \p2 -> do
1301            i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1302            return $! i == 0
1303
1304-- | /O(n)/ The 'stripPrefix' function takes two ByteStrings and returns 'Just'
1305-- the remainder of the second iff the first is its prefix, and otherwise
1306-- 'Nothing'.
1307--
1308-- @since 0.10.8.0
1309stripPrefix :: ByteString -> ByteString -> Maybe ByteString
1310stripPrefix bs1@(PS _ _ l1) bs2
1311   | bs1 `isPrefixOf` bs2 = Just (unsafeDrop l1 bs2)
1312   | otherwise = Nothing
1313
1314-- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1315-- iff the first is a suffix of the second.
1316--
1317-- The following holds:
1318--
1319-- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1320--
1321-- However, the real implemenation uses memcmp to compare the end of the
1322-- string only, with no reverse required..
1323isSuffixOf :: ByteString -> ByteString -> Bool
1324isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1325    | l1 == 0   = True
1326    | l2 < l1   = False
1327    | otherwise = accursedUnutterablePerformIO $ withForeignPtr x1 $ \p1 ->
1328        withForeignPtr x2 $ \p2 -> do
1329            i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1330            return $! i == 0
1331
1332-- | /O(n)/ The 'stripSuffix' function takes two ByteStrings and returns 'Just'
1333-- the remainder of the second iff the first is its suffix, and otherwise
1334-- 'Nothing'.
1335stripSuffix :: ByteString -> ByteString -> Maybe ByteString
1336stripSuffix bs1@(PS _ _ l1) bs2@(PS _ _ l2)
1337   | bs1 `isSuffixOf` bs2 = Just (unsafeTake (l2 - l1) bs2)
1338   | otherwise = Nothing
1339
1340-- | Check whether one string is a substring of another. @isInfixOf
1341-- p s@ is equivalent to @not (null (findSubstrings p s))@.
1342isInfixOf :: ByteString -> ByteString -> Bool
1343isInfixOf p s = isJust (findSubstring p s)
1344
1345-- | Break a string on a substring, returning a pair of the part of the
1346-- string prior to the match, and the rest of the string.
1347--
1348-- The following relationships hold:
1349--
1350-- > break (== c) l == breakSubstring (singleton c) l
1351--
1352-- and:
1353--
1354-- > findSubstring s l ==
1355-- >    if null s then Just 0
1356-- >              else case breakSubstring s l of
1357-- >                       (x,y) | null y    -> Nothing
1358-- >                             | otherwise -> Just (length x)
1359--
1360-- For example, to tokenise a string, dropping delimiters:
1361--
1362-- > tokenise x y = h : if null t then [] else tokenise x (drop (length x) t)
1363-- >     where (h,t) = breakSubstring x y
1364--
1365-- To skip to the first occurence of a string:
1366--
1367-- > snd (breakSubstring x y)
1368--
1369-- To take the parts of a string before a delimiter:
1370--
1371-- > fst (breakSubstring x y)
1372--
1373-- Note that calling `breakSubstring x` does some preprocessing work, so
1374-- you should avoid unnecessarily duplicating breakSubstring calls with the same
1375-- pattern.
1376--
1377breakSubstring :: ByteString -- ^ String to search for
1378               -> ByteString -- ^ String to search in
1379               -> (ByteString,ByteString) -- ^ Head and tail of string broken at substring
1380breakSubstring pat =
1381  case lp of
1382    0 -> \src -> (empty,src)
1383    1 -> breakByte (unsafeHead pat)
1384    _ -> if lp * 8 <= finiteBitSize (0 :: Word)
1385             then shift
1386             else karpRabin
1387  where
1388    unsafeSplitAt i s = (unsafeTake i s, unsafeDrop i s)
1389    lp                = length pat
1390    karpRabin :: ByteString -> (ByteString, ByteString)
1391    karpRabin src
1392        | length src < lp = (src,empty)
1393        | otherwise = search (rollingHash $ unsafeTake lp src) lp
1394      where
1395        k           = 2891336453 :: Word32
1396        rollingHash = foldl' (\h b -> h * k + fromIntegral b) 0
1397        hp          = rollingHash pat
1398        m           = k ^ lp
1399        get = fromIntegral . unsafeIndex src
1400        search !hs !i
1401            | hp == hs && pat == unsafeTake lp b = u
1402            | length src <= i                    = (src,empty) -- not found
1403            | otherwise                          = search hs' (i + 1)
1404          where
1405            u@(_, b) = unsafeSplitAt (i - lp) src
1406            hs' = hs * k +
1407                  get i -
1408                  m * get (i - lp)
1409    {-# INLINE karpRabin #-}
1410
1411    shift :: ByteString -> (ByteString, ByteString)
1412    shift !src
1413        | length src < lp = (src,empty)
1414        | otherwise       = search (intoWord $ unsafeTake lp src) lp
1415      where
1416        intoWord :: ByteString -> Word
1417        intoWord = foldl' (\w b -> (w `shiftL` 8) .|. fromIntegral b) 0
1418        wp   = intoWord pat
1419        mask = (1 `shiftL` (8 * lp)) - 1
1420        search !w !i
1421            | w == wp         = unsafeSplitAt (i - lp) src
1422            | length src <= i = (src, empty)
1423            | otherwise       = search w' (i + 1)
1424          where
1425            b  = fromIntegral (unsafeIndex src i)
1426            w' = mask .&. ((w `shiftL` 8) .|. b)
1427    {-# INLINE shift #-}
1428
1429-- | Get the first index of a substring in another string,
1430--   or 'Nothing' if the string is not found.
1431--   @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1432findSubstring :: ByteString -- ^ String to search for.
1433              -> ByteString -- ^ String to seach in.
1434              -> Maybe Int
1435findSubstring pat src
1436    | null pat && null src = Just 0
1437    | null b = Nothing
1438    | otherwise = Just (length a)
1439  where (a, b) = breakSubstring pat src
1440
1441{-# DEPRECATED findSubstring "findSubstring is deprecated in favour of breakSubstring." #-}
1442
1443-- | Find the indices of all non-overlapping occurences of a substring in a
1444-- string.
1445--
1446-- Note, prior to @0.10.6.0@ this function returned the indices of all
1447-- possibly-overlapping matches.
1448findSubstrings :: ByteString -- ^ String to search for.
1449               -> ByteString -- ^ String to seach in.
1450               -> [Int]
1451findSubstrings pat src
1452    | null pat        = [0 .. ls]
1453    | otherwise       = search 0
1454  where
1455    lp = length pat
1456    ls = length src
1457    search !n
1458        | (n > ls - lp) || null b = []
1459        | otherwise = let k = n + length a
1460                      in  k : search (k + lp)
1461      where
1462        (a, b) = breakSubstring pat (unsafeDrop n src)
1463
1464-- In
1465-- [0.10.6.0](<https://github.com/haskell/bytestring/commit/2160e091e215fecc9177d55a37cd50fc253ba86a?w=1>)
1466-- 'findSubstrings' was refactored to call an improved 'breakString'
1467-- implementation, but the refactored code no longer matches overlapping
1468-- strings.  The behaviour change appears to be inadvertent, but the function
1469-- had already been deprecated for more than seven years.  At this time
1470-- (@0.10.10.1@), the deprecation was twelve years in the past.
1471--
1472{-# DEPRECATED findSubstrings "findSubstrings is deprecated in favour of breakSubstring." #-}
1473
1474-- ---------------------------------------------------------------------
1475-- Zipping
1476
1477-- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1478-- corresponding pairs of bytes. If one input ByteString is short,
1479-- excess elements of the longer ByteString are discarded. This is
1480-- equivalent to a pair of 'unpack' operations.
1481zip :: ByteString -> ByteString -> [(Word8,Word8)]
1482zip ps qs
1483    | null ps || null qs = []
1484    | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1485
1486-- | 'zipWith' generalises 'zip' by zipping with the function given as
1487-- the first argument, instead of a tupling function.  For example,
1488-- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1489-- corresponding sums.
1490zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1491zipWith f ps qs
1492    | null ps || null qs = []
1493    | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1494{-# NOINLINE [1] zipWith #-}
1495
1496--
1497-- | A specialised version of zipWith for the common case of a
1498-- simultaneous map over two bytestrings, to build a 3rd. Rewrite rules
1499-- are used to automatically covert zipWith into zipWith' when a pack is
1500-- performed on the result of zipWith.
1501--
1502zipWith' :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString
1503zipWith' f (PS fp s l) (PS fq t m) = unsafeDupablePerformIO $
1504    withForeignPtr fp $ \a ->
1505    withForeignPtr fq $ \b ->
1506    create len $ zipWith_ 0 (a `plusPtr` s) (b `plusPtr` t)
1507  where
1508    zipWith_ :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
1509    zipWith_ !n !p1 !p2 !r
1510       | n >= len = return ()
1511       | otherwise = do
1512            x <- peekByteOff p1 n
1513            y <- peekByteOff p2 n
1514            pokeByteOff r n (f x y)
1515            zipWith_ (n+1) p1 p2 r
1516
1517    len = min l m
1518{-# INLINE zipWith' #-}
1519
1520{-# RULES
1521"ByteString specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q .
1522    zipWith f p q = unpack (zipWith' f p q)
1523  #-}
1524
1525-- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1526-- ByteStrings. Note that this performs two 'pack' operations.
1527unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1528unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1529{-# INLINE unzip #-}
1530
1531-- ---------------------------------------------------------------------
1532-- Special lists
1533
1534-- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1535inits :: ByteString -> [ByteString]
1536inits (PS x s l) = [PS x s n | n <- [0..l]]
1537
1538-- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1539tails :: ByteString -> [ByteString]
1540tails p | null p    = [empty]
1541        | otherwise = p : tails (unsafeTail p)
1542
1543-- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1544
1545-- ---------------------------------------------------------------------
1546-- ** Ordered 'ByteString's
1547
1548-- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1549sort :: ByteString -> ByteString
1550sort (PS input s l) = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do
1551
1552    _ <- memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1553    withForeignPtr input (\x -> countOccurrences arr (x `plusPtr` s) l)
1554
1555    let go 256 !_   = return ()
1556        go i   !ptr = do n <- peekElemOff arr i
1557                         when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1558                         go (i + 1) (ptr `plusPtr` fromIntegral n)
1559    go 0 p
1560  where
1561    -- | Count the number of occurrences of each byte.
1562    -- Used by 'sort'
1563    --
1564    countOccurrences :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
1565    countOccurrences !counts !str !len = go 0
1566     where
1567        go !i | i == len    = return ()
1568              | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
1569                               x <- peekElemOff counts k
1570                               pokeElemOff counts k (x + 1)
1571                               go (i + 1)
1572
1573
1574-- ---------------------------------------------------------------------
1575-- Low level constructors
1576
1577-- | /O(n) construction/ Use a @ByteString@ with a function requiring a
1578-- null-terminated @CString@.  The @CString@ is a copy and will be freed
1579-- automatically; it must not be stored or used after the
1580-- subcomputation finishes.
1581useAsCString :: ByteString -> (CString -> IO a) -> IO a
1582useAsCString (PS fp o l) action =
1583 allocaBytes (l+1) $ \buf ->
1584   withForeignPtr fp $ \p -> do
1585     memcpy buf (p `plusPtr` o) (fromIntegral l)
1586     pokeByteOff buf l (0::Word8)
1587     action (castPtr buf)
1588
1589-- | /O(n) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1590-- As for @useAsCString@ this function makes a copy of the original @ByteString@.
1591-- It must not be stored or used after the subcomputation finishes.
1592useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1593useAsCStringLen p@(PS _ _ l) f = useAsCString p $ \cstr -> f (cstr,l)
1594
1595------------------------------------------------------------------------
1596
1597-- | /O(n)./ Construct a new @ByteString@ from a @CString@. The
1598-- resulting @ByteString@ is an immutable copy of the original
1599-- @CString@, and is managed on the Haskell heap. The original
1600-- @CString@ must be null terminated.
1601packCString :: CString -> IO ByteString
1602packCString cstr = do
1603    len <- c_strlen cstr
1604    packCStringLen (cstr, fromIntegral len)
1605
1606-- | /O(n)./ Construct a new @ByteString@ from a @CStringLen@. The
1607-- resulting @ByteString@ is an immutable copy of the original @CStringLen@.
1608-- The @ByteString@ is a normal Haskell value and will be managed on the
1609-- Haskell heap.
1610packCStringLen :: CStringLen -> IO ByteString
1611packCStringLen (cstr, len) | len >= 0 = create len $ \p ->
1612    memcpy p (castPtr cstr) (fromIntegral len)
1613packCStringLen (_, len) =
1614    moduleErrorIO "packCStringLen" ("negative length: " ++ show len)
1615
1616------------------------------------------------------------------------
1617
1618-- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1619-- This is mainly useful to allow the rest of the data pointed
1620-- to by the 'ByteString' to be garbage collected, for example
1621-- if a large string has been read in, and only a small part of it
1622-- is needed in the rest of the program.
1623--
1624copy :: ByteString -> ByteString
1625copy (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
1626    memcpy p (f `plusPtr` s) (fromIntegral l)
1627
1628-- ---------------------------------------------------------------------
1629-- Line IO
1630
1631-- | Read a line from stdin.
1632getLine :: IO ByteString
1633getLine = hGetLine stdin
1634
1635-- | Read a line from a handle
1636
1637hGetLine :: Handle -> IO ByteString
1638hGetLine h =
1639  wantReadableHandle_ "Data.ByteString.hGetLine" h $
1640    \ h_@Handle__{haByteBuffer} -> do
1641      flushCharReadBuffer h_
1642      buf <- readIORef haByteBuffer
1643      if isEmptyBuffer buf
1644         then fill h_ buf 0 []
1645         else haveBuf h_ buf 0 []
1646 where
1647
1648  fill h_@Handle__{haByteBuffer,haDevice} buf !len xss = do
1649    (r,buf') <- Buffered.fillReadBuffer haDevice buf
1650    if r == 0
1651       then do writeIORef haByteBuffer buf{ bufR=0, bufL=0 }
1652               if len > 0
1653                  then mkBigPS len xss
1654                  else ioe_EOF
1655       else haveBuf h_ buf' len xss
1656
1657  haveBuf h_@Handle__{haByteBuffer}
1658          buf@Buffer{ bufRaw=raw, bufR=w, bufL=r }
1659          len xss =
1660    do
1661        off <- findEOL r w raw
1662        let new_len = len + off - r
1663        xs <- mkPS raw r off
1664
1665      -- if eol == True, then off is the offset of the '\n'
1666      -- otherwise off == w and the buffer is now empty.
1667        if off /= w
1668            then do if w == off + 1
1669                            then writeIORef haByteBuffer buf{ bufL=0, bufR=0 }
1670                            else writeIORef haByteBuffer buf{ bufL = off + 1 }
1671                    mkBigPS new_len (xs:xss)
1672            else fill h_ buf{ bufL=0, bufR=0 } new_len (xs:xss)
1673
1674  -- find the end-of-line character, if there is one
1675  findEOL r w raw
1676        | r == w = return w
1677        | otherwise =  do
1678            c <- readWord8Buf raw r
1679            if c == fromIntegral (ord '\n')
1680                then return r -- NB. not r+1: don't include the '\n'
1681                else findEOL (r+1) w raw
1682
1683mkPS :: RawBuffer Word8 -> Int -> Int -> IO ByteString
1684mkPS buf start end =
1685 create len $ \p ->
1686   withRawBuffer buf $ \pbuf -> copyBytes p (pbuf `plusPtr` start) len
1687 where
1688   len = end - start
1689
1690mkBigPS :: Int -> [ByteString] -> IO ByteString
1691mkBigPS _ [ps] = return ps
1692mkBigPS _ pss = return $! concat (P.reverse pss)
1693
1694-- ---------------------------------------------------------------------
1695-- Block IO
1696
1697-- | Outputs a 'ByteString' to the specified 'Handle'.
1698hPut :: Handle -> ByteString -> IO ()
1699hPut _ (PS _  _ 0) = return ()
1700hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1701
1702-- | Similar to 'hPut' except that it will never block. Instead it returns
1703-- any tail that did not get written. This tail may be 'empty' in the case that
1704-- the whole string was written, or the whole original string if nothing was
1705-- written. Partial writes are also possible.
1706--
1707-- Note: on Windows and with Haskell implementation other than GHC, this
1708-- function does not work correctly; it behaves identically to 'hPut'.
1709--
1710hPutNonBlocking :: Handle -> ByteString -> IO ByteString
1711hPutNonBlocking h bs@(PS ps s l) = do
1712  bytesWritten <- withForeignPtr ps $ \p-> hPutBufNonBlocking h (p `plusPtr` s) l
1713  return $! drop bytesWritten bs
1714
1715-- | A synonym for @hPut@, for compatibility
1716hPutStr :: Handle -> ByteString -> IO ()
1717hPutStr = hPut
1718
1719-- | Write a ByteString to a handle, appending a newline byte
1720hPutStrLn :: Handle -> ByteString -> IO ()
1721hPutStrLn h ps
1722    | length ps < 1024 = hPut h (ps `snoc` 0x0a)
1723    | otherwise        = hPut h ps >> hPut h (singleton (0x0a)) -- don't copy
1724
1725-- | Write a ByteString to stdout
1726putStr :: ByteString -> IO ()
1727putStr = hPut stdout
1728
1729-- | Write a ByteString to stdout, appending a newline byte
1730putStrLn :: ByteString -> IO ()
1731putStrLn = hPutStrLn stdout
1732
1733{-# DEPRECATED hPutStrLn
1734    "Use Data.ByteString.Char8.hPutStrLn instead. (Functions that rely on ASCII encodings belong in Data.ByteString.Char8)"
1735  #-}
1736{-# DEPRECATED putStrLn
1737    "Use Data.ByteString.Char8.putStrLn instead. (Functions that rely on ASCII encodings belong in Data.ByteString.Char8)"
1738  #-}
1739
1740------------------------------------------------------------------------
1741-- Low level IO
1742
1743-- | Read a 'ByteString' directly from the specified 'Handle'.  This
1744-- is far more efficient than reading the characters into a 'String'
1745-- and then using 'pack'. First argument is the Handle to read from,
1746-- and the second is the number of bytes to read. It returns the bytes
1747-- read, up to n, or 'empty' if EOF has been reached.
1748--
1749-- 'hGet' is implemented in terms of 'hGetBuf'.
1750--
1751-- If the handle is a pipe or socket, and the writing end
1752-- is closed, 'hGet' will behave as if EOF was reached.
1753--
1754hGet :: Handle -> Int -> IO ByteString
1755hGet h i
1756    | i >  0    = createAndTrim i $ \p -> hGetBuf h p i
1757    | i == 0    = return empty
1758    | otherwise = illegalBufferSize h "hGet" i
1759
1760-- | hGetNonBlocking is similar to 'hGet', except that it will never block
1761-- waiting for data to become available, instead it returns only whatever data
1762-- is available.  If there is no data available to be read, 'hGetNonBlocking'
1763-- returns 'empty'.
1764--
1765-- Note: on Windows and with Haskell implementation other than GHC, this
1766-- function does not work correctly; it behaves identically to 'hGet'.
1767--
1768hGetNonBlocking :: Handle -> Int -> IO ByteString
1769hGetNonBlocking h i
1770    | i >  0    = createAndTrim i $ \p -> hGetBufNonBlocking h p i
1771    | i == 0    = return empty
1772    | otherwise = illegalBufferSize h "hGetNonBlocking" i
1773
1774-- | Like 'hGet', except that a shorter 'ByteString' may be returned
1775-- if there are not enough bytes immediately available to satisfy the
1776-- whole request.  'hGetSome' only blocks if there is no data
1777-- available, and EOF has not yet been reached.
1778--
1779hGetSome :: Handle -> Int -> IO ByteString
1780hGetSome hh i
1781#if MIN_VERSION_base(4,3,0)
1782    | i >  0    = createAndTrim i $ \p -> hGetBufSome hh p i
1783#else
1784    | i >  0    = let
1785                   loop = do
1786                     s <- hGetNonBlocking hh i
1787                     if not (null s)
1788                        then return s
1789                        else do eof <- hIsEOF hh
1790                                if eof then return s
1791                                       else hWaitForInput hh (-1) >> loop
1792                                         -- for this to work correctly, the
1793                                         -- Handle should be in binary mode
1794                                         -- (see GHC ticket #3808)
1795                  in loop
1796#endif
1797    | i == 0    = return empty
1798    | otherwise = illegalBufferSize hh "hGetSome" i
1799
1800illegalBufferSize :: Handle -> String -> Int -> IO a
1801illegalBufferSize handle fn sz =
1802    ioError (mkIOError illegalOperationErrorType msg (Just handle) Nothing)
1803    --TODO: System.IO uses InvalidArgument here, but it's not exported :-(
1804    where
1805      msg = fn ++ ": illegal ByteString size " ++ showsPrec 9 sz []
1806
1807
1808-- | Read a handle's entire contents strictly into a 'ByteString'.
1809--
1810-- This function reads chunks at a time, increasing the chunk size on each
1811-- read. The final string is then reallocated to the appropriate size. For
1812-- files > half of available memory, this may lead to memory exhaustion.
1813-- Consider using 'readFile' in this case.
1814--
1815-- The Handle is closed once the contents have been read,
1816-- or if an exception is thrown.
1817--
1818hGetContents :: Handle -> IO ByteString
1819hGetContents hnd = do
1820    bs <- hGetContentsSizeHint hnd 1024 2048
1821            `finally` hClose hnd
1822    -- don't waste too much space for small files:
1823    if length bs < 900
1824      then return $! copy bs
1825      else return bs
1826
1827hGetContentsSizeHint :: Handle
1828                     -> Int -- ^ first read size
1829                     -> Int -- ^ initial buffer size increment
1830                     -> IO ByteString
1831hGetContentsSizeHint hnd =
1832    readChunks []
1833  where
1834    readChunks chunks sz sz' = do
1835      fp        <- mallocByteString sz
1836      readcount <- withForeignPtr fp $ \buf -> hGetBuf hnd buf sz
1837      let chunk = PS fp 0 readcount
1838      -- We rely on the hGetBuf behaviour (not hGetBufSome) where it reads up
1839      -- to the size we ask for, or EOF. So short reads indicate EOF.
1840      if readcount < sz && sz > 0
1841        then return $! concat (P.reverse (chunk : chunks))
1842        else readChunks (chunk : chunks) sz' ((sz+sz') `min` 32752)
1843             -- we grow the buffer sizes, but not too huge
1844             -- we concatenate in the end anyway
1845
1846-- | getContents. Read stdin strictly. Equivalent to hGetContents stdin
1847-- The 'Handle' is closed after the contents have been read.
1848--
1849getContents :: IO ByteString
1850getContents = hGetContents stdin
1851
1852-- | The interact function takes a function of type @ByteString -> ByteString@
1853-- as its argument. The entire input from the standard input device is passed
1854-- to this function as its argument, and the resulting string is output on the
1855-- standard output device.
1856--
1857interact :: (ByteString -> ByteString) -> IO ()
1858interact transformer = putStr . transformer =<< getContents
1859
1860-- | Read an entire file strictly into a 'ByteString'.
1861--
1862readFile :: FilePath -> IO ByteString
1863readFile f =
1864    withBinaryFile f ReadMode $ \h -> do
1865      -- hFileSize fails if file is not regular file (like
1866      -- /dev/null). Catch exception and try reading anyway.
1867      filesz <- catch (hFileSize h) useZeroIfNotRegularFile
1868      let readsz = (fromIntegral filesz `max` 0) + 1
1869      hGetContentsSizeHint h readsz (readsz `max` 255)
1870      -- Our initial size is one bigger than the file size so that in the
1871      -- typical case we will read the whole file in one go and not have
1872      -- to allocate any more chunks. We'll still do the right thing if the
1873      -- file size is 0 or is changed before we do the read.
1874  where
1875    useZeroIfNotRegularFile :: IOException -> IO Integer
1876    useZeroIfNotRegularFile _ = return 0
1877
1878modifyFile :: IOMode -> FilePath -> ByteString -> IO ()
1879modifyFile mode f txt = withBinaryFile f mode (`hPut` txt)
1880
1881-- | Write a 'ByteString' to a file.
1882writeFile :: FilePath -> ByteString -> IO ()
1883writeFile = modifyFile WriteMode
1884
1885-- | Append a 'ByteString' to a file.
1886appendFile :: FilePath -> ByteString -> IO ()
1887appendFile = modifyFile AppendMode
1888
1889-- ---------------------------------------------------------------------
1890-- Internal utilities
1891
1892-- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
1893-- of the string if no element is found, rather than Nothing.
1894findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1895findIndexOrEnd k (PS x s l) =
1896    accursedUnutterablePerformIO $
1897      withForeignPtr x $ \f ->
1898        go (f `plusPtr` s) 0
1899  where
1900    go !ptr !n | n >= l    = return l
1901               | otherwise = do w <- peek ptr
1902                                if k w
1903                                  then return n
1904                                  else go (ptr `plusPtr` 1) (n+1)
1905{-# INLINE findIndexOrEnd #-}
1906
1907-- Common up near identical calls to `error' to reduce the number
1908-- constant strings created when compiled:
1909errorEmptyList :: String -> a
1910errorEmptyList fun = moduleError fun "empty ByteString"
1911{-# NOINLINE errorEmptyList #-}
1912
1913moduleError :: String -> String -> a
1914moduleError fun msg = error (moduleErrorMsg fun msg)
1915{-# NOINLINE moduleError #-}
1916
1917moduleErrorIO :: String -> String -> IO a
1918moduleErrorIO fun msg = throwIO . userError $ moduleErrorMsg fun msg
1919{-# NOINLINE moduleErrorIO #-}
1920
1921moduleErrorMsg :: String -> String -> String
1922moduleErrorMsg fun msg = "Data.ByteString." ++ fun ++ ':':' ':msg
1923
1924-- Find from the end of the string using predicate
1925findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
1926findFromEndUntil f ps@(PS x s l)
1927  | null ps = 0
1928  | f (unsafeLast ps) = l
1929  | otherwise = findFromEndUntil f (PS x s (l - 1))
1930