1{-# LANGUAGE BangPatterns, ForeignFunctionInterface, CPP #-}
2module ByteStringUtils
3       (unsafeWithInternals, unpackPSfromUTF8, gzReadFilePS, mmapFilePS,
4        gzWriteFilePS, gzWriteFilePSs, ifHeadThenTail, dropSpace,
5        breakSpace, linesPS, unlinesPS, hashPS, breakFirstPS, breakLastPS,
6        substrPS, readIntPS, is_funky, fromHex2PS, fromPS2Hex,
7        betweenLinesPS, break_after_nth_newline, break_before_nth_newline,
8        intercalate)
9       where
10import Prelude hiding (catch)
11import qualified Data.ByteString as B
12import qualified Data.ByteString.Char8 as BC
13import qualified Data.ByteString.Internal as BI
14import Data.ByteString (intercalate, uncons)
15import Data.ByteString.Internal (fromForeignPtr)
16import Control.Exception (catch)
17import System.IO
18import System.IO.Unsafe (unsafePerformIO)
19import Foreign.Storable (peekElemOff, peek)
20import Foreign.Marshal.Alloc (free)
21import Foreign.Marshal.Array (mallocArray, peekArray, advancePtr)
22import Foreign.C.Types (CInt)
23import Data.Bits (rotateL)
24import Data.Char (chr, ord, isSpace)
25import Data.Word (Word8)
26import Data.Int (Int32)
27import Control.Monad (when)
28import Foreign.Ptr (nullPtr)
29import Foreign.ForeignPtr (ForeignPtr)
30import Foreign.Ptr (plusPtr, Ptr)
31import Foreign.ForeignPtr (withForeignPtr)
32import Foreign.ForeignPtr (addForeignPtrFinalizer)
33import Foreign.Ptr (FunPtr)
34import qualified Data.ByteString.Lazy as BL
35import qualified Codec.Compression.GZip as GZ
36import Foreign.C.String (CString, withCString)
37import System.IO.MMap (mmapFileByteString)
38import System.Mem (performGC)
39import System.Posix.Files (fileSize, getSymbolicLinkStatus)
40
41debugForeignPtr :: ForeignPtr a -> String -> IO ()
42
43foreign import ccall unsafe "static fpstring.h debug_alloc"
44               debug_alloc :: Ptr a -> CString -> IO ()
45
46foreign import ccall unsafe "static fpstring.h & debug_free"
47               debug_free :: FunPtr (Ptr a -> IO ())
48debugForeignPtr fp n
49  = withCString n $
50      \ cname ->
51        withForeignPtr fp $
52          \ p ->
53            do debug_alloc p cname
54               addForeignPtrFinalizer debug_free fp
55debugForeignPtr _ _ = return ()
56
57unsafeWithInternals ::
58                    B.ByteString -> (Ptr Word8 -> Int -> IO a) -> IO a
59unsafeWithInternals ps f
60  = case BI.toForeignPtr ps of
61        (fp, s, l) -> withForeignPtr fp $ \ p -> f (p `plusPtr` s) l
62
63readIntPS :: B.ByteString -> Maybe (Int, B.ByteString)
64readIntPS = BC.readInt . BC.dropWhile isSpace
65
66unpackPSfromUTF8 :: B.ByteString -> String
67unpackPSfromUTF8 ps
68  = case BI.toForeignPtr ps of
69        (_, _, 0) -> ""
70        (x, s, l) -> unsafePerformIO $
71                       withForeignPtr x $
72                         \ p ->
73                           do outbuf <- mallocArray l
74                              lout <- fromIntegral `fmap`
75                                        utf8_to_ints outbuf (p `plusPtr` s) (fromIntegral l)
76                              when (lout < 0) $ error "Bad UTF8!"
77                              str <- (map (chr . fromIntegral)) `fmap` peekArray lout outbuf
78                              free outbuf
79                              return str
80
81foreign import ccall unsafe "static fpstring.h utf8_to_ints"
82               utf8_to_ints :: Ptr Int -> Ptr Word8 -> CInt -> IO CInt
83
84{-# INLINE ifHeadThenTail #-}
85
86ifHeadThenTail :: Word8 -> B.ByteString -> Maybe B.ByteString
87ifHeadThenTail c s
88  = case uncons s of
89        Just (w, t) | w == c -> Just t
90        _ -> Nothing
91
92isSpaceWord8 :: Word8 -> Bool
93isSpaceWord8 w = w == 32 || w == 9 || w == 10 || w == 13
94
95{-# INLINE isSpaceWord8 #-}
96
97firstnonspace :: Ptr Word8 -> Int -> Int -> IO Int
98firstnonspace !ptr !n !m
99  | n >= m = return n
100  | otherwise =
101    do w <- peekElemOff ptr n
102       if isSpaceWord8 w then firstnonspace ptr (n + 1) m else return n
103
104firstspace :: Ptr Word8 -> Int -> Int -> IO Int
105firstspace !ptr !n !m
106  | n >= m = return n
107  | otherwise =
108    do w <- peekElemOff ptr n
109       if (not . isSpaceWord8) w then firstspace ptr (n + 1) m else
110         return n
111
112dropSpace :: B.ByteString -> B.ByteString
113dropSpace (BI.PS x s l)
114  = BI.inlinePerformIO $
115      withForeignPtr x $
116        \ p ->
117          do i <- firstnonspace (p `plusPtr` s) 0 l
118             return $! if i == l then B.empty else BI.PS x (s + i) (l - i)
119
120{-# INLINE dropSpace #-}
121
122breakSpace :: B.ByteString -> (B.ByteString, B.ByteString)
123breakSpace (BI.PS x s l)
124  = BI.inlinePerformIO $
125      withForeignPtr x $
126        \ p ->
127          do i <- firstspace (p `plusPtr` s) 0 l
128             return $!
129               case () of
130                   _ | i == 0 -> (B.empty, BI.PS x s l)
131                     | i == l -> (BI.PS x s l, B.empty)
132                     | otherwise -> (BI.PS x s i, BI.PS x (s + i) (l - i))
133
134{-# INLINE breakSpace #-}
135
136{-# INLINE is_funky #-}
137
138is_funky :: B.ByteString -> Bool
139is_funky ps
140  = case BI.toForeignPtr ps of
141        (x, s, l) -> unsafePerformIO $
142                       withForeignPtr x $
143                         \ p ->
144                           (/= 0) `fmap` has_funky_char (p `plusPtr` s) (fromIntegral l)
145
146foreign import ccall unsafe "fpstring.h has_funky_char"
147               has_funky_char :: Ptr Word8 -> CInt -> IO CInt
148
149{-# INLINE hashPS #-}
150
151hashPS :: B.ByteString -> Int32
152hashPS ps
153  = case BI.toForeignPtr ps of
154        (x, s, l) -> unsafePerformIO $
155                       withForeignPtr x $ \ p -> do hash (p `plusPtr` s) l
156
157hash :: Ptr Word8 -> Int -> IO Int32
158hash ptr len = f (0 :: Int32) ptr len
159  where f h _ 0 = return h
160        f h p n
161          = do x <- peek p
162               let !h' = (fromIntegral x) + (rotateL h 8)
163               f h' (p `advancePtr` 1) (n - 1)
164
165{-# INLINE substrPS #-}
166
167substrPS :: B.ByteString -> B.ByteString -> Maybe Int
168substrPS tok str
169  | B.null tok = Just 0
170  | B.length tok > B.length str = Nothing
171  | otherwise =
172    do n <- BC.elemIndex (BC.head tok) str
173       let ttok = B.tail tok
174           reststr = B.drop (n + 1) str
175       if ttok == B.take (B.length ttok) reststr then Just n else
176         ((n + 1) +) `fmap` substrPS tok reststr
177
178{-# INLINE breakFirstPS #-}
179
180breakFirstPS ::
181             Char -> B.ByteString -> Maybe (B.ByteString, B.ByteString)
182breakFirstPS c p
183  = case BC.elemIndex c p of
184        Nothing -> Nothing
185        Just n -> Just (B.take n p, B.drop (n + 1) p)
186
187{-# INLINE breakLastPS #-}
188
189breakLastPS ::
190            Char -> B.ByteString -> Maybe (B.ByteString, B.ByteString)
191breakLastPS c p
192  = case BC.elemIndexEnd c p of
193        Nothing -> Nothing
194        Just n -> Just (B.take n p, B.drop (n + 1) p)
195
196{-# INLINE linesPS #-}
197
198linesPS :: B.ByteString -> [B.ByteString]
199linesPS ps
200  | B.null ps = [B.empty]
201  | otherwise = BC.split '\n' ps
202
203unlinesPS :: [B.ByteString] -> B.ByteString
204unlinesPS [] = BC.empty
205unlinesPS x = BC.init $ BC.unlines x
206
207{-# INLINE unlinesPS #-}
208
209foreign import ccall unsafe "static zlib.h gzopen" c_gzopen ::
210               CString -> CString -> IO (Ptr ())
211
212foreign import ccall unsafe "static zlib.h gzclose" c_gzclose ::
213               Ptr () -> IO ()
214
215foreign import ccall unsafe "static zlib.h gzread" c_gzread ::
216               Ptr () -> Ptr Word8 -> CInt -> IO CInt
217
218foreign import ccall unsafe "static zlib.h gzwrite" c_gzwrite ::
219               Ptr () -> Ptr Word8 -> CInt -> IO CInt
220
221gzReadFilePS :: FilePath -> IO B.ByteString
222gzReadFilePS f
223  = do h <- openBinaryFile f ReadMode
224       header <- B.hGet h 2
225       if header /= BC.pack "\US\139" then
226         do hClose h
227            mmapFilePS f
228         else
229         do hSeek h SeekFromEnd (-4)
230            len <- hGetLittleEndInt h
231            hClose h
232            let decompress
233                  = GZ.decompressWith
234                      GZ.defaultDecompressParams{GZ.decompressBufferSize = len}
235            fmap (B.concat . BL.toChunks . decompress) $
236              fmap (BL.fromChunks . (: [])) $ B.readFile f BL.readFile f
237            withCString f $
238              \ fstr ->
239                withCString "rb" $
240                  \ rb ->
241                    do gzf <- c_gzopen fstr rb
242                       when (gzf == nullPtr) $ fail $ "problem opening file " ++ f
243                       fp <- BI.mallocByteString len
244                       debugForeignPtr fp $ "gzReadFilePS " ++ f
245                       lread <- withForeignPtr fp $
246                                  \ p -> c_gzread gzf p (fromIntegral len)
247                       c_gzclose gzf
248                       when (fromIntegral lread /= len) $
249                         fail $ "problem gzreading file " ++ f
250                       return $ fromForeignPtr fp 0 len
251
252hGetLittleEndInt :: Handle -> IO Int
253hGetLittleEndInt h
254  = do b1 <- ord `fmap` hGetChar h
255       b2 <- ord `fmap` hGetChar h
256       b3 <- ord `fmap` hGetChar h
257       b4 <- ord `fmap` hGetChar h
258       return $ b1 + 256 * b2 + 65536 * b3 + 16777216 * b4
259
260gzWriteFilePS :: FilePath -> B.ByteString -> IO ()
261gzWriteFilePS f ps = gzWriteFilePSs f [ps]
262
263gzWriteFilePSs :: FilePath -> [B.ByteString] -> IO ()
264gzWriteFilePSs f pss
265  = BL.writeFile f $
266      GZ.compress $
267        BL.fromChunks pss withCString f $
268          \ fstr ->
269            withCString "wb" $
270              \ wb ->
271                do gzf <- c_gzopen fstr wb
272                   when (gzf == nullPtr) $
273                     fail $ "problem gzopening file for write: " ++ f
274                   mapM_ (gzWriteToGzf gzf) pss `catch`
275                     \ _ -> fail $ "problem gzwriting file: " ++ f
276                   c_gzclose gzf
277
278gzWriteToGzf :: Ptr () -> B.ByteString -> IO ()
279gzWriteToGzf gzf ps
280  = case BI.toForeignPtr ps of
281        (_, _, 0) -> return ()
282        (x, s, l) -> do lw <- withForeignPtr x $
283                                \ p -> c_gzwrite gzf (p `plusPtr` s) (fromIntegral l)
284                        when (fromIntegral lw /= l) $ fail $ "problem in gzWriteToGzf"
285
286mmapFilePS :: FilePath -> IO B.ByteString
287mmapFilePS f
288  = do x <- mmapFileByteString f Nothing `catch`
289              (\ _ ->
290                 do size <- fileSize `fmap` getSymbolicLinkStatus f
291                    if size == 0 then return B.empty else
292                      performGC >> mmapFileByteString f Nothing)
293       return x
294mmapFilePS = B.readFile
295
296foreign import ccall unsafe "static fpstring.h conv_to_hex"
297               conv_to_hex :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
298
299fromPS2Hex :: B.ByteString -> B.ByteString
300fromPS2Hex ps
301  = case BI.toForeignPtr ps of
302        (x, s, l) -> BI.unsafeCreate (2 * l) $
303                       \ p ->
304                         withForeignPtr x $
305                           \ f -> conv_to_hex p (f `plusPtr` s) $ fromIntegral l
306
307foreign import ccall unsafe "static fpstring.h conv_from_hex"
308               conv_from_hex :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
309
310fromHex2PS :: B.ByteString -> B.ByteString
311fromHex2PS ps
312  = case BI.toForeignPtr ps of
313        (x, s, l) -> BI.unsafeCreate (l `div` 2) $
314                       \ p ->
315                         withForeignPtr x $
316                           \ f -> conv_from_hex p (f `plusPtr` s) (fromIntegral $ l `div` 2)
317
318betweenLinesPS ::
319               B.ByteString ->
320                 B.ByteString -> B.ByteString -> Maybe (B.ByteString)
321betweenLinesPS start end ps
322  = case break (start ==) (linesPS ps) of
323        (_, _ : rest@(bs1 : _)) -> case BI.toForeignPtr bs1 of
324                                       (ps1, s1, _) -> case break (end ==) rest of
325                                                           (_, bs2 : _) -> case BI.toForeignPtr bs2
326                                                                             of
327                                                                               (_, s2, _) -> Just $
328                                                                                               fromForeignPtr
329                                                                                                 ps1
330                                                                                                 s1
331                                                                                                 (s2
332                                                                                                    -
333                                                                                                    s1)
334                                                           _ -> Nothing
335        _ -> Nothing
336
337break_after_nth_newline ::
338                        Int -> B.ByteString -> Maybe (B.ByteString, B.ByteString)
339break_after_nth_newline 0 the_ps
340  | B.null the_ps = Just (B.empty, B.empty)
341break_after_nth_newline n the_ps
342  = case BI.toForeignPtr the_ps of
343        (fp, the_s, l) -> unsafePerformIO $
344                            withForeignPtr fp $
345                              \ p ->
346                                do let findit 0 s | s == end = return $ Just (the_ps, B.empty)
347                                       findit _ s | s == end = return Nothing
348                                       findit 0 s
349                                         = let left_l = s - the_s in
350                                             return $
351                                               Just
352                                                 (fromForeignPtr fp the_s left_l,
353                                                  fromForeignPtr fp s (l - left_l))
354                                       findit i s
355                                         = do w <- peekElemOff p s
356                                              if w == nl then findit (i - 1) (s + 1) else
357                                                findit i (s + 1)
358                                       nl = BI.c2w '\n'
359                                       end = the_s + l
360                                   findit n the_s
361
362break_before_nth_newline ::
363                         Int -> B.ByteString -> (B.ByteString, B.ByteString)
364break_before_nth_newline 0 the_ps
365  | B.null the_ps = (B.empty, B.empty)
366break_before_nth_newline n the_ps
367  = case BI.toForeignPtr the_ps of
368        (fp, the_s, l) -> unsafePerformIO $
369                            withForeignPtr fp $
370                              \ p ->
371                                do let findit _ s | s == end = return (the_ps, B.empty)
372                                       findit i s
373                                         = do w <- peekElemOff p s
374                                              if w == nl then
375                                                if i == 0 then
376                                                  let left_l = s - the_s in
377                                                    return
378                                                      (fromForeignPtr fp the_s left_l,
379                                                       fromForeignPtr fp s (l - left_l))
380                                                  else findit (i - 1) (s + 1)
381                                                else findit i (s + 1)
382                                       nl = BI.c2w '\n'
383                                       end = the_s + l
384                                   findit n the_s
385