1{-# LANGUAGE BangPatterns, ForeignFunctionInterface, CPP #-} 2module ByteStringUtils 3 (unsafeWithInternals, unpackPSfromUTF8, gzReadFilePS, mmapFilePS, 4 gzWriteFilePS, gzWriteFilePSs, ifHeadThenTail, dropSpace, 5 breakSpace, linesPS, unlinesPS, hashPS, breakFirstPS, breakLastPS, 6 substrPS, readIntPS, is_funky, fromHex2PS, fromPS2Hex, 7 betweenLinesPS, break_after_nth_newline, break_before_nth_newline, 8 intercalate) 9 where 10import Prelude hiding (catch) 11import qualified Data.ByteString as B 12import qualified Data.ByteString.Char8 as BC 13import qualified Data.ByteString.Internal as BI 14import Data.ByteString (intercalate, uncons) 15import Data.ByteString.Internal (fromForeignPtr) 16import Control.Exception (catch) 17import System.IO 18import System.IO.Unsafe (unsafePerformIO) 19import Foreign.Storable (peekElemOff, peek) 20import Foreign.Marshal.Alloc (free) 21import Foreign.Marshal.Array (mallocArray, peekArray, advancePtr) 22import Foreign.C.Types (CInt) 23import Data.Bits (rotateL) 24import Data.Char (chr, ord, isSpace) 25import Data.Word (Word8) 26import Data.Int (Int32) 27import Control.Monad (when) 28import Foreign.Ptr (nullPtr) 29import Foreign.ForeignPtr (ForeignPtr) 30import Foreign.Ptr (plusPtr, Ptr) 31import Foreign.ForeignPtr (withForeignPtr) 32import Foreign.ForeignPtr (addForeignPtrFinalizer) 33import Foreign.Ptr (FunPtr) 34import qualified Data.ByteString.Lazy as BL 35import qualified Codec.Compression.GZip as GZ 36import Foreign.C.String (CString, withCString) 37import System.IO.MMap (mmapFileByteString) 38import System.Mem (performGC) 39import System.Posix.Files (fileSize, getSymbolicLinkStatus) 40 41debugForeignPtr :: ForeignPtr a -> String -> IO () 42 43foreign import ccall unsafe "static fpstring.h debug_alloc" 44 debug_alloc :: Ptr a -> CString -> IO () 45 46foreign import ccall unsafe "static fpstring.h & debug_free" 47 debug_free :: FunPtr (Ptr a -> IO ()) 48debugForeignPtr fp n 49 = withCString n $ 50 \ cname -> 51 withForeignPtr fp $ 52 \ p -> 53 do debug_alloc p cname 54 addForeignPtrFinalizer debug_free fp 55debugForeignPtr _ _ = return () 56 57unsafeWithInternals :: 58 B.ByteString -> (Ptr Word8 -> Int -> IO a) -> IO a 59unsafeWithInternals ps f 60 = case BI.toForeignPtr ps of 61 (fp, s, l) -> withForeignPtr fp $ \ p -> f (p `plusPtr` s) l 62 63readIntPS :: B.ByteString -> Maybe (Int, B.ByteString) 64readIntPS = BC.readInt . BC.dropWhile isSpace 65 66unpackPSfromUTF8 :: B.ByteString -> String 67unpackPSfromUTF8 ps 68 = case BI.toForeignPtr ps of 69 (_, _, 0) -> "" 70 (x, s, l) -> unsafePerformIO $ 71 withForeignPtr x $ 72 \ p -> 73 do outbuf <- mallocArray l 74 lout <- fromIntegral `fmap` 75 utf8_to_ints outbuf (p `plusPtr` s) (fromIntegral l) 76 when (lout < 0) $ error "Bad UTF8!" 77 str <- (map (chr . fromIntegral)) `fmap` peekArray lout outbuf 78 free outbuf 79 return str 80 81foreign import ccall unsafe "static fpstring.h utf8_to_ints" 82 utf8_to_ints :: Ptr Int -> Ptr Word8 -> CInt -> IO CInt 83 84{-# INLINE ifHeadThenTail #-} 85 86ifHeadThenTail :: Word8 -> B.ByteString -> Maybe B.ByteString 87ifHeadThenTail c s 88 = case uncons s of 89 Just (w, t) | w == c -> Just t 90 _ -> Nothing 91 92isSpaceWord8 :: Word8 -> Bool 93isSpaceWord8 w = w == 32 || w == 9 || w == 10 || w == 13 94 95{-# INLINE isSpaceWord8 #-} 96 97firstnonspace :: Ptr Word8 -> Int -> Int -> IO Int 98firstnonspace !ptr !n !m 99 | n >= m = return n 100 | otherwise = 101 do w <- peekElemOff ptr n 102 if isSpaceWord8 w then firstnonspace ptr (n + 1) m else return n 103 104firstspace :: Ptr Word8 -> Int -> Int -> IO Int 105firstspace !ptr !n !m 106 | n >= m = return n 107 | otherwise = 108 do w <- peekElemOff ptr n 109 if (not . isSpaceWord8) w then firstspace ptr (n + 1) m else 110 return n 111 112dropSpace :: B.ByteString -> B.ByteString 113dropSpace (BI.PS x s l) 114 = BI.inlinePerformIO $ 115 withForeignPtr x $ 116 \ p -> 117 do i <- firstnonspace (p `plusPtr` s) 0 l 118 return $! if i == l then B.empty else BI.PS x (s + i) (l - i) 119 120{-# INLINE dropSpace #-} 121 122breakSpace :: B.ByteString -> (B.ByteString, B.ByteString) 123breakSpace (BI.PS x s l) 124 = BI.inlinePerformIO $ 125 withForeignPtr x $ 126 \ p -> 127 do i <- firstspace (p `plusPtr` s) 0 l 128 return $! 129 case () of 130 _ | i == 0 -> (B.empty, BI.PS x s l) 131 | i == l -> (BI.PS x s l, B.empty) 132 | otherwise -> (BI.PS x s i, BI.PS x (s + i) (l - i)) 133 134{-# INLINE breakSpace #-} 135 136{-# INLINE is_funky #-} 137 138is_funky :: B.ByteString -> Bool 139is_funky ps 140 = case BI.toForeignPtr ps of 141 (x, s, l) -> unsafePerformIO $ 142 withForeignPtr x $ 143 \ p -> 144 (/= 0) `fmap` has_funky_char (p `plusPtr` s) (fromIntegral l) 145 146foreign import ccall unsafe "fpstring.h has_funky_char" 147 has_funky_char :: Ptr Word8 -> CInt -> IO CInt 148 149{-# INLINE hashPS #-} 150 151hashPS :: B.ByteString -> Int32 152hashPS ps 153 = case BI.toForeignPtr ps of 154 (x, s, l) -> unsafePerformIO $ 155 withForeignPtr x $ \ p -> do hash (p `plusPtr` s) l 156 157hash :: Ptr Word8 -> Int -> IO Int32 158hash ptr len = f (0 :: Int32) ptr len 159 where f h _ 0 = return h 160 f h p n 161 = do x <- peek p 162 let !h' = (fromIntegral x) + (rotateL h 8) 163 f h' (p `advancePtr` 1) (n - 1) 164 165{-# INLINE substrPS #-} 166 167substrPS :: B.ByteString -> B.ByteString -> Maybe Int 168substrPS tok str 169 | B.null tok = Just 0 170 | B.length tok > B.length str = Nothing 171 | otherwise = 172 do n <- BC.elemIndex (BC.head tok) str 173 let ttok = B.tail tok 174 reststr = B.drop (n + 1) str 175 if ttok == B.take (B.length ttok) reststr then Just n else 176 ((n + 1) +) `fmap` substrPS tok reststr 177 178{-# INLINE breakFirstPS #-} 179 180breakFirstPS :: 181 Char -> B.ByteString -> Maybe (B.ByteString, B.ByteString) 182breakFirstPS c p 183 = case BC.elemIndex c p of 184 Nothing -> Nothing 185 Just n -> Just (B.take n p, B.drop (n + 1) p) 186 187{-# INLINE breakLastPS #-} 188 189breakLastPS :: 190 Char -> B.ByteString -> Maybe (B.ByteString, B.ByteString) 191breakLastPS c p 192 = case BC.elemIndexEnd c p of 193 Nothing -> Nothing 194 Just n -> Just (B.take n p, B.drop (n + 1) p) 195 196{-# INLINE linesPS #-} 197 198linesPS :: B.ByteString -> [B.ByteString] 199linesPS ps 200 | B.null ps = [B.empty] 201 | otherwise = BC.split '\n' ps 202 203unlinesPS :: [B.ByteString] -> B.ByteString 204unlinesPS [] = BC.empty 205unlinesPS x = BC.init $ BC.unlines x 206 207{-# INLINE unlinesPS #-} 208 209foreign import ccall unsafe "static zlib.h gzopen" c_gzopen :: 210 CString -> CString -> IO (Ptr ()) 211 212foreign import ccall unsafe "static zlib.h gzclose" c_gzclose :: 213 Ptr () -> IO () 214 215foreign import ccall unsafe "static zlib.h gzread" c_gzread :: 216 Ptr () -> Ptr Word8 -> CInt -> IO CInt 217 218foreign import ccall unsafe "static zlib.h gzwrite" c_gzwrite :: 219 Ptr () -> Ptr Word8 -> CInt -> IO CInt 220 221gzReadFilePS :: FilePath -> IO B.ByteString 222gzReadFilePS f 223 = do h <- openBinaryFile f ReadMode 224 header <- B.hGet h 2 225 if header /= BC.pack "\US\139" then 226 do hClose h 227 mmapFilePS f 228 else 229 do hSeek h SeekFromEnd (-4) 230 len <- hGetLittleEndInt h 231 hClose h 232 let decompress 233 = GZ.decompressWith 234 GZ.defaultDecompressParams{GZ.decompressBufferSize = len} 235 fmap (B.concat . BL.toChunks . decompress) $ 236 fmap (BL.fromChunks . (: [])) $ B.readFile f BL.readFile f 237 withCString f $ 238 \ fstr -> 239 withCString "rb" $ 240 \ rb -> 241 do gzf <- c_gzopen fstr rb 242 when (gzf == nullPtr) $ fail $ "problem opening file " ++ f 243 fp <- BI.mallocByteString len 244 debugForeignPtr fp $ "gzReadFilePS " ++ f 245 lread <- withForeignPtr fp $ 246 \ p -> c_gzread gzf p (fromIntegral len) 247 c_gzclose gzf 248 when (fromIntegral lread /= len) $ 249 fail $ "problem gzreading file " ++ f 250 return $ fromForeignPtr fp 0 len 251 252hGetLittleEndInt :: Handle -> IO Int 253hGetLittleEndInt h 254 = do b1 <- ord `fmap` hGetChar h 255 b2 <- ord `fmap` hGetChar h 256 b3 <- ord `fmap` hGetChar h 257 b4 <- ord `fmap` hGetChar h 258 return $ b1 + 256 * b2 + 65536 * b3 + 16777216 * b4 259 260gzWriteFilePS :: FilePath -> B.ByteString -> IO () 261gzWriteFilePS f ps = gzWriteFilePSs f [ps] 262 263gzWriteFilePSs :: FilePath -> [B.ByteString] -> IO () 264gzWriteFilePSs f pss 265 = BL.writeFile f $ 266 GZ.compress $ 267 BL.fromChunks pss withCString f $ 268 \ fstr -> 269 withCString "wb" $ 270 \ wb -> 271 do gzf <- c_gzopen fstr wb 272 when (gzf == nullPtr) $ 273 fail $ "problem gzopening file for write: " ++ f 274 mapM_ (gzWriteToGzf gzf) pss `catch` 275 \ _ -> fail $ "problem gzwriting file: " ++ f 276 c_gzclose gzf 277 278gzWriteToGzf :: Ptr () -> B.ByteString -> IO () 279gzWriteToGzf gzf ps 280 = case BI.toForeignPtr ps of 281 (_, _, 0) -> return () 282 (x, s, l) -> do lw <- withForeignPtr x $ 283 \ p -> c_gzwrite gzf (p `plusPtr` s) (fromIntegral l) 284 when (fromIntegral lw /= l) $ fail $ "problem in gzWriteToGzf" 285 286mmapFilePS :: FilePath -> IO B.ByteString 287mmapFilePS f 288 = do x <- mmapFileByteString f Nothing `catch` 289 (\ _ -> 290 do size <- fileSize `fmap` getSymbolicLinkStatus f 291 if size == 0 then return B.empty else 292 performGC >> mmapFileByteString f Nothing) 293 return x 294mmapFilePS = B.readFile 295 296foreign import ccall unsafe "static fpstring.h conv_to_hex" 297 conv_to_hex :: Ptr Word8 -> Ptr Word8 -> CInt -> IO () 298 299fromPS2Hex :: B.ByteString -> B.ByteString 300fromPS2Hex ps 301 = case BI.toForeignPtr ps of 302 (x, s, l) -> BI.unsafeCreate (2 * l) $ 303 \ p -> 304 withForeignPtr x $ 305 \ f -> conv_to_hex p (f `plusPtr` s) $ fromIntegral l 306 307foreign import ccall unsafe "static fpstring.h conv_from_hex" 308 conv_from_hex :: Ptr Word8 -> Ptr Word8 -> CInt -> IO () 309 310fromHex2PS :: B.ByteString -> B.ByteString 311fromHex2PS ps 312 = case BI.toForeignPtr ps of 313 (x, s, l) -> BI.unsafeCreate (l `div` 2) $ 314 \ p -> 315 withForeignPtr x $ 316 \ f -> conv_from_hex p (f `plusPtr` s) (fromIntegral $ l `div` 2) 317 318betweenLinesPS :: 319 B.ByteString -> 320 B.ByteString -> B.ByteString -> Maybe (B.ByteString) 321betweenLinesPS start end ps 322 = case break (start ==) (linesPS ps) of 323 (_, _ : rest@(bs1 : _)) -> case BI.toForeignPtr bs1 of 324 (ps1, s1, _) -> case break (end ==) rest of 325 (_, bs2 : _) -> case BI.toForeignPtr bs2 326 of 327 (_, s2, _) -> Just $ 328 fromForeignPtr 329 ps1 330 s1 331 (s2 332 - 333 s1) 334 _ -> Nothing 335 _ -> Nothing 336 337break_after_nth_newline :: 338 Int -> B.ByteString -> Maybe (B.ByteString, B.ByteString) 339break_after_nth_newline 0 the_ps 340 | B.null the_ps = Just (B.empty, B.empty) 341break_after_nth_newline n the_ps 342 = case BI.toForeignPtr the_ps of 343 (fp, the_s, l) -> unsafePerformIO $ 344 withForeignPtr fp $ 345 \ p -> 346 do let findit 0 s | s == end = return $ Just (the_ps, B.empty) 347 findit _ s | s == end = return Nothing 348 findit 0 s 349 = let left_l = s - the_s in 350 return $ 351 Just 352 (fromForeignPtr fp the_s left_l, 353 fromForeignPtr fp s (l - left_l)) 354 findit i s 355 = do w <- peekElemOff p s 356 if w == nl then findit (i - 1) (s + 1) else 357 findit i (s + 1) 358 nl = BI.c2w '\n' 359 end = the_s + l 360 findit n the_s 361 362break_before_nth_newline :: 363 Int -> B.ByteString -> (B.ByteString, B.ByteString) 364break_before_nth_newline 0 the_ps 365 | B.null the_ps = (B.empty, B.empty) 366break_before_nth_newline n the_ps 367 = case BI.toForeignPtr the_ps of 368 (fp, the_s, l) -> unsafePerformIO $ 369 withForeignPtr fp $ 370 \ p -> 371 do let findit _ s | s == end = return (the_ps, B.empty) 372 findit i s 373 = do w <- peekElemOff p s 374 if w == nl then 375 if i == 0 then 376 let left_l = s - the_s in 377 return 378 (fromForeignPtr fp the_s left_l, 379 fromForeignPtr fp s (l - left_l)) 380 else findit (i - 1) (s + 1) 381 else findit i (s + 1) 382 nl = BI.c2w '\n' 383 end = the_s + l 384 findit n the_s 385