1{-# LANGUAGE FlexibleContexts #-} 2{-# LANGUAGE Trustworthy #-} 3{-# LANGUAGE DefaultSignatures #-} 4----------------------------------------------------------------------------- 5-- | 6-- Module : Distribution.Compat.Binary.Class 7-- Copyright : Lennart Kolmodin 8-- License : BSD3-style (see LICENSE) 9-- 10-- Maintainer : Lennart Kolmodin <kolmodin@gmail.com> 11-- Stability : unstable 12-- Portability : portable to Hugs and GHC. Requires the FFI and some flexible instances 13-- 14-- Typeclass and instances for binary serialization. 15-- 16----------------------------------------------------------------------------- 17 18module Distribution.Compat.Binary.Class ( 19 20 -- * The Binary class 21 Binary(..) 22 23 -- * Support for generics 24 , GBinary(..) 25 26 ) where 27 28import Data.Word 29 30import Data.Binary.Put 31import Data.Binary.Get 32 33import Control.Applicative ((<$>), (<*>), (*>)) 34import Foreign 35 36import Data.ByteString.Lazy (ByteString) 37import qualified Data.ByteString.Lazy as L 38 39import Data.Char (chr,ord) 40import Data.List (unfoldr) 41import Data.Foldable (traverse_) 42 43-- And needed for the instances: 44import qualified Data.ByteString as B 45import qualified Data.Map as Map 46import qualified Data.Set as Set 47import qualified Data.IntMap as IntMap 48import qualified Data.IntSet as IntSet 49import qualified Data.Ratio as R 50 51import qualified Data.Tree as T 52 53import Data.Array.Unboxed 54 55import GHC.Generics 56 57import qualified Data.Sequence as Seq 58import qualified Data.Foldable as Fold 59 60------------------------------------------------------------------------ 61 62class GBinary f where 63 gput :: f t -> Put 64 gget :: Get (f t) 65 66-- | The 'Binary' class provides 'put' and 'get', methods to encode and 67-- decode a Haskell value to a lazy 'ByteString'. It mirrors the 'Read' and 68-- 'Show' classes for textual representation of Haskell types, and is 69-- suitable for serialising Haskell values to disk, over the network. 70-- 71-- For decoding and generating simple external binary formats (e.g. C 72-- structures), Binary may be used, but in general is not suitable 73-- for complex protocols. Instead use the 'Put' and 'Get' primitives 74-- directly. 75-- 76-- Instances of Binary should satisfy the following property: 77-- 78-- > decode . encode == id 79-- 80-- That is, the 'get' and 'put' methods should be the inverse of each 81-- other. A range of instances are provided for basic Haskell types. 82-- 83class Binary t where 84 -- | Encode a value in the Put monad. 85 put :: t -> Put 86 -- | Decode a value in the Get monad 87 get :: Get t 88 89 default put :: (Generic t, GBinary (Rep t)) => t -> Put 90 put = gput . from 91 92 default get :: (Generic t, GBinary (Rep t)) => Get t 93 get = to `fmap` gget 94 95------------------------------------------------------------------------ 96-- Simple instances 97 98-- The () type need never be written to disk: values of singleton type 99-- can be reconstructed from the type alone 100instance Binary () where 101 put () = return () 102 get = return () 103 104-- Bools are encoded as a byte in the range 0 .. 1 105instance Binary Bool where 106 put = putWord8 . fromIntegral . fromEnum 107 get = fmap (toEnum . fromIntegral) getWord8 108 109-- Values of type 'Ordering' are encoded as a byte in the range 0 .. 2 110instance Binary Ordering where 111 put = putWord8 . fromIntegral . fromEnum 112 get = fmap (toEnum . fromIntegral) getWord8 113 114------------------------------------------------------------------------ 115-- Words and Ints 116 117-- Words8s are written as bytes 118instance Binary Word8 where 119 put = putWord8 120 get = getWord8 121 122-- Words16s are written as 2 bytes in big-endian (network) order 123instance Binary Word16 where 124 put = putWord16be 125 get = getWord16be 126 127-- Words32s are written as 4 bytes in big-endian (network) order 128instance Binary Word32 where 129 put = putWord32be 130 get = getWord32be 131 132-- Words64s are written as 8 bytes in big-endian (network) order 133instance Binary Word64 where 134 put = putWord64be 135 get = getWord64be 136 137-- Int8s are written as a single byte. 138instance Binary Int8 where 139 put i = put (fromIntegral i :: Word8) 140 get = fmap fromIntegral (get :: Get Word8) 141 142-- Int16s are written as a 2 bytes in big endian format 143instance Binary Int16 where 144 put i = put (fromIntegral i :: Word16) 145 get = fmap fromIntegral (get :: Get Word16) 146 147-- Int32s are written as a 4 bytes in big endian format 148instance Binary Int32 where 149 put i = put (fromIntegral i :: Word32) 150 get = fmap fromIntegral (get :: Get Word32) 151 152-- Int64s are written as a 4 bytes in big endian format 153instance Binary Int64 where 154 put i = put (fromIntegral i :: Word64) 155 get = fmap fromIntegral (get :: Get Word64) 156 157------------------------------------------------------------------------ 158 159-- Words are are written as Word64s, that is, 8 bytes in big endian format 160instance Binary Word where 161 put i = put (fromIntegral i :: Word64) 162 get = fmap fromIntegral (get :: Get Word64) 163 164-- Ints are are written as Int64s, that is, 8 bytes in big endian format 165instance Binary Int where 166 put i = put (fromIntegral i :: Int64) 167 get = fmap fromIntegral (get :: Get Int64) 168 169------------------------------------------------------------------------ 170-- 171-- Portable, and pretty efficient, serialisation of Integer 172-- 173 174-- Fixed-size type for a subset of Integer 175type SmallInt = Int32 176 177-- Integers are encoded in two ways: if they fit inside a SmallInt, 178-- they're written as a byte tag, and that value. If the Integer value 179-- is too large to fit in a SmallInt, it is written as a byte array, 180-- along with a sign and length field. 181 182instance Binary Integer where 183 184 {-# INLINE put #-} 185 put n | n >= lo && n <= hi = do 186 putWord8 0 187 put (fromIntegral n :: SmallInt) -- fast path 188 where 189 lo = fromIntegral (minBound :: SmallInt) :: Integer 190 hi = fromIntegral (maxBound :: SmallInt) :: Integer 191 192 put n = do 193 putWord8 1 194 put sign 195 put (unroll (abs n)) -- unroll the bytes 196 where 197 sign = fromIntegral (signum n) :: Word8 198 199 {-# INLINE get #-} 200 get = do 201 tag <- get :: Get Word8 202 case tag of 203 0 -> fmap fromIntegral (get :: Get SmallInt) 204 _ -> do sign <- get 205 bytes <- get 206 let v = roll bytes 207 return $! if sign == (1 :: Word8) then v else - v 208 209-- 210-- Fold and unfold an Integer to and from a list of its bytes 211-- 212unroll :: Integer -> [Word8] 213unroll = unfoldr step 214 where 215 step 0 = Nothing 216 step i = Just (fromIntegral i, i `shiftR` 8) 217 218roll :: [Word8] -> Integer 219roll = foldr unstep 0 220 where 221 unstep b a = a `shiftL` 8 .|. fromIntegral b 222 223{- 224 225-- 226-- An efficient, raw serialisation for Integer (GHC only) 227-- 228 229-- TODO This instance is not architecture portable. GMP stores numbers as 230-- arrays of machine sized words, so the byte format is not portable across 231-- architectures with different endianness and word size. 232 233import Data.ByteString.Base (toForeignPtr,unsafePackAddress, memcpy) 234import GHC.Base hiding (ord, chr) 235import GHC.Prim 236import GHC.Ptr (Ptr(..)) 237import GHC.IOBase (IO(..)) 238 239instance Binary Integer where 240 put (S# i) = putWord8 0 *> put (I# i) 241 put (J# s ba) = do 242 putWord8 1 243 put (I# s) 244 put (BA ba) 245 246 get = do 247 b <- getWord8 248 case b of 249 0 -> do (I# i#) <- get 250 return (S# i#) 251 _ -> do (I# s#) <- get 252 (BA a#) <- get 253 return (J# s# a#) 254 255instance Binary ByteArray where 256 257 -- Pretty safe. 258 put (BA ba) = 259 let sz = sizeofByteArray# ba -- (primitive) in *bytes* 260 addr = byteArrayContents# ba 261 bs = unsafePackAddress (I# sz) addr 262 in put bs -- write as a ByteString. easy, yay! 263 264 -- Pretty scary. Should be quick though 265 get = do 266 (fp, off, n@(I# sz)) <- fmap toForeignPtr get -- so decode a ByteString 267 assert (off == 0) $ return $ unsafePerformIO $ do 268 (MBA arr) <- newByteArray sz -- and copy it into a ByteArray# 269 let to = byteArrayContents# (unsafeCoerce# arr) -- urk, is this safe? 270 withForeignPtr fp $ \from -> memcpy (Ptr to) from (fromIntegral n) 271 freezeByteArray arr 272 273-- wrapper for ByteArray# 274data ByteArray = BA {-# UNPACK #-} !ByteArray# 275data MBA = MBA {-# UNPACK #-} !(MutableByteArray# RealWorld) 276 277newByteArray :: Int# -> IO MBA 278newByteArray sz = IO $ \s -> 279 case newPinnedByteArray# sz s of { (# s', arr #) -> 280 (# s', MBA arr #) } 281 282freezeByteArray :: MutableByteArray# RealWorld -> IO ByteArray 283freezeByteArray arr = IO $ \s -> 284 case unsafeFreezeByteArray# arr s of { (# s', arr' #) -> 285 (# s', BA arr' #) } 286 287-} 288 289instance (Binary a,Integral a) => Binary (R.Ratio a) where 290 put r = put (R.numerator r) *> put (R.denominator r) 291 get = (R.%) <$> get <*> get 292 293------------------------------------------------------------------------ 294 295-- Char is serialised as UTF-8 296instance Binary Char where 297 put a | c <= 0x7f = put (fromIntegral c :: Word8) 298 | c <= 0x7ff = do put (0xc0 .|. y) 299 put (0x80 .|. z) 300 | c <= 0xffff = do put (0xe0 .|. x) 301 put (0x80 .|. y) 302 put (0x80 .|. z) 303 | c <= 0x10ffff = do put (0xf0 .|. w) 304 put (0x80 .|. x) 305 put (0x80 .|. y) 306 put (0x80 .|. z) 307 | otherwise = error "Not a valid Unicode code point" 308 where 309 c = ord a 310 z, y, x, w :: Word8 311 z = fromIntegral (c .&. 0x3f) 312 y = fromIntegral (shiftR c 6 .&. 0x3f) 313 x = fromIntegral (shiftR c 12 .&. 0x3f) 314 w = fromIntegral (shiftR c 18 .&. 0x7) 315 316 get = do 317 let getByte = fmap (fromIntegral :: Word8 -> Int) get 318 shiftL6 = flip shiftL 6 :: Int -> Int 319 w <- getByte 320 r <- case () of 321 _ | w < 0x80 -> return w 322 | w < 0xe0 -> do 323 x <- fmap (xor 0x80) getByte 324 return (x .|. shiftL6 (xor 0xc0 w)) 325 | w < 0xf0 -> do 326 x <- fmap (xor 0x80) getByte 327 y <- fmap (xor 0x80) getByte 328 return (y .|. shiftL6 (x .|. shiftL6 329 (xor 0xe0 w))) 330 | otherwise -> do 331 x <- fmap (xor 0x80) getByte 332 y <- fmap (xor 0x80) getByte 333 z <- fmap (xor 0x80) getByte 334 return (z .|. shiftL6 (y .|. shiftL6 335 (x .|. shiftL6 (xor 0xf0 w)))) 336 return $! chr r 337 338------------------------------------------------------------------------ 339-- Instances for the first few tuples 340 341instance (Binary a, Binary b) => Binary (a,b) where 342 put (a,b) = put a *> put b 343 get = (,) <$> get <*> get 344 345instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where 346 put (a,b,c) = put a *> put b *> put c 347 get = (,,) <$> get <*> get <*> get 348 349instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where 350 put (a,b,c,d) = put a *> put b *> put c *> put d 351 get = (,,,) <$> get <*> get <*> get <*> get 352 353instance (Binary a, Binary b, Binary c, Binary d, Binary e) => Binary (a,b,c,d,e) where 354 put (a,b,c,d,e) = put a *> put b *> put c *> put d *> put e 355 get = (,,,,) <$> get <*> get <*> get <*> get <*> get 356 357-- 358-- and now just recurse: 359-- 360 361instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f) 362 => Binary (a,b,c,d,e,f) where 363 put (a,b,c,d,e,f) = put (a,(b,c,d,e,f)) 364 get = do (a,(b,c,d,e,f)) <- get ; return (a,b,c,d,e,f) 365 366instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f, Binary g) 367 => Binary (a,b,c,d,e,f,g) where 368 put (a,b,c,d,e,f,g) = put (a,(b,c,d,e,f,g)) 369 get = do (a,(b,c,d,e,f,g)) <- get ; return (a,b,c,d,e,f,g) 370 371instance (Binary a, Binary b, Binary c, Binary d, Binary e, 372 Binary f, Binary g, Binary h) 373 => Binary (a,b,c,d,e,f,g,h) where 374 put (a,b,c,d,e,f,g,h) = put (a,(b,c,d,e,f,g,h)) 375 get = do (a,(b,c,d,e,f,g,h)) <- get ; return (a,b,c,d,e,f,g,h) 376 377instance (Binary a, Binary b, Binary c, Binary d, Binary e, 378 Binary f, Binary g, Binary h, Binary i) 379 => Binary (a,b,c,d,e,f,g,h,i) where 380 put (a,b,c,d,e,f,g,h,i) = put (a,(b,c,d,e,f,g,h,i)) 381 get = do (a,(b,c,d,e,f,g,h,i)) <- get ; return (a,b,c,d,e,f,g,h,i) 382 383instance (Binary a, Binary b, Binary c, Binary d, Binary e, 384 Binary f, Binary g, Binary h, Binary i, Binary j) 385 => Binary (a,b,c,d,e,f,g,h,i,j) where 386 put (a,b,c,d,e,f,g,h,i,j) = put (a,(b,c,d,e,f,g,h,i,j)) 387 get = do (a,(b,c,d,e,f,g,h,i,j)) <- get ; return (a,b,c,d,e,f,g,h,i,j) 388 389------------------------------------------------------------------------ 390-- Container types 391 392instance Binary a => Binary [a] where 393 put l = put (length l) *> traverse_ put l 394 get = do n <- get :: Get Int 395 getMany n 396 397-- | 'getMany n' get 'n' elements in order, without blowing the stack. 398getMany :: Binary a => Int -> Get [a] 399getMany n = go [] n 400 where 401 go xs 0 = return $! reverse xs 402 go xs i = do x <- get 403 -- we must seq x to avoid stack overflows due to laziness in 404 -- (>>=) 405 x `seq` go (x:xs) (i-1) 406{-# INLINE getMany #-} 407 408instance (Binary a) => Binary (Maybe a) where 409 put Nothing = putWord8 0 410 put (Just x) = putWord8 1 *> put x 411 get = do 412 w <- getWord8 413 case w of 414 0 -> return Nothing 415 _ -> fmap Just get 416 417instance (Binary a, Binary b) => Binary (Either a b) where 418 put (Left a) = putWord8 0 *> put a 419 put (Right b) = putWord8 1 *> put b 420 get = do 421 w <- getWord8 422 case w of 423 0 -> fmap Left get 424 _ -> fmap Right get 425 426------------------------------------------------------------------------ 427-- ByteStrings (have specially efficient instances) 428 429instance Binary B.ByteString where 430 put bs = do put (B.length bs) 431 putByteString bs 432 get = get >>= getByteString 433 434-- 435-- Using old versions of fps, this is a type synonym, and non portable 436-- 437-- Requires 'flexible instances' 438-- 439instance Binary ByteString where 440 put bs = do put (fromIntegral (L.length bs) :: Int) 441 putLazyByteString bs 442 get = get >>= getLazyByteString 443 444------------------------------------------------------------------------ 445-- Maps and Sets 446 447instance (Binary a) => Binary (Set.Set a) where 448 put s = put (Set.size s) *> traverse_ put (Set.toAscList s) 449 get = fmap Set.fromDistinctAscList get 450 451instance (Binary k, Binary e) => Binary (Map.Map k e) where 452 put m = put (Map.size m) *> traverse_ put (Map.toAscList m) 453 get = fmap Map.fromDistinctAscList get 454 455instance Binary IntSet.IntSet where 456 put s = put (IntSet.size s) *> traverse_ put (IntSet.toAscList s) 457 get = fmap IntSet.fromDistinctAscList get 458 459instance (Binary e) => Binary (IntMap.IntMap e) where 460 put m = put (IntMap.size m) *> traverse_ put (IntMap.toAscList m) 461 get = fmap IntMap.fromDistinctAscList get 462 463------------------------------------------------------------------------ 464-- Queues and Sequences 465 466instance (Binary e) => Binary (Seq.Seq e) where 467 put s = put (Seq.length s) *> Fold.traverse_ put s 468 get = do n <- get :: Get Int 469 rep Seq.empty n get 470 where rep xs 0 _ = return $! xs 471 rep xs n g = xs `seq` n `seq` do 472 x <- g 473 rep (xs Seq.|> x) (n-1) g 474 475------------------------------------------------------------------------ 476-- Floating point 477 478instance Binary Double where 479 put d = put (decodeFloat d) 480 get = encodeFloat <$> get <*> get 481 482instance Binary Float where 483 put f = put (decodeFloat f) 484 get = encodeFloat <$> get <*> get 485 486------------------------------------------------------------------------ 487-- Trees 488 489instance (Binary e) => Binary (T.Tree e) where 490 put (T.Node r s) = put r *> put s 491 get = T.Node <$> get <*> get 492 493------------------------------------------------------------------------ 494-- Arrays 495 496instance (Binary i, Ix i, Binary e) => Binary (Array i e) where 497 put a = do 498 put (bounds a) 499 put (rangeSize $ bounds a) -- write the length 500 traverse_ put (elems a) -- now the elems. 501 get = do 502 bs <- get 503 n <- get -- read the length 504 xs <- getMany n -- now the elems. 505 return (listArray bs xs) 506 507-- 508-- The IArray UArray e constraint is non portable. Requires flexible instances 509-- 510instance (Binary i, Ix i, Binary e, IArray UArray e) => Binary (UArray i e) where 511 put a = do 512 put (bounds a) 513 put (rangeSize $ bounds a) -- now write the length 514 traverse_ put (elems a) 515 get = do 516 bs <- get 517 n <- get 518 xs <- getMany n 519 return (listArray bs xs) 520