1{-# LANGUAGE FlexibleContexts #-}
2{-# LANGUAGE Trustworthy #-}
3{-# LANGUAGE DefaultSignatures #-}
4-----------------------------------------------------------------------------
5-- |
6-- Module      : Distribution.Compat.Binary.Class
7-- Copyright   : Lennart Kolmodin
8-- License     : BSD3-style (see LICENSE)
9--
10-- Maintainer  : Lennart Kolmodin <kolmodin@gmail.com>
11-- Stability   : unstable
12-- Portability : portable to Hugs and GHC. Requires the FFI and some flexible instances
13--
14-- Typeclass and instances for binary serialization.
15--
16-----------------------------------------------------------------------------
17
18module Distribution.Compat.Binary.Class (
19
20    -- * The Binary class
21      Binary(..)
22
23    -- * Support for generics
24    , GBinary(..)
25
26    ) where
27
28import Data.Word
29
30import Data.Binary.Put
31import Data.Binary.Get
32
33import Control.Applicative ((<$>), (<*>), (*>))
34import Foreign
35
36import Data.ByteString.Lazy (ByteString)
37import qualified Data.ByteString.Lazy as L
38
39import Data.Char     (chr,ord)
40import Data.List     (unfoldr)
41import Data.Foldable (traverse_)
42
43-- And needed for the instances:
44import qualified Data.ByteString as B
45import qualified Data.Map        as Map
46import qualified Data.Set        as Set
47import qualified Data.IntMap     as IntMap
48import qualified Data.IntSet     as IntSet
49import qualified Data.Ratio      as R
50
51import qualified Data.Tree as T
52
53import Data.Array.Unboxed
54
55import GHC.Generics
56
57import qualified Data.Sequence as Seq
58import qualified Data.Foldable as Fold
59
60------------------------------------------------------------------------
61
62class GBinary f where
63    gput :: f t -> Put
64    gget :: Get (f t)
65
66-- | The 'Binary' class provides 'put' and 'get', methods to encode and
67-- decode a Haskell value to a lazy 'ByteString'. It mirrors the 'Read' and
68-- 'Show' classes for textual representation of Haskell types, and is
69-- suitable for serialising Haskell values to disk, over the network.
70--
71-- For decoding and generating simple external binary formats (e.g. C
72-- structures), Binary may be used, but in general is not suitable
73-- for complex protocols. Instead use the 'Put' and 'Get' primitives
74-- directly.
75--
76-- Instances of Binary should satisfy the following property:
77--
78-- > decode . encode == id
79--
80-- That is, the 'get' and 'put' methods should be the inverse of each
81-- other. A range of instances are provided for basic Haskell types.
82--
83class Binary t where
84    -- | Encode a value in the Put monad.
85    put :: t -> Put
86    -- | Decode a value in the Get monad
87    get :: Get t
88
89    default put :: (Generic t, GBinary (Rep t)) => t -> Put
90    put = gput . from
91
92    default get :: (Generic t, GBinary (Rep t)) => Get t
93    get = to `fmap` gget
94
95------------------------------------------------------------------------
96-- Simple instances
97
98-- The () type need never be written to disk: values of singleton type
99-- can be reconstructed from the type alone
100instance Binary () where
101    put ()  = return ()
102    get     = return ()
103
104-- Bools are encoded as a byte in the range 0 .. 1
105instance Binary Bool where
106    put     = putWord8 . fromIntegral . fromEnum
107    get     = fmap (toEnum . fromIntegral) getWord8
108
109-- Values of type 'Ordering' are encoded as a byte in the range 0 .. 2
110instance Binary Ordering where
111    put     = putWord8 . fromIntegral . fromEnum
112    get     = fmap (toEnum . fromIntegral) getWord8
113
114------------------------------------------------------------------------
115-- Words and Ints
116
117-- Words8s are written as bytes
118instance Binary Word8 where
119    put     = putWord8
120    get     = getWord8
121
122-- Words16s are written as 2 bytes in big-endian (network) order
123instance Binary Word16 where
124    put     = putWord16be
125    get     = getWord16be
126
127-- Words32s are written as 4 bytes in big-endian (network) order
128instance Binary Word32 where
129    put     = putWord32be
130    get     = getWord32be
131
132-- Words64s are written as 8 bytes in big-endian (network) order
133instance Binary Word64 where
134    put     = putWord64be
135    get     = getWord64be
136
137-- Int8s are written as a single byte.
138instance Binary Int8 where
139    put i   = put (fromIntegral i :: Word8)
140    get     = fmap fromIntegral (get :: Get Word8)
141
142-- Int16s are written as a 2 bytes in big endian format
143instance Binary Int16 where
144    put i   = put (fromIntegral i :: Word16)
145    get     = fmap fromIntegral (get :: Get Word16)
146
147-- Int32s are written as a 4 bytes in big endian format
148instance Binary Int32 where
149    put i   = put (fromIntegral i :: Word32)
150    get     = fmap fromIntegral (get :: Get Word32)
151
152-- Int64s are written as a 4 bytes in big endian format
153instance Binary Int64 where
154    put i   = put (fromIntegral i :: Word64)
155    get     = fmap fromIntegral (get :: Get Word64)
156
157------------------------------------------------------------------------
158
159-- Words are are written as Word64s, that is, 8 bytes in big endian format
160instance Binary Word where
161    put i   = put (fromIntegral i :: Word64)
162    get     = fmap fromIntegral (get :: Get Word64)
163
164-- Ints are are written as Int64s, that is, 8 bytes in big endian format
165instance Binary Int where
166    put i   = put (fromIntegral i :: Int64)
167    get     = fmap fromIntegral (get :: Get Int64)
168
169------------------------------------------------------------------------
170--
171-- Portable, and pretty efficient, serialisation of Integer
172--
173
174-- Fixed-size type for a subset of Integer
175type SmallInt = Int32
176
177-- Integers are encoded in two ways: if they fit inside a SmallInt,
178-- they're written as a byte tag, and that value.  If the Integer value
179-- is too large to fit in a SmallInt, it is written as a byte array,
180-- along with a sign and length field.
181
182instance Binary Integer where
183
184    {-# INLINE put #-}
185    put n | n >= lo && n <= hi = do
186        putWord8 0
187        put (fromIntegral n :: SmallInt)  -- fast path
188     where
189        lo = fromIntegral (minBound :: SmallInt) :: Integer
190        hi = fromIntegral (maxBound :: SmallInt) :: Integer
191
192    put n = do
193        putWord8 1
194        put sign
195        put (unroll (abs n))         -- unroll the bytes
196     where
197        sign = fromIntegral (signum n) :: Word8
198
199    {-# INLINE get #-}
200    get = do
201        tag <- get :: Get Word8
202        case tag of
203            0 -> fmap fromIntegral (get :: Get SmallInt)
204            _ -> do sign  <- get
205                    bytes <- get
206                    let v = roll bytes
207                    return $! if sign == (1 :: Word8) then v else - v
208
209--
210-- Fold and unfold an Integer to and from a list of its bytes
211--
212unroll :: Integer -> [Word8]
213unroll = unfoldr step
214  where
215    step 0 = Nothing
216    step i = Just (fromIntegral i, i `shiftR` 8)
217
218roll :: [Word8] -> Integer
219roll   = foldr unstep 0
220  where
221    unstep b a = a `shiftL` 8 .|. fromIntegral b
222
223{-
224
225--
226-- An efficient, raw serialisation for Integer (GHC only)
227--
228
229-- TODO  This instance is not architecture portable.  GMP stores numbers as
230-- arrays of machine sized words, so the byte format is not portable across
231-- architectures with different endianness and word size.
232
233import Data.ByteString.Base (toForeignPtr,unsafePackAddress, memcpy)
234import GHC.Base     hiding (ord, chr)
235import GHC.Prim
236import GHC.Ptr (Ptr(..))
237import GHC.IOBase (IO(..))
238
239instance Binary Integer where
240    put (S# i)    = putWord8 0 *> put (I# i)
241    put (J# s ba) = do
242        putWord8 1
243        put (I# s)
244        put (BA ba)
245
246    get = do
247        b <- getWord8
248        case b of
249            0 -> do (I# i#) <- get
250                    return (S# i#)
251            _ -> do (I# s#) <- get
252                    (BA a#) <- get
253                    return (J# s# a#)
254
255instance Binary ByteArray where
256
257    -- Pretty safe.
258    put (BA ba) =
259        let sz   = sizeofByteArray# ba   -- (primitive) in *bytes*
260            addr = byteArrayContents# ba
261            bs   = unsafePackAddress (I# sz) addr
262        in put bs   -- write as a ByteString. easy, yay!
263
264    -- Pretty scary. Should be quick though
265    get = do
266        (fp, off, n@(I# sz)) <- fmap toForeignPtr get      -- so decode a ByteString
267        assert (off == 0) $ return $ unsafePerformIO $ do
268            (MBA arr) <- newByteArray sz                    -- and copy it into a ByteArray#
269            let to = byteArrayContents# (unsafeCoerce# arr) -- urk, is this safe?
270            withForeignPtr fp $ \from -> memcpy (Ptr to) from (fromIntegral n)
271            freezeByteArray arr
272
273-- wrapper for ByteArray#
274data ByteArray = BA  {-# UNPACK #-} !ByteArray#
275data MBA       = MBA {-# UNPACK #-} !(MutableByteArray# RealWorld)
276
277newByteArray :: Int# -> IO MBA
278newByteArray sz = IO $ \s ->
279  case newPinnedByteArray# sz s of { (# s', arr #) ->
280  (# s', MBA arr #) }
281
282freezeByteArray :: MutableByteArray# RealWorld -> IO ByteArray
283freezeByteArray arr = IO $ \s ->
284  case unsafeFreezeByteArray# arr s of { (# s', arr' #) ->
285  (# s', BA arr' #) }
286
287-}
288
289instance (Binary a,Integral a) => Binary (R.Ratio a) where
290    put r = put (R.numerator r) *> put (R.denominator r)
291    get = (R.%) <$> get <*> get
292
293------------------------------------------------------------------------
294
295-- Char is serialised as UTF-8
296instance Binary Char where
297    put a | c <= 0x7f     = put (fromIntegral c :: Word8)
298          | c <= 0x7ff    = do put (0xc0 .|. y)
299                               put (0x80 .|. z)
300          | c <= 0xffff   = do put (0xe0 .|. x)
301                               put (0x80 .|. y)
302                               put (0x80 .|. z)
303          | c <= 0x10ffff = do put (0xf0 .|. w)
304                               put (0x80 .|. x)
305                               put (0x80 .|. y)
306                               put (0x80 .|. z)
307          | otherwise     = error "Not a valid Unicode code point"
308     where
309        c = ord a
310        z, y, x, w :: Word8
311        z = fromIntegral (c           .&. 0x3f)
312        y = fromIntegral (shiftR c 6  .&. 0x3f)
313        x = fromIntegral (shiftR c 12 .&. 0x3f)
314        w = fromIntegral (shiftR c 18 .&. 0x7)
315
316    get = do
317        let getByte = fmap (fromIntegral :: Word8 -> Int) get
318            shiftL6 = flip shiftL 6 :: Int -> Int
319        w <- getByte
320        r <- case () of
321                _ | w < 0x80  -> return w
322                  | w < 0xe0  -> do
323                                    x <- fmap (xor 0x80) getByte
324                                    return (x .|. shiftL6 (xor 0xc0 w))
325                  | w < 0xf0  -> do
326                                    x <- fmap (xor 0x80) getByte
327                                    y <- fmap (xor 0x80) getByte
328                                    return (y .|. shiftL6 (x .|. shiftL6
329                                            (xor 0xe0 w)))
330                  | otherwise -> do
331                                x <- fmap (xor 0x80) getByte
332                                y <- fmap (xor 0x80) getByte
333                                z <- fmap (xor 0x80) getByte
334                                return (z .|. shiftL6 (y .|. shiftL6
335                                        (x .|. shiftL6 (xor 0xf0 w))))
336        return $! chr r
337
338------------------------------------------------------------------------
339-- Instances for the first few tuples
340
341instance (Binary a, Binary b) => Binary (a,b) where
342    put (a,b)           = put a *> put b
343    get                 = (,) <$> get <*> get
344
345instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where
346    put (a,b,c)         = put a *> put b *> put c
347    get                 = (,,) <$> get <*> get <*> get
348
349instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where
350    put (a,b,c,d)       = put a *> put b *> put c *> put d
351    get                 = (,,,) <$> get <*> get <*> get <*> get
352
353instance (Binary a, Binary b, Binary c, Binary d, Binary e) => Binary (a,b,c,d,e) where
354    put (a,b,c,d,e)     = put a *> put b *> put c *> put d *> put e
355    get                 = (,,,,) <$> get <*> get <*> get <*> get <*> get
356
357--
358-- and now just recurse:
359--
360
361instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f)
362        => Binary (a,b,c,d,e,f) where
363    put (a,b,c,d,e,f)   = put (a,(b,c,d,e,f))
364    get                 = do (a,(b,c,d,e,f)) <- get ; return (a,b,c,d,e,f)
365
366instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f, Binary g)
367        => Binary (a,b,c,d,e,f,g) where
368    put (a,b,c,d,e,f,g) = put (a,(b,c,d,e,f,g))
369    get                 = do (a,(b,c,d,e,f,g)) <- get ; return (a,b,c,d,e,f,g)
370
371instance (Binary a, Binary b, Binary c, Binary d, Binary e,
372          Binary f, Binary g, Binary h)
373        => Binary (a,b,c,d,e,f,g,h) where
374    put (a,b,c,d,e,f,g,h) = put (a,(b,c,d,e,f,g,h))
375    get                   = do (a,(b,c,d,e,f,g,h)) <- get ; return (a,b,c,d,e,f,g,h)
376
377instance (Binary a, Binary b, Binary c, Binary d, Binary e,
378          Binary f, Binary g, Binary h, Binary i)
379        => Binary (a,b,c,d,e,f,g,h,i) where
380    put (a,b,c,d,e,f,g,h,i) = put (a,(b,c,d,e,f,g,h,i))
381    get                     = do (a,(b,c,d,e,f,g,h,i)) <- get ; return (a,b,c,d,e,f,g,h,i)
382
383instance (Binary a, Binary b, Binary c, Binary d, Binary e,
384          Binary f, Binary g, Binary h, Binary i, Binary j)
385        => Binary (a,b,c,d,e,f,g,h,i,j) where
386    put (a,b,c,d,e,f,g,h,i,j) = put (a,(b,c,d,e,f,g,h,i,j))
387    get                       = do (a,(b,c,d,e,f,g,h,i,j)) <- get ; return (a,b,c,d,e,f,g,h,i,j)
388
389------------------------------------------------------------------------
390-- Container types
391
392instance Binary a => Binary [a] where
393    put l  = put (length l) *> traverse_ put l
394    get    = do n <- get :: Get Int
395                getMany n
396
397-- | 'getMany n' get 'n' elements in order, without blowing the stack.
398getMany :: Binary a => Int -> Get [a]
399getMany n = go [] n
400 where
401    go xs 0 = return $! reverse xs
402    go xs i = do x <- get
403                 -- we must seq x to avoid stack overflows due to laziness in
404                 -- (>>=)
405                 x `seq` go (x:xs) (i-1)
406{-# INLINE getMany #-}
407
408instance (Binary a) => Binary (Maybe a) where
409    put Nothing  = putWord8 0
410    put (Just x) = putWord8 1 *> put x
411    get = do
412        w <- getWord8
413        case w of
414            0 -> return Nothing
415            _ -> fmap Just get
416
417instance (Binary a, Binary b) => Binary (Either a b) where
418    put (Left  a) = putWord8 0 *> put a
419    put (Right b) = putWord8 1 *> put b
420    get = do
421        w <- getWord8
422        case w of
423            0 -> fmap Left  get
424            _ -> fmap Right get
425
426------------------------------------------------------------------------
427-- ByteStrings (have specially efficient instances)
428
429instance Binary B.ByteString where
430    put bs = do put (B.length bs)
431                putByteString bs
432    get    = get >>= getByteString
433
434--
435-- Using old versions of fps, this is a type synonym, and non portable
436--
437-- Requires 'flexible instances'
438--
439instance Binary ByteString where
440    put bs = do put (fromIntegral (L.length bs) :: Int)
441                putLazyByteString bs
442    get    = get >>= getLazyByteString
443
444------------------------------------------------------------------------
445-- Maps and Sets
446
447instance (Binary a) => Binary (Set.Set a) where
448    put s = put (Set.size s) *> traverse_ put (Set.toAscList s)
449    get   = fmap Set.fromDistinctAscList get
450
451instance (Binary k, Binary e) => Binary (Map.Map k e) where
452    put m = put (Map.size m) *> traverse_ put (Map.toAscList m)
453    get   = fmap Map.fromDistinctAscList get
454
455instance Binary IntSet.IntSet where
456    put s = put (IntSet.size s) *> traverse_ put (IntSet.toAscList s)
457    get   = fmap IntSet.fromDistinctAscList get
458
459instance (Binary e) => Binary (IntMap.IntMap e) where
460    put m = put (IntMap.size m) *> traverse_ put (IntMap.toAscList m)
461    get   = fmap IntMap.fromDistinctAscList get
462
463------------------------------------------------------------------------
464-- Queues and Sequences
465
466instance (Binary e) => Binary (Seq.Seq e) where
467    put s = put (Seq.length s) *> Fold.traverse_ put s
468    get = do n <- get :: Get Int
469             rep Seq.empty n get
470      where rep xs 0 _ = return $! xs
471            rep xs n g = xs `seq` n `seq` do
472                           x <- g
473                           rep (xs Seq.|> x) (n-1) g
474
475------------------------------------------------------------------------
476-- Floating point
477
478instance Binary Double where
479    put d = put (decodeFloat d)
480    get   = encodeFloat <$> get <*> get
481
482instance Binary Float where
483    put f = put (decodeFloat f)
484    get   = encodeFloat <$> get <*> get
485
486------------------------------------------------------------------------
487-- Trees
488
489instance (Binary e) => Binary (T.Tree e) where
490    put (T.Node r s) = put r *> put s
491    get = T.Node <$> get <*> get
492
493------------------------------------------------------------------------
494-- Arrays
495
496instance (Binary i, Ix i, Binary e) => Binary (Array i e) where
497    put a = do
498        put (bounds a)
499        put (rangeSize $ bounds a) -- write the length
500        traverse_ put (elems a)        -- now the elems.
501    get = do
502        bs <- get
503        n  <- get                  -- read the length
504        xs <- getMany n            -- now the elems.
505        return (listArray bs xs)
506
507--
508-- The IArray UArray e constraint is non portable. Requires flexible instances
509--
510instance (Binary i, Ix i, Binary e, IArray UArray e) => Binary (UArray i e) where
511    put a = do
512        put (bounds a)
513        put (rangeSize $ bounds a) -- now write the length
514        traverse_ put (elems a)
515    get = do
516        bs <- get
517        n  <- get
518        xs <- getMany n
519        return (listArray bs xs)
520