1{-# LANGUAGE CPP, BangPatterns #-}
2-----------------------------------------------------------------------------
3-- |
4-- Module      : Codec.CBOR
5-- Copyright   : 2013 Simon Meier <iridcode@gmail.com>,
6--               2013-2014 Duncan Coutts,
7-- License     : BSD3-style (see LICENSE.txt)
8--
9-- Maintainer  : Duncan Coutts
10-- Stability   :
11-- Portability : portable
12--
13-- CBOR format support.
14--
15-----------------------------------------------------------------------------
16
17module Tests.Reference.Implementation (
18    serialise,
19    deserialise,
20
21    Term(..),
22    Token(..),
23    canonicaliseTerm,
24    isCanonicalTerm,
25
26    UInt(..),
27    fromUInt,
28    toUInt,
29    canonicaliseUInt,
30
31    Simple(..),
32    fromSimple,
33    toSimple,
34    reservedSimple,
35    unassignedSimple,
36    reservedTag,
37
38    Decoder,
39    runDecoder,
40    testDecode,
41
42    decodeTerm,
43    decodeTokens,
44    decodeToken,
45    decodeTagged,
46
47    diagnosticNotation,
48
49    Encoder,
50    encodeTerm,
51    encodeToken,
52
53    prop_InitialByte,
54    prop_AdditionalInfo,
55    prop_TokenHeader,
56    prop_TokenHeader2,
57    prop_Token,
58    prop_Term,
59
60    -- properties of internal helpers
61    prop_integerToFromBytes,
62    prop_word16ToFromNet,
63    prop_word32ToFromNet,
64    prop_word64ToFromNet,
65    prop_halfToFromFloat,
66    ) where
67
68
69import qualified Control.Monad.Fail as Fail
70import           Data.Bits
71import           Data.Word
72import qualified Numeric.Half as Half
73import           Data.List
74import           Numeric
75import           GHC.Float (float2Double)
76import qualified Data.ByteString      as BS
77import qualified Data.ByteString.Lazy as LBS
78import qualified Data.Text as T
79import qualified Data.Text.Encoding as T
80import           Data.Monoid ((<>))
81import           Control.Monad (ap)
82
83import           Test.QuickCheck.Arbitrary
84import           Test.QuickCheck.Gen
85
86#if !MIN_VERSION_base(4,8,0)
87import           Data.Monoid (Monoid(..))
88import           Control.Applicative
89#endif
90
91import           Tests.Reference.Generators
92
93
94serialise :: Term -> LBS.ByteString
95serialise = LBS.pack . encodeTerm
96
97deserialise :: LBS.ByteString -> Term
98deserialise bytes =
99    case runDecoder decodeTerm (LBS.unpack bytes) of
100      Just (term, []) -> term
101      Just _          -> error "ReferenceImpl.deserialise: trailing data"
102      Nothing         -> error "ReferenceImpl.deserialise: decoding failed"
103
104
105------------------------------------------------------------------------
106
107newtype Decoder a = Decoder { runDecoder :: [Word8] -> Maybe (a, [Word8]) }
108
109instance Functor Decoder where
110  fmap f a = a >>= return . f
111
112instance Applicative Decoder where
113  pure  = return
114  (<*>) = ap
115
116instance Monad Decoder where
117  return x = Decoder (\ws -> Just (x, ws))
118  d >>= f  = Decoder (\ws -> case runDecoder d ws of
119                               Nothing       -> Nothing
120                               Just (x, ws') -> runDecoder (f x) ws')
121#if !MIN_VERSION_base(4,13,0)
122  fail = Fail.fail
123#endif
124
125instance Fail.MonadFail Decoder where
126  fail _   = Decoder (\_ -> Nothing)
127
128getByte :: Decoder Word8
129getByte =
130  Decoder $ \ws ->
131    case ws of
132      w:ws' -> Just (w, ws')
133      _     -> Nothing
134
135getBytes :: Integral n => n -> Decoder [Word8]
136getBytes n =
137  Decoder $ \ws ->
138    case genericSplitAt n ws of
139      (ws', [])   | genericLength ws' == n -> Just (ws', [])
140                  | otherwise              -> Nothing
141      (ws', ws'')                          -> Just (ws', ws'')
142
143eof :: Decoder Bool
144eof = Decoder $ \ws -> Just (null ws, ws)
145
146type Encoder a = a -> [Word8]
147
148-- The initial byte of each data item contains both information about
149-- the major type (the high-order 3 bits, described in Section 2.1) and
150-- additional information (the low-order 5 bits).
151
152data MajorType = MajorType0 | MajorType1 | MajorType2 | MajorType3
153               | MajorType4 | MajorType5 | MajorType6 | MajorType7
154  deriving (Show, Eq, Ord, Enum)
155
156instance Arbitrary MajorType where
157  arbitrary = elements [MajorType0 .. MajorType7]
158
159encodeInitialByte :: MajorType -> Word -> Word8
160encodeInitialByte mt ai
161  | ai < 2^(5 :: Int)
162  = fromIntegral (fromIntegral (fromEnum mt) `shiftL` 5 .|. ai)
163
164  | otherwise
165  = error "encodeInitialByte: invalid additional info value"
166
167decodeInitialByte :: Word8 -> (MajorType, Word)
168decodeInitialByte ib = ( toEnum $ fromIntegral $ ib `shiftR` 5
169                       , fromIntegral $ ib .&. 0x1f)
170
171prop_InitialByte :: Bool
172prop_InitialByte =
173    and [ (uncurry encodeInitialByte . decodeInitialByte) w8 == w8
174        | w8 <- [minBound..maxBound] ]
175
176-- When the value of the
177-- additional information is less than 24, it is directly used as a
178-- small unsigned integer.  When it is 24 to 27, the additional bytes
179-- for a variable-length integer immediately follow; the values 24 to 27
180-- of the additional information specify that its length is a 1-, 2-,
181-- 4-, or 8-byte unsigned integer, respectively.  Additional information
182-- value 31 is used for indefinite-length items, described in
183-- Section 2.2.  Additional information values 28 to 30 are reserved for
184-- future expansion.
185--
186-- In all additional information values, the resulting integer is
187-- interpreted depending on the major type.  It may represent the actual
188-- data: for example, in integer types, the resulting integer is used
189-- for the value itself.  It may instead supply length information: for
190-- example, in byte strings it gives the length of the byte string data
191-- that follows.
192
193data UInt =
194       UIntSmall Word
195     | UInt8     Word8
196     | UInt16    Word16
197     | UInt32    Word32
198     | UInt64    Word64
199  deriving (Eq, Show)
200
201data AdditionalInformation =
202       AiValue    UInt
203     | AiIndefLen
204     | AiReserved Word
205  deriving (Eq, Show)
206
207instance Arbitrary UInt where
208  arbitrary =
209    sized $ \n ->
210      oneof $ take (1 + n `div` 2)
211        [ UIntSmall <$> choose (0, 23)
212        , UInt8     <$> arbitraryBoundedIntegral
213        , UInt16    <$> arbitraryBoundedIntegral
214        , UInt32    <$> arbitraryBoundedIntegral
215        , UInt64    <$> arbitraryBoundedIntegral
216        ]
217  shrink (UIntSmall n) = [ UIntSmall n' | n' <- shrink n ]
218  shrink (UInt8  n)    = [ UInt8  n'    | n' <- shrink n ]
219                      ++ [ UIntSmall (fromIntegral n) | n <= 23 ]
220  shrink (UInt16 n)    = [ UInt16 n'    | n' <- shrink n ]
221                      ++ [ UInt8 (fromIntegral n)
222                         | n <= fromIntegral (maxBound :: Word8) ]
223  shrink (UInt32 n)    = [ UInt32 n'    | n' <- shrink n ]
224                      ++ [ UInt16 (fromIntegral n)
225                         | n <= fromIntegral (maxBound :: Word16) ]
226  shrink (UInt64 n)    = [ UInt64 n'    | n' <- shrink n ]
227                      ++ [ UInt32 (fromIntegral n)
228                         | n <= fromIntegral (maxBound :: Word32) ]
229
230instance Arbitrary AdditionalInformation where
231  arbitrary =
232    frequency
233      [ (7, AiValue <$> arbitrary)
234      , (2, pure AiIndefLen)
235      , (1, AiReserved <$> choose (28, 30))
236      ]
237
238decodeAdditionalInfo :: Word -> Decoder AdditionalInformation
239decodeAdditionalInfo = dec
240  where
241    dec n
242      | n < 24 = return (AiValue (UIntSmall n))
243    dec 24     = do w <- getByte
244                    return (AiValue (UInt8 w))
245    dec 25     = do [w1,w0] <- getBytes (2 :: Int)
246                    let w = word16FromNet w1 w0
247                    return (AiValue (UInt16 w))
248    dec 26     = do [w3,w2,w1,w0] <- getBytes (4 :: Int)
249                    let w = word32FromNet w3 w2 w1 w0
250                    return (AiValue (UInt32 w))
251    dec 27     = do [w7,w6,w5,w4,w3,w2,w1,w0] <- getBytes (8 :: Int)
252                    let w = word64FromNet w7 w6 w5 w4 w3 w2 w1 w0
253                    return (AiValue (UInt64 w))
254    dec 31     = return AiIndefLen
255    dec n
256      | n < 31 = return (AiReserved n)
257    dec _      = fail ""
258
259encodeAdditionalInfo :: AdditionalInformation -> (Word, [Word8])
260encodeAdditionalInfo = enc
261  where
262    enc (AiValue (UIntSmall n))
263      | n < 24               = (n, [])
264      | otherwise            = error "invalid UIntSmall value"
265    enc (AiValue (UInt8  w)) = (24, [w])
266    enc (AiValue (UInt16 w)) = (25, [w1, w0])
267                               where (w1, w0) = word16ToNet w
268    enc (AiValue (UInt32 w)) = (26, [w3, w2, w1, w0])
269                               where (w3, w2, w1, w0) = word32ToNet w
270    enc (AiValue (UInt64 w)) = (27, [w7, w6, w5, w4,
271                                     w3, w2, w1, w0])
272                               where (w7, w6, w5, w4,
273                                      w3, w2, w1, w0) = word64ToNet w
274    enc  AiIndefLen          = (31, [])
275    enc (AiReserved n)
276      | n >= 28 && n < 31    = (n,  [])
277      | otherwise            = error "invalid AiReserved value"
278
279prop_AdditionalInfo :: AdditionalInformation -> Bool
280prop_AdditionalInfo ai =
281    let (w, ws) = encodeAdditionalInfo ai
282        Just (ai', _) = runDecoder (decodeAdditionalInfo w) ws
283     in ai == ai'
284
285
286data TokenHeader = TokenHeader MajorType AdditionalInformation
287  deriving (Show, Eq)
288
289instance Arbitrary TokenHeader where
290  arbitrary = TokenHeader <$> arbitrary <*> arbitrary
291
292decodeTokenHeader :: Decoder TokenHeader
293decodeTokenHeader = do
294    b <- getByte
295    let (mt, ai) = decodeInitialByte b
296    ai' <- decodeAdditionalInfo ai
297    return (TokenHeader mt ai')
298
299encodeTokenHeader :: Encoder TokenHeader
300encodeTokenHeader (TokenHeader mt ai) =
301    let (w, ws) = encodeAdditionalInfo ai
302     in encodeInitialByte mt w : ws
303
304prop_TokenHeader :: TokenHeader -> Bool
305prop_TokenHeader header =
306    let ws                = encodeTokenHeader header
307        Just (header', _) = runDecoder decodeTokenHeader ws
308     in header == header'
309
310prop_TokenHeader2 :: Bool
311prop_TokenHeader2 =
312    and [ w8 : extraused == encoded
313        | w8 <- [minBound..maxBound]
314        , let extra = [1..8]
315              Just (header, unused) = runDecoder decodeTokenHeader (w8 : extra)
316              encoded   = encodeTokenHeader header
317              extraused = take (8 - length unused) extra
318        ]
319
320data Simple = SimpleSmall Word  --  0 .. 23
321            | SimpleLarge Word8 --  0 .. 255, but  0..23 are non-canonical
322                                --            and 24..31 are reserved
323  deriving (Eq, Show)
324
325fromSimple :: Simple -> Word8
326fromSimple (SimpleSmall w) = fromIntegral w
327fromSimple (SimpleLarge w) = w
328
329toSimple :: Word8 -> Simple
330toSimple w | w <= 23   = SimpleSmall (fromIntegral w)
331           | otherwise = SimpleLarge w
332
333reservedSimple :: Word8 -> Bool
334reservedSimple w = w >= 24 && w <= 31
335
336unassignedSimple :: Word8 -> Bool
337unassignedSimple w = w < 20 || w > 31
338
339instance Arbitrary Simple where
340  arbitrary = oneof [ SimpleSmall <$> choose (0, 23)
341                    , SimpleLarge <$> choose (0, 31)
342                    , SimpleLarge <$> choose (32, 255)
343                    ]
344  shrink (SimpleSmall n) = [ SimpleSmall n' | n' <- shrink n ]
345  shrink (SimpleLarge n) = [ SimpleSmall (fromIntegral n')
346                           | n' <- shrink n, n' <= 23 ]
347                        ++ [ SimpleLarge n' | n' <- shrink n ]
348
349
350data Token =
351     MT0_UnsignedInt UInt
352   | MT1_NegativeInt UInt
353   | MT2_ByteString  UInt [Word8]
354   | MT2_ByteStringIndef
355   | MT3_String      UInt [Word8]
356   | MT3_StringIndef
357   | MT4_ArrayLen    UInt
358   | MT4_ArrayLenIndef
359   | MT5_MapLen      UInt
360   | MT5_MapLenIndef
361   | MT6_Tag     UInt
362   | MT7_Simple  Simple
363   | MT7_Float16 HalfSpecials
364   | MT7_Float32 FloatSpecials
365   | MT7_Float64 DoubleSpecials
366   | MT7_Break
367  deriving (Show, Eq)
368
369instance Arbitrary Token where
370  arbitrary =
371    oneof
372      [ MT0_UnsignedInt <$> arbitrary
373      , MT1_NegativeInt <$> arbitrary
374      , do ws <- arbitrary
375           MT2_ByteString <$> arbitraryLengthUInt ws <*> pure ws
376      , pure MT2_ByteStringIndef
377      , do cs <- arbitrary
378           let ws = encodeUTF8 cs
379           MT3_String <$> arbitraryLengthUInt ws <*> pure ws
380      , pure MT3_StringIndef
381      , MT4_ArrayLen <$> arbitrary
382      , pure MT4_ArrayLenIndef
383      , MT5_MapLen <$> arbitrary
384      , pure MT5_MapLenIndef
385      , MT6_Tag     <$> arbitrary
386      , MT7_Simple  <$> arbitrary
387      , MT7_Float16 <$> arbitrary
388      , MT7_Float32 <$> arbitrary
389      , MT7_Float64 <$> arbitrary
390      , pure MT7_Break
391      ]
392    where
393      arbitraryLengthUInt xs =
394        let n = length xs in
395        elements $
396             [ UIntSmall (fromIntegral n) | n < 24  ]
397          ++ [ UInt8     (fromIntegral n) | n < 255 ]
398          ++ [ UInt16    (fromIntegral n) | n < 65536 ]
399          ++ [ UInt32    (fromIntegral n)
400             , UInt64    (fromIntegral n) ]
401
402testDecode :: [Word8] -> Term
403testDecode ws =
404    case runDecoder decodeTerm ws of
405      Just (x, []) -> x
406      _            -> error "testDecode: parse error"
407
408decodeTokens :: Decoder [Token]
409decodeTokens = do
410    done <- eof
411    if done
412      then return []
413      else do tok  <- decodeToken
414              toks <- decodeTokens
415              return (tok:toks)
416
417decodeToken :: Decoder Token
418decodeToken = do
419    header <- decodeTokenHeader
420    extra  <- getBytes (tokenExtraLen header)
421    either fail return (packToken header extra)
422
423tokenExtraLen :: TokenHeader -> Word64
424tokenExtraLen (TokenHeader MajorType2 (AiValue n)) = fromUInt n  -- bytestrings
425tokenExtraLen (TokenHeader MajorType3 (AiValue n)) = fromUInt n  -- unicode strings
426tokenExtraLen _                                    = 0
427
428packToken :: TokenHeader -> [Word8] -> Either String Token
429packToken (TokenHeader mt ai) extra = case (mt, ai) of
430    -- Major type 0:  an unsigned integer.  The 5-bit additional information
431    -- is either the integer itself (for additional information values 0
432    -- through 23) or the length of additional data.
433    (MajorType0, AiValue n)  -> return (MT0_UnsignedInt n)
434
435    -- Major type 1:  a negative integer.  The encoding follows the rules
436    -- for unsigned integers (major type 0), except that the value is
437    -- then -1 minus the encoded unsigned integer.
438    (MajorType1, AiValue n)  -> return (MT1_NegativeInt n)
439
440    -- Major type 2:  a byte string.  The string's length in bytes is
441    -- represented following the rules for positive integers (major type 0).
442    (MajorType2, AiValue n)  -> return (MT2_ByteString n extra)
443    (MajorType2, AiIndefLen) -> return MT2_ByteStringIndef
444
445    -- Major type 3:  a text string, specifically a string of Unicode
446    -- characters that is encoded as UTF-8 [RFC3629].  The format of this
447    -- type is identical to that of byte strings (major type 2), that is,
448    -- as with major type 2, the length gives the number of bytes.
449    (MajorType3, AiValue n)  -> return (MT3_String n extra)
450    (MajorType3, AiIndefLen) -> return MT3_StringIndef
451
452    -- Major type 4:  an array of data items. The array's length follows the
453    -- rules for byte strings (major type 2), except that the length
454    -- denotes the number of data items, not the length in bytes that the
455    -- array takes up.
456    (MajorType4, AiValue n)  -> return (MT4_ArrayLen n)
457    (MajorType4, AiIndefLen) -> return  MT4_ArrayLenIndef
458
459    -- Major type 5:  a map of pairs of data items. A map is comprised of
460    -- pairs of data items, each pair consisting of a key that is
461    -- immediately followed by a value. The map's length follows the
462    -- rules for byte strings (major type 2), except that the length
463    -- denotes the number of pairs, not the length in bytes that the map
464    -- takes up.
465    (MajorType5, AiValue n)  -> return (MT5_MapLen n)
466    (MajorType5, AiIndefLen) -> return  MT5_MapLenIndef
467
468    -- Major type 6:  optional semantic tagging of other major types.
469    -- The initial bytes of the tag follow the rules for positive integers
470    -- (major type 0).
471    (MajorType6, AiValue n)  -> return (MT6_Tag n)
472
473    -- Major type 7 is for two types of data: floating-point numbers and
474    -- "simple values" that do not need any content.  Each value of the
475    -- 5-bit additional information in the initial byte has its own separate
476    -- meaning, as defined in Table 1.
477    --   | 0..23       | Simple value (value 0..23)                       |
478    --   | 24          | Simple value (value 32..255 in following byte)   |
479    --   | 25          | IEEE 754 Half-Precision Float (16 bits follow)   |
480    --   | 26          | IEEE 754 Single-Precision Float (32 bits follow) |
481    --   | 27          | IEEE 754 Double-Precision Float (64 bits follow) |
482    --   | 28-30       | (Unassigned)                                     |
483    --   | 31          | "break" stop code for indefinite-length items    |
484    (MajorType7, AiValue (UIntSmall w)) -> return (MT7_Simple (SimpleSmall w))
485    (MajorType7, AiValue (UInt8     w)) -> return (MT7_Simple (SimpleLarge w))
486    (MajorType7, AiValue (UInt16    w)) -> return (MT7_Float16 (HalfSpecials (wordToHalf w)))
487    (MajorType7, AiValue (UInt32    w)) -> return (MT7_Float32 (FloatSpecials (wordToFloat w)))
488    (MajorType7, AiValue (UInt64    w)) -> return (MT7_Float64 (DoubleSpecials (wordToDouble w)))
489    (MajorType7, AiIndefLen)            -> return (MT7_Break)
490    _                                   -> Left "invalid token header"
491
492
493encodeToken :: Encoder Token
494encodeToken tok =
495    let (header, extra) = unpackToken tok
496     in encodeTokenHeader header ++ extra
497
498
499unpackToken :: Token -> (TokenHeader, [Word8])
500unpackToken tok = (\(mt, ai, ws) -> (TokenHeader mt ai, ws)) $ case tok of
501    (MT0_UnsignedInt n)    -> (MajorType0, AiValue n,  [])
502    (MT1_NegativeInt n)    -> (MajorType1, AiValue n,  [])
503    (MT2_ByteString  n ws) -> (MajorType2, AiValue n,  ws)
504    MT2_ByteStringIndef    -> (MajorType2, AiIndefLen, [])
505    (MT3_String      n ws) -> (MajorType3, AiValue n,  ws)
506    MT3_StringIndef        -> (MajorType3, AiIndefLen, [])
507    (MT4_ArrayLen    n)    -> (MajorType4, AiValue n,  [])
508    MT4_ArrayLenIndef      -> (MajorType4, AiIndefLen, [])
509    (MT5_MapLen      n)    -> (MajorType5, AiValue n,  [])
510    MT5_MapLenIndef        -> (MajorType5, AiIndefLen, [])
511    (MT6_Tag     n)        -> (MajorType6, AiValue n,  [])
512    (MT7_Simple
513        (SimpleSmall n))   -> (MajorType7, AiValue (UIntSmall (fromIntegral n)), [])
514    (MT7_Simple
515        (SimpleLarge n))   -> (MajorType7, AiValue (UInt8  n), [])
516    (MT7_Float16
517        (HalfSpecials f))  -> (MajorType7, AiValue (UInt16 (halfToWord f)),   [])
518    (MT7_Float32
519        (FloatSpecials f)) -> (MajorType7, AiValue (UInt32 (floatToWord f)),  [])
520    (MT7_Float64
521        (DoubleSpecials f))-> (MajorType7, AiValue (UInt64 (doubleToWord f)), [])
522    MT7_Break              -> (MajorType7, AiIndefLen, [])
523
524
525fromUInt :: UInt -> Word64
526fromUInt (UIntSmall w) = fromIntegral w
527fromUInt (UInt8     w) = fromIntegral w
528fromUInt (UInt16    w) = fromIntegral w
529fromUInt (UInt32    w) = fromIntegral w
530fromUInt (UInt64    w) = fromIntegral w
531
532toUInt :: Word64 -> UInt
533toUInt n
534  | n < 24                                 = UIntSmall (fromIntegral n)
535  | n <= fromIntegral (maxBound :: Word8)  = UInt8     (fromIntegral n)
536  | n <= fromIntegral (maxBound :: Word16) = UInt16    (fromIntegral n)
537  | n <= fromIntegral (maxBound :: Word32) = UInt32    (fromIntegral n)
538  | otherwise                              = UInt64    n
539
540lengthUInt :: [a] -> UInt
541lengthUInt = toUInt . fromIntegral . length
542
543decodeUTF8 :: [Word8] -> Either String [Char]
544decodeUTF8 = either (Left . show) (return . T.unpack) . T.decodeUtf8' . BS.pack
545
546encodeUTF8 :: [Char] -> [Word8]
547encodeUTF8 = BS.unpack . T.encodeUtf8 . T.pack
548
549reservedTag :: Word64 -> Bool
550reservedTag w = w <= 5
551
552prop_Token :: Token -> Bool
553prop_Token token =
554    let ws = encodeToken token
555        Just (token', []) = runDecoder decodeToken ws
556     in token == token'
557
558data Term = TUInt   UInt
559          | TNInt   UInt
560          | TBigInt Integer
561          | TBytes    [Word8]
562          | TBytess  [[Word8]]
563          | TString   [Char]
564          | TStrings [[Char]]
565          | TArray  [Term]
566          | TArrayI [Term]
567          | TMap    [(Term, Term)]
568          | TMapI   [(Term, Term)]
569          | TTagged UInt Term
570          | TTrue
571          | TFalse
572          | TNull
573          | TUndef
574          | TSimple  Simple
575          | TFloat16 HalfSpecials
576          | TFloat32 FloatSpecials
577          | TFloat64 DoubleSpecials
578  deriving (Show, Eq)
579
580instance Arbitrary Term where
581  arbitrary =
582      frequency
583        [ (1, TUInt    <$> arbitrary)
584        , (1, TNInt    <$> arbitrary)
585        , (1, TBigInt . getLargeInteger <$> arbitrary)
586        , (1, TBytes   <$> arbitrary)
587        , (1, TBytess  <$> arbitrary)
588        , (1, TString  <$> arbitrary)
589        , (1, TStrings <$> arbitrary)
590        , (2, TArray   <$> listOfSmaller arbitrary)
591        , (2, TArrayI  <$> listOfSmaller arbitrary)
592        , (2, TMap     <$> listOfSmaller ((,) <$> arbitrary <*> arbitrary))
593        , (2, TMapI    <$> listOfSmaller ((,) <$> arbitrary <*> arbitrary))
594        , (1, TTagged  <$> arbitraryTag <*> sized (\sz -> resize (max 0 (sz-1)) arbitrary))
595        , (1, pure TFalse)
596        , (1, pure TTrue)
597        , (1, pure TNull)
598        , (1, pure TUndef)
599        , (1, TSimple  <$> arbitrary `suchThat` (unassignedSimple . fromSimple))
600        , (1, TFloat16 <$> arbitrary)
601        , (1, TFloat32 <$> arbitrary)
602        , (1, TFloat64 <$> arbitrary)
603        ]
604    where
605      listOfSmaller :: Gen a -> Gen [a]
606      listOfSmaller gen =
607        sized $ \n -> do
608          k <- choose (0,n)
609          vectorOf k (resize (n `div` (k+1)) gen)
610
611      arbitraryTag = arbitrary `suchThat` (not . reservedTag . fromUInt)
612
613  shrink (TUInt   n)    = [ TUInt    n'   | n' <- shrink n ]
614  shrink (TNInt   n)    = [ TNInt    n'   | n' <- shrink n ]
615  shrink (TBigInt n)    = [ TBigInt  n'   | n' <- shrink n ]
616
617  shrink (TBytes  ws)   = [ TBytes   ws'  | ws'  <- shrink ws  ]
618  shrink (TBytess wss)  = [ TBytess  wss' | wss' <- shrink wss ]
619  shrink (TString  ws)  = [ TString  ws'  | ws'  <- shrink ws  ]
620  shrink (TStrings wss) = [ TStrings wss' | wss' <- shrink wss ]
621
622  shrink (TArray  xs@[x]) = x : [ TArray  xs' | xs' <- shrink xs ]
623  shrink (TArray  xs)     =     [ TArray  xs' | xs' <- shrink xs ]
624  shrink (TArrayI xs@[x]) = x : [ TArrayI xs' | xs' <- shrink xs ]
625  shrink (TArrayI xs)     =     [ TArrayI xs' | xs' <- shrink xs ]
626
627  shrink (TMap  xys@[(x,y)]) = x : y : [ TMap  xys' | xys' <- shrink xys ]
628  shrink (TMap  xys)         =         [ TMap  xys' | xys' <- shrink xys ]
629  shrink (TMapI xys@[(x,y)]) = x : y : [ TMapI xys' | xys' <- shrink xys ]
630  shrink (TMapI xys)         =         [ TMapI xys' | xys' <- shrink xys ]
631
632  shrink (TTagged w t) = [ TTagged w' t' | (w', t') <- shrink (w, t)
633                                         , not (reservedTag (fromUInt w')) ]
634
635  shrink TFalse = []
636  shrink TTrue  = []
637  shrink TNull  = []
638  shrink TUndef = []
639
640  shrink (TSimple  n) = [ TSimple  n' | n' <- shrink n
641                                      , unassignedSimple (fromSimple n') ]
642  shrink (TFloat16 f) = [ TFloat16 f' | f' <- shrink f ]
643  shrink (TFloat32 f) = [ TFloat32 f' | f' <- shrink f ]
644  shrink (TFloat64 f) = [ TFloat64 f' | f' <- shrink f ]
645
646
647decodeTerm :: Decoder Term
648decodeTerm = decodeToken >>= decodeTermFrom
649
650decodeTermFrom :: Token -> Decoder Term
651decodeTermFrom tk =
652    case tk of
653      MT0_UnsignedInt n  -> return (TUInt n)
654      MT1_NegativeInt n  -> return (TNInt n)
655
656      MT2_ByteString _ bs -> return (TBytes bs)
657      MT2_ByteStringIndef -> decodeBytess []
658
659      MT3_String _ ws    -> either fail (return . TString) (decodeUTF8 ws)
660      MT3_StringIndef    -> decodeStrings []
661
662      MT4_ArrayLen len   -> decodeArrayN (fromUInt len) []
663      MT4_ArrayLenIndef  -> decodeArray []
664
665      MT5_MapLen  len    -> decodeMapN (fromUInt len) []
666      MT5_MapLenIndef    -> decodeMap  []
667
668      MT6_Tag     tag    -> decodeTagged tag
669
670      MT7_Simple  n
671        | n' == 20       -> return TFalse
672        | n' == 21       -> return TTrue
673        | n' == 22       -> return TNull
674        | n' == 23       -> return TUndef
675        | otherwise      -> return (TSimple n)
676        where
677          n' = fromSimple n
678      MT7_Float16 f      -> return (TFloat16 f)
679      MT7_Float32 f      -> return (TFloat32 f)
680      MT7_Float64 f      -> return (TFloat64 f)
681      MT7_Break          -> fail "unexpected"
682
683
684decodeBytess :: [[Word8]] -> Decoder Term
685decodeBytess acc = do
686    tk <- decodeToken
687    case tk of
688      MT7_Break            -> return $! TBytess (reverse acc)
689      MT2_ByteString _ bs  -> decodeBytess (bs : acc)
690      _                    -> fail "unexpected"
691
692decodeStrings :: [String] -> Decoder Term
693decodeStrings acc = do
694    tk <- decodeToken
695    case tk of
696      MT7_Break        -> return $! TStrings (reverse acc)
697      MT3_String _ ws  -> do cs <- either fail return (decodeUTF8 ws)
698                             decodeStrings (cs : acc)
699      _                -> fail "unexpected"
700
701decodeArrayN :: Word64 -> [Term] -> Decoder Term
702decodeArrayN n acc =
703    case n of
704      0 -> return $! TArray (reverse acc)
705      _ -> do t <- decodeTerm
706              decodeArrayN (n-1) (t : acc)
707
708decodeArray :: [Term] -> Decoder Term
709decodeArray acc = do
710    tk <- decodeToken
711    case tk of
712      MT7_Break -> return $! TArrayI (reverse acc)
713      _         -> do
714        tm <- decodeTermFrom tk
715        decodeArray (tm : acc)
716
717decodeMapN :: Word64 -> [(Term, Term)] -> Decoder Term
718decodeMapN n acc =
719    case n of
720      0 -> return $! TMap (reverse acc)
721      _ -> do
722        tm   <- decodeTerm
723        tm'  <- decodeTerm
724        decodeMapN (n-1) ((tm, tm') : acc)
725
726decodeMap :: [(Term, Term)] -> Decoder Term
727decodeMap acc = do
728    tk <- decodeToken
729    case tk of
730      MT7_Break -> return $! TMapI (reverse acc)
731      _         -> do
732        tm  <- decodeTermFrom tk
733        tm' <- decodeTerm
734        decodeMap ((tm, tm') : acc)
735
736decodeTagged :: UInt -> Decoder Term
737decodeTagged tag | fromUInt tag == 2 = do
738    MT2_ByteString _ bs <- decodeToken
739    let !n = integerFromBytes bs
740    return (TBigInt n)
741decodeTagged tag | fromUInt tag == 3 = do
742    MT2_ByteString _ bs <- decodeToken
743    let !n = integerFromBytes bs
744    return (TBigInt (-1 - n))
745decodeTagged tag = do
746    tm <- decodeTerm
747    return (TTagged tag tm)
748
749integerFromBytes :: [Word8] -> Integer
750integerFromBytes []       = 0
751integerFromBytes (w0:ws0) = go (fromIntegral w0) ws0
752  where
753    go !acc []     = acc
754    go !acc (w:ws) = go (acc `shiftL` 8 + fromIntegral w) ws
755
756integerToBytes :: Integer -> [Word8]
757integerToBytes n0
758  | n0 == 0   = [0]
759  | n0 < 0    = reverse (go (-n0))
760  | otherwise = reverse (go n0)
761  where
762    go n | n == 0    = []
763         | otherwise = narrow n : go (n `shiftR` 8)
764
765    narrow :: Integer -> Word8
766    narrow = fromIntegral
767
768prop_integerToFromBytes :: LargeInteger -> Bool
769prop_integerToFromBytes (LargeInteger n)
770  | n >= 0 =
771    let ws = integerToBytes n
772        n' = integerFromBytes ws
773     in n == n'
774  | otherwise =
775    let ws = integerToBytes n
776        n' = integerFromBytes ws
777     in n == -n'
778
779-------------------------------------------------------------------------------
780
781encodeTerm :: Encoder Term
782encodeTerm (TUInt n)       = encodeToken (MT0_UnsignedInt n)
783encodeTerm (TNInt n)       = encodeToken (MT1_NegativeInt n)
784encodeTerm (TBigInt n)
785               | n >= 0    = encodeToken (MT6_Tag (UIntSmall 2))
786                          <> let ws  = integerToBytes n
787                                 len = lengthUInt ws in
788                             encodeToken (MT2_ByteString len ws)
789               | otherwise = encodeToken (MT6_Tag (UIntSmall 3))
790                          <> let ws  = integerToBytes (-1 - n)
791                                 len = lengthUInt ws in
792                             encodeToken (MT2_ByteString len ws)
793encodeTerm (TBytes ws)     = let len = lengthUInt ws in
794                             encodeToken (MT2_ByteString len ws)
795encodeTerm (TBytess wss)   = encodeToken MT2_ByteStringIndef
796                          <> mconcat [ encodeToken (MT2_ByteString len ws)
797                                     | ws <- wss
798                                     , let len = lengthUInt ws ]
799                          <> encodeToken MT7_Break
800encodeTerm (TString  cs)   = let ws  = encodeUTF8 cs
801                                 len = lengthUInt ws in
802                             encodeToken (MT3_String len ws)
803encodeTerm (TStrings css)  = encodeToken MT3_StringIndef
804                          <> mconcat [ encodeToken (MT3_String len ws)
805                                     | cs <- css
806                                     , let ws  = encodeUTF8 cs
807                                           len = lengthUInt ws ]
808                          <> encodeToken MT7_Break
809encodeTerm (TArray  ts)    = let len = lengthUInt ts in
810                             encodeToken (MT4_ArrayLen len)
811                          <> mconcat (map encodeTerm ts)
812encodeTerm (TArrayI ts)    = encodeToken MT4_ArrayLenIndef
813                          <> mconcat (map encodeTerm ts)
814                          <> encodeToken MT7_Break
815encodeTerm (TMap    kvs)   = let len = lengthUInt kvs in
816                             encodeToken (MT5_MapLen len)
817                          <> mconcat [ encodeTerm k <> encodeTerm v
818                                     | (k,v) <- kvs ]
819encodeTerm (TMapI   kvs)   = encodeToken MT5_MapLenIndef
820                          <> mconcat [ encodeTerm k <> encodeTerm v
821                                     | (k,v) <- kvs ]
822                          <> encodeToken MT7_Break
823encodeTerm (TTagged tag t) = encodeToken (MT6_Tag tag)
824                          <> encodeTerm t
825encodeTerm  TFalse         = encodeToken (MT7_Simple (SimpleSmall 20))
826encodeTerm  TTrue          = encodeToken (MT7_Simple (SimpleSmall 21))
827encodeTerm  TNull          = encodeToken (MT7_Simple (SimpleSmall 22))
828encodeTerm  TUndef         = encodeToken (MT7_Simple (SimpleSmall 23))
829encodeTerm (TSimple  w)    = encodeToken (MT7_Simple w)
830encodeTerm (TFloat16 f)    = encodeToken (MT7_Float16 f)
831encodeTerm (TFloat32 f)    = encodeToken (MT7_Float32 f)
832encodeTerm (TFloat64 f)    = encodeToken (MT7_Float64 f)
833
834
835-------------------------------------------------------------------------------
836
837prop_Term :: Term -> Bool
838prop_Term term =
839    let ws = encodeTerm term
840        Just (term', []) = runDecoder decodeTerm ws
841     in term == term'
842
843isCanonicalTerm :: Term -> Bool
844isCanonicalTerm t = canonicaliseTerm t == t
845
846canonicaliseTerm :: Term -> Term
847canonicaliseTerm (TUInt n) = TUInt (canonicaliseUInt n)
848canonicaliseTerm (TNInt n) = TNInt (canonicaliseUInt n)
849canonicaliseTerm (TBigInt n)
850  | n >= 0 && n <= fromIntegral (maxBound :: Word64)
851                           = TUInt (toUInt (fromIntegral n))
852  | n <  0 && n >= -1 - fromIntegral (maxBound :: Word64)
853                           = TNInt (toUInt (fromIntegral (-1 - n)))
854  | otherwise              = TBigInt n
855canonicaliseTerm (TSimple  n)   = TSimple  (canonicaliseSimple n)
856canonicaliseTerm (TFloat16 f)   = canonicaliseFloat TFloat16 f
857canonicaliseTerm (TFloat32 f)   = canonicaliseFloat TFloat32 f
858canonicaliseTerm (TFloat64 f)   = canonicaliseFloat TFloat64 f
859canonicaliseTerm (TBytess  wss) = TBytess  (filter (not . null) wss)
860canonicaliseTerm (TStrings css) = TStrings (filter (not . null) css)
861canonicaliseTerm (TArray  ts) = TArray  (map canonicaliseTerm ts)
862canonicaliseTerm (TArrayI ts) = TArrayI (map canonicaliseTerm ts)
863canonicaliseTerm (TMap    ts) = TMap    (map canonicaliseTermPair ts)
864canonicaliseTerm (TMapI   ts) = TMapI   (map canonicaliseTermPair ts)
865canonicaliseTerm (TTagged tag t) = TTagged (canonicaliseUInt tag) (canonicaliseTerm t)
866canonicaliseTerm t = t
867
868canonicaliseUInt :: UInt -> UInt
869canonicaliseUInt = toUInt . fromUInt
870
871canonicaliseSimple :: Simple -> Simple
872canonicaliseSimple = toSimple . fromSimple
873
874canonicaliseFloat :: RealFloat t => (t -> Term) -> t -> Term
875canonicaliseFloat tfloatNN f
876  | isNaN f   = TFloat16 canonicalNaN
877  | otherwise = tfloatNN f
878
879canonicaliseTermPair :: (Term, Term) -> (Term, Term)
880canonicaliseTermPair (x,y) = (canonicaliseTerm x, canonicaliseTerm y)
881
882
883-------------------------------------------------------------------------------
884
885diagnosticNotation :: Term -> String
886diagnosticNotation = \t -> showsTerm t ""
887  where
888    showsTerm tm = case tm of
889      TUInt    n     -> shows (fromUInt n)
890      TNInt    n     -> shows (-1 - fromIntegral (fromUInt n) :: Integer)
891      TBigInt  n     -> shows n
892      TBytes   bs    -> showsBytes bs
893      TBytess  bss   -> surround '(' ')' (underscoreSpace . commaSep showsBytes bss)
894      TString  cs    -> shows cs
895      TStrings css   -> surround '(' ')' (underscoreSpace . commaSep shows css)
896      TArray   ts    -> surround '[' ']' (commaSep showsTerm ts)
897      TArrayI  ts    -> surround '[' ']' (underscoreSpace . commaSep showsTerm ts)
898      TMap     ts    -> surround '{' '}' (commaSep showsMapElem ts)
899      TMapI    ts    -> surround '{' '}' (underscoreSpace . commaSep showsMapElem ts)
900      TTagged  tag t -> shows (fromUInt tag) . surround '(' ')' (showsTerm t)
901      TTrue          -> showString "true"
902      TFalse         -> showString "false"
903      TNull          -> showString "null"
904      TUndef         -> showString "undefined"
905      TSimple  n     -> showString "simple" . surround '(' ')' (shows (fromSimple n))
906      -- convert to float to work around https://github.com/ekmett/half/issues/2
907      TFloat16 f     -> showFloatCompat (float2Double (Half.fromHalf (getHalfSpecials f)))
908      TFloat32 f     -> showFloatCompat (float2Double (getFloatSpecials f))
909      TFloat64 f     -> showFloatCompat (getDoubleSpecials f)
910
911    surround a b x = showChar a . x . showChar b
912
913    commaSpace = showChar ',' . showChar ' '
914    underscoreSpace = showChar '_' . showChar ' '
915
916    showsMapElem (k,v) = showsTerm k . showChar ':' . showChar ' ' . showsTerm v
917
918    catShows :: (a -> ShowS) -> [a] -> ShowS
919    catShows f xs = \s -> foldr (\x r -> f x . r) id xs s
920
921    sepShows :: ShowS -> (a -> ShowS) -> [a] -> ShowS
922    sepShows sep f xs = foldr (.) id (intersperse sep (map f xs))
923
924    commaSep = sepShows commaSpace
925
926    showsBytes :: [Word8] -> ShowS
927    showsBytes bs = showChar 'h' . showChar '\''
928                                 . catShows showFHex bs
929                                 . showChar '\''
930
931    showFHex n | n < 16    = showChar '0' . showHex n
932               | otherwise = showHex n
933
934    showFloatCompat n
935      | exponent' >= -5 && exponent' <= 15 = showFFloat Nothing n
936      | otherwise                          = showEFloat Nothing n
937      where exponent' = snd (floatToDigits 10 n)
938
939
940word16FromNet :: Word8 -> Word8 -> Word16
941word16FromNet w1 w0 =
942      fromIntegral w1 `shiftL` (8*1)
943  .|. fromIntegral w0 `shiftL` (8*0)
944
945word16ToNet :: Word16 -> (Word8, Word8)
946word16ToNet w =
947    ( fromIntegral ((w `shiftR` (8*1)) .&. 0xff)
948    , fromIntegral ((w `shiftR` (8*0)) .&. 0xff)
949    )
950
951word32FromNet :: Word8 -> Word8 -> Word8 -> Word8 -> Word32
952word32FromNet w3 w2 w1 w0 =
953      fromIntegral w3 `shiftL` (8*3)
954  .|. fromIntegral w2 `shiftL` (8*2)
955  .|. fromIntegral w1 `shiftL` (8*1)
956  .|. fromIntegral w0 `shiftL` (8*0)
957
958word32ToNet :: Word32 -> (Word8, Word8, Word8, Word8)
959word32ToNet w =
960    ( fromIntegral ((w `shiftR` (8*3)) .&. 0xff)
961    , fromIntegral ((w `shiftR` (8*2)) .&. 0xff)
962    , fromIntegral ((w `shiftR` (8*1)) .&. 0xff)
963    , fromIntegral ((w `shiftR` (8*0)) .&. 0xff)
964    )
965
966word64FromNet :: Word8 -> Word8 -> Word8 -> Word8 ->
967                 Word8 -> Word8 -> Word8 -> Word8 -> Word64
968word64FromNet w7 w6 w5 w4 w3 w2 w1 w0 =
969      fromIntegral w7 `shiftL` (8*7)
970  .|. fromIntegral w6 `shiftL` (8*6)
971  .|. fromIntegral w5 `shiftL` (8*5)
972  .|. fromIntegral w4 `shiftL` (8*4)
973  .|. fromIntegral w3 `shiftL` (8*3)
974  .|. fromIntegral w2 `shiftL` (8*2)
975  .|. fromIntegral w1 `shiftL` (8*1)
976  .|. fromIntegral w0 `shiftL` (8*0)
977
978word64ToNet :: Word64 -> (Word8, Word8, Word8, Word8,
979                          Word8, Word8, Word8, Word8)
980word64ToNet w =
981    ( fromIntegral ((w `shiftR` (8*7)) .&. 0xff)
982    , fromIntegral ((w `shiftR` (8*6)) .&. 0xff)
983    , fromIntegral ((w `shiftR` (8*5)) .&. 0xff)
984    , fromIntegral ((w `shiftR` (8*4)) .&. 0xff)
985    , fromIntegral ((w `shiftR` (8*3)) .&. 0xff)
986    , fromIntegral ((w `shiftR` (8*2)) .&. 0xff)
987    , fromIntegral ((w `shiftR` (8*1)) .&. 0xff)
988    , fromIntegral ((w `shiftR` (8*0)) .&. 0xff)
989    )
990
991prop_word16ToFromNet :: Word8 -> Word8 -> Bool
992prop_word16ToFromNet w1 w0 =
993    word16ToNet (word16FromNet w1 w0) == (w1, w0)
994
995prop_word32ToFromNet :: Word8 -> Word8 -> Word8 -> Word8 -> Bool
996prop_word32ToFromNet w3 w2 w1 w0 =
997    word32ToNet (word32FromNet w3 w2 w1 w0) == (w3, w2, w1, w0)
998
999prop_word64ToFromNet :: Word8 -> Word8 -> Word8 -> Word8 ->
1000                        Word8 -> Word8 -> Word8 -> Word8 -> Bool
1001prop_word64ToFromNet w7 w6 w5 w4 w3 w2 w1 w0 =
1002    word64ToNet (word64FromNet w7 w6 w5 w4 w3 w2 w1 w0)
1003 == (w7, w6, w5, w4, w3, w2, w1, w0)
1004
1005-- Note: some NaNs do not roundtrip https://github.com/ekmett/half/issues/3
1006-- but all the others had better
1007prop_halfToFromFloat :: Bool
1008prop_halfToFromFloat =
1009    all (\w -> roundTrip w || isNaN (Half.Half w)) [minBound..maxBound]
1010  where
1011    roundTrip w =
1012      w == (Half.getHalf . Half.toHalf . Half.fromHalf . Half.Half $ w)
1013
1014