1{-# LANGUAGE CPP, BangPatterns #-} 2----------------------------------------------------------------------------- 3-- | 4-- Module : Codec.CBOR 5-- Copyright : 2013 Simon Meier <iridcode@gmail.com>, 6-- 2013-2014 Duncan Coutts, 7-- License : BSD3-style (see LICENSE.txt) 8-- 9-- Maintainer : Duncan Coutts 10-- Stability : 11-- Portability : portable 12-- 13-- CBOR format support. 14-- 15----------------------------------------------------------------------------- 16 17module Tests.Reference.Implementation ( 18 serialise, 19 deserialise, 20 21 Term(..), 22 Token(..), 23 canonicaliseTerm, 24 isCanonicalTerm, 25 26 UInt(..), 27 fromUInt, 28 toUInt, 29 canonicaliseUInt, 30 31 Simple(..), 32 fromSimple, 33 toSimple, 34 reservedSimple, 35 unassignedSimple, 36 reservedTag, 37 38 Decoder, 39 runDecoder, 40 testDecode, 41 42 decodeTerm, 43 decodeTokens, 44 decodeToken, 45 decodeTagged, 46 47 diagnosticNotation, 48 49 Encoder, 50 encodeTerm, 51 encodeToken, 52 53 prop_InitialByte, 54 prop_AdditionalInfo, 55 prop_TokenHeader, 56 prop_TokenHeader2, 57 prop_Token, 58 prop_Term, 59 60 -- properties of internal helpers 61 prop_integerToFromBytes, 62 prop_word16ToFromNet, 63 prop_word32ToFromNet, 64 prop_word64ToFromNet, 65 prop_halfToFromFloat, 66 ) where 67 68 69import qualified Control.Monad.Fail as Fail 70import Data.Bits 71import Data.Word 72import qualified Numeric.Half as Half 73import Data.List 74import Numeric 75import GHC.Float (float2Double) 76import qualified Data.ByteString as BS 77import qualified Data.ByteString.Lazy as LBS 78import qualified Data.Text as T 79import qualified Data.Text.Encoding as T 80import Data.Monoid ((<>)) 81import Control.Monad (ap) 82 83import Test.QuickCheck.Arbitrary 84import Test.QuickCheck.Gen 85 86#if !MIN_VERSION_base(4,8,0) 87import Data.Monoid (Monoid(..)) 88import Control.Applicative 89#endif 90 91import Tests.Reference.Generators 92 93 94serialise :: Term -> LBS.ByteString 95serialise = LBS.pack . encodeTerm 96 97deserialise :: LBS.ByteString -> Term 98deserialise bytes = 99 case runDecoder decodeTerm (LBS.unpack bytes) of 100 Just (term, []) -> term 101 Just _ -> error "ReferenceImpl.deserialise: trailing data" 102 Nothing -> error "ReferenceImpl.deserialise: decoding failed" 103 104 105------------------------------------------------------------------------ 106 107newtype Decoder a = Decoder { runDecoder :: [Word8] -> Maybe (a, [Word8]) } 108 109instance Functor Decoder where 110 fmap f a = a >>= return . f 111 112instance Applicative Decoder where 113 pure = return 114 (<*>) = ap 115 116instance Monad Decoder where 117 return x = Decoder (\ws -> Just (x, ws)) 118 d >>= f = Decoder (\ws -> case runDecoder d ws of 119 Nothing -> Nothing 120 Just (x, ws') -> runDecoder (f x) ws') 121#if !MIN_VERSION_base(4,13,0) 122 fail = Fail.fail 123#endif 124 125instance Fail.MonadFail Decoder where 126 fail _ = Decoder (\_ -> Nothing) 127 128getByte :: Decoder Word8 129getByte = 130 Decoder $ \ws -> 131 case ws of 132 w:ws' -> Just (w, ws') 133 _ -> Nothing 134 135getBytes :: Integral n => n -> Decoder [Word8] 136getBytes n = 137 Decoder $ \ws -> 138 case genericSplitAt n ws of 139 (ws', []) | genericLength ws' == n -> Just (ws', []) 140 | otherwise -> Nothing 141 (ws', ws'') -> Just (ws', ws'') 142 143eof :: Decoder Bool 144eof = Decoder $ \ws -> Just (null ws, ws) 145 146type Encoder a = a -> [Word8] 147 148-- The initial byte of each data item contains both information about 149-- the major type (the high-order 3 bits, described in Section 2.1) and 150-- additional information (the low-order 5 bits). 151 152data MajorType = MajorType0 | MajorType1 | MajorType2 | MajorType3 153 | MajorType4 | MajorType5 | MajorType6 | MajorType7 154 deriving (Show, Eq, Ord, Enum) 155 156instance Arbitrary MajorType where 157 arbitrary = elements [MajorType0 .. MajorType7] 158 159encodeInitialByte :: MajorType -> Word -> Word8 160encodeInitialByte mt ai 161 | ai < 2^(5 :: Int) 162 = fromIntegral (fromIntegral (fromEnum mt) `shiftL` 5 .|. ai) 163 164 | otherwise 165 = error "encodeInitialByte: invalid additional info value" 166 167decodeInitialByte :: Word8 -> (MajorType, Word) 168decodeInitialByte ib = ( toEnum $ fromIntegral $ ib `shiftR` 5 169 , fromIntegral $ ib .&. 0x1f) 170 171prop_InitialByte :: Bool 172prop_InitialByte = 173 and [ (uncurry encodeInitialByte . decodeInitialByte) w8 == w8 174 | w8 <- [minBound..maxBound] ] 175 176-- When the value of the 177-- additional information is less than 24, it is directly used as a 178-- small unsigned integer. When it is 24 to 27, the additional bytes 179-- for a variable-length integer immediately follow; the values 24 to 27 180-- of the additional information specify that its length is a 1-, 2-, 181-- 4-, or 8-byte unsigned integer, respectively. Additional information 182-- value 31 is used for indefinite-length items, described in 183-- Section 2.2. Additional information values 28 to 30 are reserved for 184-- future expansion. 185-- 186-- In all additional information values, the resulting integer is 187-- interpreted depending on the major type. It may represent the actual 188-- data: for example, in integer types, the resulting integer is used 189-- for the value itself. It may instead supply length information: for 190-- example, in byte strings it gives the length of the byte string data 191-- that follows. 192 193data UInt = 194 UIntSmall Word 195 | UInt8 Word8 196 | UInt16 Word16 197 | UInt32 Word32 198 | UInt64 Word64 199 deriving (Eq, Show) 200 201data AdditionalInformation = 202 AiValue UInt 203 | AiIndefLen 204 | AiReserved Word 205 deriving (Eq, Show) 206 207instance Arbitrary UInt where 208 arbitrary = 209 sized $ \n -> 210 oneof $ take (1 + n `div` 2) 211 [ UIntSmall <$> choose (0, 23) 212 , UInt8 <$> arbitraryBoundedIntegral 213 , UInt16 <$> arbitraryBoundedIntegral 214 , UInt32 <$> arbitraryBoundedIntegral 215 , UInt64 <$> arbitraryBoundedIntegral 216 ] 217 shrink (UIntSmall n) = [ UIntSmall n' | n' <- shrink n ] 218 shrink (UInt8 n) = [ UInt8 n' | n' <- shrink n ] 219 ++ [ UIntSmall (fromIntegral n) | n <= 23 ] 220 shrink (UInt16 n) = [ UInt16 n' | n' <- shrink n ] 221 ++ [ UInt8 (fromIntegral n) 222 | n <= fromIntegral (maxBound :: Word8) ] 223 shrink (UInt32 n) = [ UInt32 n' | n' <- shrink n ] 224 ++ [ UInt16 (fromIntegral n) 225 | n <= fromIntegral (maxBound :: Word16) ] 226 shrink (UInt64 n) = [ UInt64 n' | n' <- shrink n ] 227 ++ [ UInt32 (fromIntegral n) 228 | n <= fromIntegral (maxBound :: Word32) ] 229 230instance Arbitrary AdditionalInformation where 231 arbitrary = 232 frequency 233 [ (7, AiValue <$> arbitrary) 234 , (2, pure AiIndefLen) 235 , (1, AiReserved <$> choose (28, 30)) 236 ] 237 238decodeAdditionalInfo :: Word -> Decoder AdditionalInformation 239decodeAdditionalInfo = dec 240 where 241 dec n 242 | n < 24 = return (AiValue (UIntSmall n)) 243 dec 24 = do w <- getByte 244 return (AiValue (UInt8 w)) 245 dec 25 = do [w1,w0] <- getBytes (2 :: Int) 246 let w = word16FromNet w1 w0 247 return (AiValue (UInt16 w)) 248 dec 26 = do [w3,w2,w1,w0] <- getBytes (4 :: Int) 249 let w = word32FromNet w3 w2 w1 w0 250 return (AiValue (UInt32 w)) 251 dec 27 = do [w7,w6,w5,w4,w3,w2,w1,w0] <- getBytes (8 :: Int) 252 let w = word64FromNet w7 w6 w5 w4 w3 w2 w1 w0 253 return (AiValue (UInt64 w)) 254 dec 31 = return AiIndefLen 255 dec n 256 | n < 31 = return (AiReserved n) 257 dec _ = fail "" 258 259encodeAdditionalInfo :: AdditionalInformation -> (Word, [Word8]) 260encodeAdditionalInfo = enc 261 where 262 enc (AiValue (UIntSmall n)) 263 | n < 24 = (n, []) 264 | otherwise = error "invalid UIntSmall value" 265 enc (AiValue (UInt8 w)) = (24, [w]) 266 enc (AiValue (UInt16 w)) = (25, [w1, w0]) 267 where (w1, w0) = word16ToNet w 268 enc (AiValue (UInt32 w)) = (26, [w3, w2, w1, w0]) 269 where (w3, w2, w1, w0) = word32ToNet w 270 enc (AiValue (UInt64 w)) = (27, [w7, w6, w5, w4, 271 w3, w2, w1, w0]) 272 where (w7, w6, w5, w4, 273 w3, w2, w1, w0) = word64ToNet w 274 enc AiIndefLen = (31, []) 275 enc (AiReserved n) 276 | n >= 28 && n < 31 = (n, []) 277 | otherwise = error "invalid AiReserved value" 278 279prop_AdditionalInfo :: AdditionalInformation -> Bool 280prop_AdditionalInfo ai = 281 let (w, ws) = encodeAdditionalInfo ai 282 Just (ai', _) = runDecoder (decodeAdditionalInfo w) ws 283 in ai == ai' 284 285 286data TokenHeader = TokenHeader MajorType AdditionalInformation 287 deriving (Show, Eq) 288 289instance Arbitrary TokenHeader where 290 arbitrary = TokenHeader <$> arbitrary <*> arbitrary 291 292decodeTokenHeader :: Decoder TokenHeader 293decodeTokenHeader = do 294 b <- getByte 295 let (mt, ai) = decodeInitialByte b 296 ai' <- decodeAdditionalInfo ai 297 return (TokenHeader mt ai') 298 299encodeTokenHeader :: Encoder TokenHeader 300encodeTokenHeader (TokenHeader mt ai) = 301 let (w, ws) = encodeAdditionalInfo ai 302 in encodeInitialByte mt w : ws 303 304prop_TokenHeader :: TokenHeader -> Bool 305prop_TokenHeader header = 306 let ws = encodeTokenHeader header 307 Just (header', _) = runDecoder decodeTokenHeader ws 308 in header == header' 309 310prop_TokenHeader2 :: Bool 311prop_TokenHeader2 = 312 and [ w8 : extraused == encoded 313 | w8 <- [minBound..maxBound] 314 , let extra = [1..8] 315 Just (header, unused) = runDecoder decodeTokenHeader (w8 : extra) 316 encoded = encodeTokenHeader header 317 extraused = take (8 - length unused) extra 318 ] 319 320data Simple = SimpleSmall Word -- 0 .. 23 321 | SimpleLarge Word8 -- 0 .. 255, but 0..23 are non-canonical 322 -- and 24..31 are reserved 323 deriving (Eq, Show) 324 325fromSimple :: Simple -> Word8 326fromSimple (SimpleSmall w) = fromIntegral w 327fromSimple (SimpleLarge w) = w 328 329toSimple :: Word8 -> Simple 330toSimple w | w <= 23 = SimpleSmall (fromIntegral w) 331 | otherwise = SimpleLarge w 332 333reservedSimple :: Word8 -> Bool 334reservedSimple w = w >= 24 && w <= 31 335 336unassignedSimple :: Word8 -> Bool 337unassignedSimple w = w < 20 || w > 31 338 339instance Arbitrary Simple where 340 arbitrary = oneof [ SimpleSmall <$> choose (0, 23) 341 , SimpleLarge <$> choose (0, 31) 342 , SimpleLarge <$> choose (32, 255) 343 ] 344 shrink (SimpleSmall n) = [ SimpleSmall n' | n' <- shrink n ] 345 shrink (SimpleLarge n) = [ SimpleSmall (fromIntegral n') 346 | n' <- shrink n, n' <= 23 ] 347 ++ [ SimpleLarge n' | n' <- shrink n ] 348 349 350data Token = 351 MT0_UnsignedInt UInt 352 | MT1_NegativeInt UInt 353 | MT2_ByteString UInt [Word8] 354 | MT2_ByteStringIndef 355 | MT3_String UInt [Word8] 356 | MT3_StringIndef 357 | MT4_ArrayLen UInt 358 | MT4_ArrayLenIndef 359 | MT5_MapLen UInt 360 | MT5_MapLenIndef 361 | MT6_Tag UInt 362 | MT7_Simple Simple 363 | MT7_Float16 HalfSpecials 364 | MT7_Float32 FloatSpecials 365 | MT7_Float64 DoubleSpecials 366 | MT7_Break 367 deriving (Show, Eq) 368 369instance Arbitrary Token where 370 arbitrary = 371 oneof 372 [ MT0_UnsignedInt <$> arbitrary 373 , MT1_NegativeInt <$> arbitrary 374 , do ws <- arbitrary 375 MT2_ByteString <$> arbitraryLengthUInt ws <*> pure ws 376 , pure MT2_ByteStringIndef 377 , do cs <- arbitrary 378 let ws = encodeUTF8 cs 379 MT3_String <$> arbitraryLengthUInt ws <*> pure ws 380 , pure MT3_StringIndef 381 , MT4_ArrayLen <$> arbitrary 382 , pure MT4_ArrayLenIndef 383 , MT5_MapLen <$> arbitrary 384 , pure MT5_MapLenIndef 385 , MT6_Tag <$> arbitrary 386 , MT7_Simple <$> arbitrary 387 , MT7_Float16 <$> arbitrary 388 , MT7_Float32 <$> arbitrary 389 , MT7_Float64 <$> arbitrary 390 , pure MT7_Break 391 ] 392 where 393 arbitraryLengthUInt xs = 394 let n = length xs in 395 elements $ 396 [ UIntSmall (fromIntegral n) | n < 24 ] 397 ++ [ UInt8 (fromIntegral n) | n < 255 ] 398 ++ [ UInt16 (fromIntegral n) | n < 65536 ] 399 ++ [ UInt32 (fromIntegral n) 400 , UInt64 (fromIntegral n) ] 401 402testDecode :: [Word8] -> Term 403testDecode ws = 404 case runDecoder decodeTerm ws of 405 Just (x, []) -> x 406 _ -> error "testDecode: parse error" 407 408decodeTokens :: Decoder [Token] 409decodeTokens = do 410 done <- eof 411 if done 412 then return [] 413 else do tok <- decodeToken 414 toks <- decodeTokens 415 return (tok:toks) 416 417decodeToken :: Decoder Token 418decodeToken = do 419 header <- decodeTokenHeader 420 extra <- getBytes (tokenExtraLen header) 421 either fail return (packToken header extra) 422 423tokenExtraLen :: TokenHeader -> Word64 424tokenExtraLen (TokenHeader MajorType2 (AiValue n)) = fromUInt n -- bytestrings 425tokenExtraLen (TokenHeader MajorType3 (AiValue n)) = fromUInt n -- unicode strings 426tokenExtraLen _ = 0 427 428packToken :: TokenHeader -> [Word8] -> Either String Token 429packToken (TokenHeader mt ai) extra = case (mt, ai) of 430 -- Major type 0: an unsigned integer. The 5-bit additional information 431 -- is either the integer itself (for additional information values 0 432 -- through 23) or the length of additional data. 433 (MajorType0, AiValue n) -> return (MT0_UnsignedInt n) 434 435 -- Major type 1: a negative integer. The encoding follows the rules 436 -- for unsigned integers (major type 0), except that the value is 437 -- then -1 minus the encoded unsigned integer. 438 (MajorType1, AiValue n) -> return (MT1_NegativeInt n) 439 440 -- Major type 2: a byte string. The string's length in bytes is 441 -- represented following the rules for positive integers (major type 0). 442 (MajorType2, AiValue n) -> return (MT2_ByteString n extra) 443 (MajorType2, AiIndefLen) -> return MT2_ByteStringIndef 444 445 -- Major type 3: a text string, specifically a string of Unicode 446 -- characters that is encoded as UTF-8 [RFC3629]. The format of this 447 -- type is identical to that of byte strings (major type 2), that is, 448 -- as with major type 2, the length gives the number of bytes. 449 (MajorType3, AiValue n) -> return (MT3_String n extra) 450 (MajorType3, AiIndefLen) -> return MT3_StringIndef 451 452 -- Major type 4: an array of data items. The array's length follows the 453 -- rules for byte strings (major type 2), except that the length 454 -- denotes the number of data items, not the length in bytes that the 455 -- array takes up. 456 (MajorType4, AiValue n) -> return (MT4_ArrayLen n) 457 (MajorType4, AiIndefLen) -> return MT4_ArrayLenIndef 458 459 -- Major type 5: a map of pairs of data items. A map is comprised of 460 -- pairs of data items, each pair consisting of a key that is 461 -- immediately followed by a value. The map's length follows the 462 -- rules for byte strings (major type 2), except that the length 463 -- denotes the number of pairs, not the length in bytes that the map 464 -- takes up. 465 (MajorType5, AiValue n) -> return (MT5_MapLen n) 466 (MajorType5, AiIndefLen) -> return MT5_MapLenIndef 467 468 -- Major type 6: optional semantic tagging of other major types. 469 -- The initial bytes of the tag follow the rules for positive integers 470 -- (major type 0). 471 (MajorType6, AiValue n) -> return (MT6_Tag n) 472 473 -- Major type 7 is for two types of data: floating-point numbers and 474 -- "simple values" that do not need any content. Each value of the 475 -- 5-bit additional information in the initial byte has its own separate 476 -- meaning, as defined in Table 1. 477 -- | 0..23 | Simple value (value 0..23) | 478 -- | 24 | Simple value (value 32..255 in following byte) | 479 -- | 25 | IEEE 754 Half-Precision Float (16 bits follow) | 480 -- | 26 | IEEE 754 Single-Precision Float (32 bits follow) | 481 -- | 27 | IEEE 754 Double-Precision Float (64 bits follow) | 482 -- | 28-30 | (Unassigned) | 483 -- | 31 | "break" stop code for indefinite-length items | 484 (MajorType7, AiValue (UIntSmall w)) -> return (MT7_Simple (SimpleSmall w)) 485 (MajorType7, AiValue (UInt8 w)) -> return (MT7_Simple (SimpleLarge w)) 486 (MajorType7, AiValue (UInt16 w)) -> return (MT7_Float16 (HalfSpecials (wordToHalf w))) 487 (MajorType7, AiValue (UInt32 w)) -> return (MT7_Float32 (FloatSpecials (wordToFloat w))) 488 (MajorType7, AiValue (UInt64 w)) -> return (MT7_Float64 (DoubleSpecials (wordToDouble w))) 489 (MajorType7, AiIndefLen) -> return (MT7_Break) 490 _ -> Left "invalid token header" 491 492 493encodeToken :: Encoder Token 494encodeToken tok = 495 let (header, extra) = unpackToken tok 496 in encodeTokenHeader header ++ extra 497 498 499unpackToken :: Token -> (TokenHeader, [Word8]) 500unpackToken tok = (\(mt, ai, ws) -> (TokenHeader mt ai, ws)) $ case tok of 501 (MT0_UnsignedInt n) -> (MajorType0, AiValue n, []) 502 (MT1_NegativeInt n) -> (MajorType1, AiValue n, []) 503 (MT2_ByteString n ws) -> (MajorType2, AiValue n, ws) 504 MT2_ByteStringIndef -> (MajorType2, AiIndefLen, []) 505 (MT3_String n ws) -> (MajorType3, AiValue n, ws) 506 MT3_StringIndef -> (MajorType3, AiIndefLen, []) 507 (MT4_ArrayLen n) -> (MajorType4, AiValue n, []) 508 MT4_ArrayLenIndef -> (MajorType4, AiIndefLen, []) 509 (MT5_MapLen n) -> (MajorType5, AiValue n, []) 510 MT5_MapLenIndef -> (MajorType5, AiIndefLen, []) 511 (MT6_Tag n) -> (MajorType6, AiValue n, []) 512 (MT7_Simple 513 (SimpleSmall n)) -> (MajorType7, AiValue (UIntSmall (fromIntegral n)), []) 514 (MT7_Simple 515 (SimpleLarge n)) -> (MajorType7, AiValue (UInt8 n), []) 516 (MT7_Float16 517 (HalfSpecials f)) -> (MajorType7, AiValue (UInt16 (halfToWord f)), []) 518 (MT7_Float32 519 (FloatSpecials f)) -> (MajorType7, AiValue (UInt32 (floatToWord f)), []) 520 (MT7_Float64 521 (DoubleSpecials f))-> (MajorType7, AiValue (UInt64 (doubleToWord f)), []) 522 MT7_Break -> (MajorType7, AiIndefLen, []) 523 524 525fromUInt :: UInt -> Word64 526fromUInt (UIntSmall w) = fromIntegral w 527fromUInt (UInt8 w) = fromIntegral w 528fromUInt (UInt16 w) = fromIntegral w 529fromUInt (UInt32 w) = fromIntegral w 530fromUInt (UInt64 w) = fromIntegral w 531 532toUInt :: Word64 -> UInt 533toUInt n 534 | n < 24 = UIntSmall (fromIntegral n) 535 | n <= fromIntegral (maxBound :: Word8) = UInt8 (fromIntegral n) 536 | n <= fromIntegral (maxBound :: Word16) = UInt16 (fromIntegral n) 537 | n <= fromIntegral (maxBound :: Word32) = UInt32 (fromIntegral n) 538 | otherwise = UInt64 n 539 540lengthUInt :: [a] -> UInt 541lengthUInt = toUInt . fromIntegral . length 542 543decodeUTF8 :: [Word8] -> Either String [Char] 544decodeUTF8 = either (Left . show) (return . T.unpack) . T.decodeUtf8' . BS.pack 545 546encodeUTF8 :: [Char] -> [Word8] 547encodeUTF8 = BS.unpack . T.encodeUtf8 . T.pack 548 549reservedTag :: Word64 -> Bool 550reservedTag w = w <= 5 551 552prop_Token :: Token -> Bool 553prop_Token token = 554 let ws = encodeToken token 555 Just (token', []) = runDecoder decodeToken ws 556 in token == token' 557 558data Term = TUInt UInt 559 | TNInt UInt 560 | TBigInt Integer 561 | TBytes [Word8] 562 | TBytess [[Word8]] 563 | TString [Char] 564 | TStrings [[Char]] 565 | TArray [Term] 566 | TArrayI [Term] 567 | TMap [(Term, Term)] 568 | TMapI [(Term, Term)] 569 | TTagged UInt Term 570 | TTrue 571 | TFalse 572 | TNull 573 | TUndef 574 | TSimple Simple 575 | TFloat16 HalfSpecials 576 | TFloat32 FloatSpecials 577 | TFloat64 DoubleSpecials 578 deriving (Show, Eq) 579 580instance Arbitrary Term where 581 arbitrary = 582 frequency 583 [ (1, TUInt <$> arbitrary) 584 , (1, TNInt <$> arbitrary) 585 , (1, TBigInt . getLargeInteger <$> arbitrary) 586 , (1, TBytes <$> arbitrary) 587 , (1, TBytess <$> arbitrary) 588 , (1, TString <$> arbitrary) 589 , (1, TStrings <$> arbitrary) 590 , (2, TArray <$> listOfSmaller arbitrary) 591 , (2, TArrayI <$> listOfSmaller arbitrary) 592 , (2, TMap <$> listOfSmaller ((,) <$> arbitrary <*> arbitrary)) 593 , (2, TMapI <$> listOfSmaller ((,) <$> arbitrary <*> arbitrary)) 594 , (1, TTagged <$> arbitraryTag <*> sized (\sz -> resize (max 0 (sz-1)) arbitrary)) 595 , (1, pure TFalse) 596 , (1, pure TTrue) 597 , (1, pure TNull) 598 , (1, pure TUndef) 599 , (1, TSimple <$> arbitrary `suchThat` (unassignedSimple . fromSimple)) 600 , (1, TFloat16 <$> arbitrary) 601 , (1, TFloat32 <$> arbitrary) 602 , (1, TFloat64 <$> arbitrary) 603 ] 604 where 605 listOfSmaller :: Gen a -> Gen [a] 606 listOfSmaller gen = 607 sized $ \n -> do 608 k <- choose (0,n) 609 vectorOf k (resize (n `div` (k+1)) gen) 610 611 arbitraryTag = arbitrary `suchThat` (not . reservedTag . fromUInt) 612 613 shrink (TUInt n) = [ TUInt n' | n' <- shrink n ] 614 shrink (TNInt n) = [ TNInt n' | n' <- shrink n ] 615 shrink (TBigInt n) = [ TBigInt n' | n' <- shrink n ] 616 617 shrink (TBytes ws) = [ TBytes ws' | ws' <- shrink ws ] 618 shrink (TBytess wss) = [ TBytess wss' | wss' <- shrink wss ] 619 shrink (TString ws) = [ TString ws' | ws' <- shrink ws ] 620 shrink (TStrings wss) = [ TStrings wss' | wss' <- shrink wss ] 621 622 shrink (TArray xs@[x]) = x : [ TArray xs' | xs' <- shrink xs ] 623 shrink (TArray xs) = [ TArray xs' | xs' <- shrink xs ] 624 shrink (TArrayI xs@[x]) = x : [ TArrayI xs' | xs' <- shrink xs ] 625 shrink (TArrayI xs) = [ TArrayI xs' | xs' <- shrink xs ] 626 627 shrink (TMap xys@[(x,y)]) = x : y : [ TMap xys' | xys' <- shrink xys ] 628 shrink (TMap xys) = [ TMap xys' | xys' <- shrink xys ] 629 shrink (TMapI xys@[(x,y)]) = x : y : [ TMapI xys' | xys' <- shrink xys ] 630 shrink (TMapI xys) = [ TMapI xys' | xys' <- shrink xys ] 631 632 shrink (TTagged w t) = [ TTagged w' t' | (w', t') <- shrink (w, t) 633 , not (reservedTag (fromUInt w')) ] 634 635 shrink TFalse = [] 636 shrink TTrue = [] 637 shrink TNull = [] 638 shrink TUndef = [] 639 640 shrink (TSimple n) = [ TSimple n' | n' <- shrink n 641 , unassignedSimple (fromSimple n') ] 642 shrink (TFloat16 f) = [ TFloat16 f' | f' <- shrink f ] 643 shrink (TFloat32 f) = [ TFloat32 f' | f' <- shrink f ] 644 shrink (TFloat64 f) = [ TFloat64 f' | f' <- shrink f ] 645 646 647decodeTerm :: Decoder Term 648decodeTerm = decodeToken >>= decodeTermFrom 649 650decodeTermFrom :: Token -> Decoder Term 651decodeTermFrom tk = 652 case tk of 653 MT0_UnsignedInt n -> return (TUInt n) 654 MT1_NegativeInt n -> return (TNInt n) 655 656 MT2_ByteString _ bs -> return (TBytes bs) 657 MT2_ByteStringIndef -> decodeBytess [] 658 659 MT3_String _ ws -> either fail (return . TString) (decodeUTF8 ws) 660 MT3_StringIndef -> decodeStrings [] 661 662 MT4_ArrayLen len -> decodeArrayN (fromUInt len) [] 663 MT4_ArrayLenIndef -> decodeArray [] 664 665 MT5_MapLen len -> decodeMapN (fromUInt len) [] 666 MT5_MapLenIndef -> decodeMap [] 667 668 MT6_Tag tag -> decodeTagged tag 669 670 MT7_Simple n 671 | n' == 20 -> return TFalse 672 | n' == 21 -> return TTrue 673 | n' == 22 -> return TNull 674 | n' == 23 -> return TUndef 675 | otherwise -> return (TSimple n) 676 where 677 n' = fromSimple n 678 MT7_Float16 f -> return (TFloat16 f) 679 MT7_Float32 f -> return (TFloat32 f) 680 MT7_Float64 f -> return (TFloat64 f) 681 MT7_Break -> fail "unexpected" 682 683 684decodeBytess :: [[Word8]] -> Decoder Term 685decodeBytess acc = do 686 tk <- decodeToken 687 case tk of 688 MT7_Break -> return $! TBytess (reverse acc) 689 MT2_ByteString _ bs -> decodeBytess (bs : acc) 690 _ -> fail "unexpected" 691 692decodeStrings :: [String] -> Decoder Term 693decodeStrings acc = do 694 tk <- decodeToken 695 case tk of 696 MT7_Break -> return $! TStrings (reverse acc) 697 MT3_String _ ws -> do cs <- either fail return (decodeUTF8 ws) 698 decodeStrings (cs : acc) 699 _ -> fail "unexpected" 700 701decodeArrayN :: Word64 -> [Term] -> Decoder Term 702decodeArrayN n acc = 703 case n of 704 0 -> return $! TArray (reverse acc) 705 _ -> do t <- decodeTerm 706 decodeArrayN (n-1) (t : acc) 707 708decodeArray :: [Term] -> Decoder Term 709decodeArray acc = do 710 tk <- decodeToken 711 case tk of 712 MT7_Break -> return $! TArrayI (reverse acc) 713 _ -> do 714 tm <- decodeTermFrom tk 715 decodeArray (tm : acc) 716 717decodeMapN :: Word64 -> [(Term, Term)] -> Decoder Term 718decodeMapN n acc = 719 case n of 720 0 -> return $! TMap (reverse acc) 721 _ -> do 722 tm <- decodeTerm 723 tm' <- decodeTerm 724 decodeMapN (n-1) ((tm, tm') : acc) 725 726decodeMap :: [(Term, Term)] -> Decoder Term 727decodeMap acc = do 728 tk <- decodeToken 729 case tk of 730 MT7_Break -> return $! TMapI (reverse acc) 731 _ -> do 732 tm <- decodeTermFrom tk 733 tm' <- decodeTerm 734 decodeMap ((tm, tm') : acc) 735 736decodeTagged :: UInt -> Decoder Term 737decodeTagged tag | fromUInt tag == 2 = do 738 MT2_ByteString _ bs <- decodeToken 739 let !n = integerFromBytes bs 740 return (TBigInt n) 741decodeTagged tag | fromUInt tag == 3 = do 742 MT2_ByteString _ bs <- decodeToken 743 let !n = integerFromBytes bs 744 return (TBigInt (-1 - n)) 745decodeTagged tag = do 746 tm <- decodeTerm 747 return (TTagged tag tm) 748 749integerFromBytes :: [Word8] -> Integer 750integerFromBytes [] = 0 751integerFromBytes (w0:ws0) = go (fromIntegral w0) ws0 752 where 753 go !acc [] = acc 754 go !acc (w:ws) = go (acc `shiftL` 8 + fromIntegral w) ws 755 756integerToBytes :: Integer -> [Word8] 757integerToBytes n0 758 | n0 == 0 = [0] 759 | n0 < 0 = reverse (go (-n0)) 760 | otherwise = reverse (go n0) 761 where 762 go n | n == 0 = [] 763 | otherwise = narrow n : go (n `shiftR` 8) 764 765 narrow :: Integer -> Word8 766 narrow = fromIntegral 767 768prop_integerToFromBytes :: LargeInteger -> Bool 769prop_integerToFromBytes (LargeInteger n) 770 | n >= 0 = 771 let ws = integerToBytes n 772 n' = integerFromBytes ws 773 in n == n' 774 | otherwise = 775 let ws = integerToBytes n 776 n' = integerFromBytes ws 777 in n == -n' 778 779------------------------------------------------------------------------------- 780 781encodeTerm :: Encoder Term 782encodeTerm (TUInt n) = encodeToken (MT0_UnsignedInt n) 783encodeTerm (TNInt n) = encodeToken (MT1_NegativeInt n) 784encodeTerm (TBigInt n) 785 | n >= 0 = encodeToken (MT6_Tag (UIntSmall 2)) 786 <> let ws = integerToBytes n 787 len = lengthUInt ws in 788 encodeToken (MT2_ByteString len ws) 789 | otherwise = encodeToken (MT6_Tag (UIntSmall 3)) 790 <> let ws = integerToBytes (-1 - n) 791 len = lengthUInt ws in 792 encodeToken (MT2_ByteString len ws) 793encodeTerm (TBytes ws) = let len = lengthUInt ws in 794 encodeToken (MT2_ByteString len ws) 795encodeTerm (TBytess wss) = encodeToken MT2_ByteStringIndef 796 <> mconcat [ encodeToken (MT2_ByteString len ws) 797 | ws <- wss 798 , let len = lengthUInt ws ] 799 <> encodeToken MT7_Break 800encodeTerm (TString cs) = let ws = encodeUTF8 cs 801 len = lengthUInt ws in 802 encodeToken (MT3_String len ws) 803encodeTerm (TStrings css) = encodeToken MT3_StringIndef 804 <> mconcat [ encodeToken (MT3_String len ws) 805 | cs <- css 806 , let ws = encodeUTF8 cs 807 len = lengthUInt ws ] 808 <> encodeToken MT7_Break 809encodeTerm (TArray ts) = let len = lengthUInt ts in 810 encodeToken (MT4_ArrayLen len) 811 <> mconcat (map encodeTerm ts) 812encodeTerm (TArrayI ts) = encodeToken MT4_ArrayLenIndef 813 <> mconcat (map encodeTerm ts) 814 <> encodeToken MT7_Break 815encodeTerm (TMap kvs) = let len = lengthUInt kvs in 816 encodeToken (MT5_MapLen len) 817 <> mconcat [ encodeTerm k <> encodeTerm v 818 | (k,v) <- kvs ] 819encodeTerm (TMapI kvs) = encodeToken MT5_MapLenIndef 820 <> mconcat [ encodeTerm k <> encodeTerm v 821 | (k,v) <- kvs ] 822 <> encodeToken MT7_Break 823encodeTerm (TTagged tag t) = encodeToken (MT6_Tag tag) 824 <> encodeTerm t 825encodeTerm TFalse = encodeToken (MT7_Simple (SimpleSmall 20)) 826encodeTerm TTrue = encodeToken (MT7_Simple (SimpleSmall 21)) 827encodeTerm TNull = encodeToken (MT7_Simple (SimpleSmall 22)) 828encodeTerm TUndef = encodeToken (MT7_Simple (SimpleSmall 23)) 829encodeTerm (TSimple w) = encodeToken (MT7_Simple w) 830encodeTerm (TFloat16 f) = encodeToken (MT7_Float16 f) 831encodeTerm (TFloat32 f) = encodeToken (MT7_Float32 f) 832encodeTerm (TFloat64 f) = encodeToken (MT7_Float64 f) 833 834 835------------------------------------------------------------------------------- 836 837prop_Term :: Term -> Bool 838prop_Term term = 839 let ws = encodeTerm term 840 Just (term', []) = runDecoder decodeTerm ws 841 in term == term' 842 843isCanonicalTerm :: Term -> Bool 844isCanonicalTerm t = canonicaliseTerm t == t 845 846canonicaliseTerm :: Term -> Term 847canonicaliseTerm (TUInt n) = TUInt (canonicaliseUInt n) 848canonicaliseTerm (TNInt n) = TNInt (canonicaliseUInt n) 849canonicaliseTerm (TBigInt n) 850 | n >= 0 && n <= fromIntegral (maxBound :: Word64) 851 = TUInt (toUInt (fromIntegral n)) 852 | n < 0 && n >= -1 - fromIntegral (maxBound :: Word64) 853 = TNInt (toUInt (fromIntegral (-1 - n))) 854 | otherwise = TBigInt n 855canonicaliseTerm (TSimple n) = TSimple (canonicaliseSimple n) 856canonicaliseTerm (TFloat16 f) = canonicaliseFloat TFloat16 f 857canonicaliseTerm (TFloat32 f) = canonicaliseFloat TFloat32 f 858canonicaliseTerm (TFloat64 f) = canonicaliseFloat TFloat64 f 859canonicaliseTerm (TBytess wss) = TBytess (filter (not . null) wss) 860canonicaliseTerm (TStrings css) = TStrings (filter (not . null) css) 861canonicaliseTerm (TArray ts) = TArray (map canonicaliseTerm ts) 862canonicaliseTerm (TArrayI ts) = TArrayI (map canonicaliseTerm ts) 863canonicaliseTerm (TMap ts) = TMap (map canonicaliseTermPair ts) 864canonicaliseTerm (TMapI ts) = TMapI (map canonicaliseTermPair ts) 865canonicaliseTerm (TTagged tag t) = TTagged (canonicaliseUInt tag) (canonicaliseTerm t) 866canonicaliseTerm t = t 867 868canonicaliseUInt :: UInt -> UInt 869canonicaliseUInt = toUInt . fromUInt 870 871canonicaliseSimple :: Simple -> Simple 872canonicaliseSimple = toSimple . fromSimple 873 874canonicaliseFloat :: RealFloat t => (t -> Term) -> t -> Term 875canonicaliseFloat tfloatNN f 876 | isNaN f = TFloat16 canonicalNaN 877 | otherwise = tfloatNN f 878 879canonicaliseTermPair :: (Term, Term) -> (Term, Term) 880canonicaliseTermPair (x,y) = (canonicaliseTerm x, canonicaliseTerm y) 881 882 883------------------------------------------------------------------------------- 884 885diagnosticNotation :: Term -> String 886diagnosticNotation = \t -> showsTerm t "" 887 where 888 showsTerm tm = case tm of 889 TUInt n -> shows (fromUInt n) 890 TNInt n -> shows (-1 - fromIntegral (fromUInt n) :: Integer) 891 TBigInt n -> shows n 892 TBytes bs -> showsBytes bs 893 TBytess bss -> surround '(' ')' (underscoreSpace . commaSep showsBytes bss) 894 TString cs -> shows cs 895 TStrings css -> surround '(' ')' (underscoreSpace . commaSep shows css) 896 TArray ts -> surround '[' ']' (commaSep showsTerm ts) 897 TArrayI ts -> surround '[' ']' (underscoreSpace . commaSep showsTerm ts) 898 TMap ts -> surround '{' '}' (commaSep showsMapElem ts) 899 TMapI ts -> surround '{' '}' (underscoreSpace . commaSep showsMapElem ts) 900 TTagged tag t -> shows (fromUInt tag) . surround '(' ')' (showsTerm t) 901 TTrue -> showString "true" 902 TFalse -> showString "false" 903 TNull -> showString "null" 904 TUndef -> showString "undefined" 905 TSimple n -> showString "simple" . surround '(' ')' (shows (fromSimple n)) 906 -- convert to float to work around https://github.com/ekmett/half/issues/2 907 TFloat16 f -> showFloatCompat (float2Double (Half.fromHalf (getHalfSpecials f))) 908 TFloat32 f -> showFloatCompat (float2Double (getFloatSpecials f)) 909 TFloat64 f -> showFloatCompat (getDoubleSpecials f) 910 911 surround a b x = showChar a . x . showChar b 912 913 commaSpace = showChar ',' . showChar ' ' 914 underscoreSpace = showChar '_' . showChar ' ' 915 916 showsMapElem (k,v) = showsTerm k . showChar ':' . showChar ' ' . showsTerm v 917 918 catShows :: (a -> ShowS) -> [a] -> ShowS 919 catShows f xs = \s -> foldr (\x r -> f x . r) id xs s 920 921 sepShows :: ShowS -> (a -> ShowS) -> [a] -> ShowS 922 sepShows sep f xs = foldr (.) id (intersperse sep (map f xs)) 923 924 commaSep = sepShows commaSpace 925 926 showsBytes :: [Word8] -> ShowS 927 showsBytes bs = showChar 'h' . showChar '\'' 928 . catShows showFHex bs 929 . showChar '\'' 930 931 showFHex n | n < 16 = showChar '0' . showHex n 932 | otherwise = showHex n 933 934 showFloatCompat n 935 | exponent' >= -5 && exponent' <= 15 = showFFloat Nothing n 936 | otherwise = showEFloat Nothing n 937 where exponent' = snd (floatToDigits 10 n) 938 939 940word16FromNet :: Word8 -> Word8 -> Word16 941word16FromNet w1 w0 = 942 fromIntegral w1 `shiftL` (8*1) 943 .|. fromIntegral w0 `shiftL` (8*0) 944 945word16ToNet :: Word16 -> (Word8, Word8) 946word16ToNet w = 947 ( fromIntegral ((w `shiftR` (8*1)) .&. 0xff) 948 , fromIntegral ((w `shiftR` (8*0)) .&. 0xff) 949 ) 950 951word32FromNet :: Word8 -> Word8 -> Word8 -> Word8 -> Word32 952word32FromNet w3 w2 w1 w0 = 953 fromIntegral w3 `shiftL` (8*3) 954 .|. fromIntegral w2 `shiftL` (8*2) 955 .|. fromIntegral w1 `shiftL` (8*1) 956 .|. fromIntegral w0 `shiftL` (8*0) 957 958word32ToNet :: Word32 -> (Word8, Word8, Word8, Word8) 959word32ToNet w = 960 ( fromIntegral ((w `shiftR` (8*3)) .&. 0xff) 961 , fromIntegral ((w `shiftR` (8*2)) .&. 0xff) 962 , fromIntegral ((w `shiftR` (8*1)) .&. 0xff) 963 , fromIntegral ((w `shiftR` (8*0)) .&. 0xff) 964 ) 965 966word64FromNet :: Word8 -> Word8 -> Word8 -> Word8 -> 967 Word8 -> Word8 -> Word8 -> Word8 -> Word64 968word64FromNet w7 w6 w5 w4 w3 w2 w1 w0 = 969 fromIntegral w7 `shiftL` (8*7) 970 .|. fromIntegral w6 `shiftL` (8*6) 971 .|. fromIntegral w5 `shiftL` (8*5) 972 .|. fromIntegral w4 `shiftL` (8*4) 973 .|. fromIntegral w3 `shiftL` (8*3) 974 .|. fromIntegral w2 `shiftL` (8*2) 975 .|. fromIntegral w1 `shiftL` (8*1) 976 .|. fromIntegral w0 `shiftL` (8*0) 977 978word64ToNet :: Word64 -> (Word8, Word8, Word8, Word8, 979 Word8, Word8, Word8, Word8) 980word64ToNet w = 981 ( fromIntegral ((w `shiftR` (8*7)) .&. 0xff) 982 , fromIntegral ((w `shiftR` (8*6)) .&. 0xff) 983 , fromIntegral ((w `shiftR` (8*5)) .&. 0xff) 984 , fromIntegral ((w `shiftR` (8*4)) .&. 0xff) 985 , fromIntegral ((w `shiftR` (8*3)) .&. 0xff) 986 , fromIntegral ((w `shiftR` (8*2)) .&. 0xff) 987 , fromIntegral ((w `shiftR` (8*1)) .&. 0xff) 988 , fromIntegral ((w `shiftR` (8*0)) .&. 0xff) 989 ) 990 991prop_word16ToFromNet :: Word8 -> Word8 -> Bool 992prop_word16ToFromNet w1 w0 = 993 word16ToNet (word16FromNet w1 w0) == (w1, w0) 994 995prop_word32ToFromNet :: Word8 -> Word8 -> Word8 -> Word8 -> Bool 996prop_word32ToFromNet w3 w2 w1 w0 = 997 word32ToNet (word32FromNet w3 w2 w1 w0) == (w3, w2, w1, w0) 998 999prop_word64ToFromNet :: Word8 -> Word8 -> Word8 -> Word8 -> 1000 Word8 -> Word8 -> Word8 -> Word8 -> Bool 1001prop_word64ToFromNet w7 w6 w5 w4 w3 w2 w1 w0 = 1002 word64ToNet (word64FromNet w7 w6 w5 w4 w3 w2 w1 w0) 1003 == (w7, w6, w5, w4, w3, w2, w1, w0) 1004 1005-- Note: some NaNs do not roundtrip https://github.com/ekmett/half/issues/3 1006-- but all the others had better 1007prop_halfToFromFloat :: Bool 1008prop_halfToFromFloat = 1009 all (\w -> roundTrip w || isNaN (Half.Half w)) [minBound..maxBound] 1010 where 1011 roundTrip w = 1012 w == (Half.getHalf . Half.toHalf . Half.fromHalf . Half.Half $ w) 1013 1014