1{-# LANGUAGE BangPatterns #-} 2{-# LANGUAGE CPP #-} 3{-# LANGUAGE DeriveDataTypeable #-} 4{-# LANGUAGE FlexibleContexts #-} 5{-# LANGUAGE FlexibleInstances #-} 6{-# LANGUAGE FunctionalDependencies #-} 7{-# LANGUAGE MagicHash #-} 8{-# LANGUAGE MultiParamTypeClasses #-} 9{-# LANGUAGE OverloadedStrings #-} 10{-# LANGUAGE RankNTypes #-} 11 12#if __GLASGOW_HASKELL__ < 900 13-- Bump up from the default 1.5, otherwise our decoder fast path is no good. 14-- We went over the threshold when we switched to using ST. 15-- 16-- However, this flag is not supported on GHC 9.0 and later and eye-balling the 17-- Core suggests that the new inlining heuristics don't require it. 18{-# OPTIONS_GHC -funfolding-keeness-factor=2.0 #-} 19#endif 20 21-- | 22-- Module : Codec.CBOR.Read 23-- Copyright : (c) Duncan Coutts 2015-2017 24-- License : BSD3-style (see LICENSE.txt) 25-- 26-- Maintainer : duncan@community.haskell.org 27-- Stability : experimental 28-- Portability : non-portable (GHC extensions) 29-- 30-- Tools for reading values in a CBOR-encoded format 31-- back into ordinary values. 32-- 33module Codec.CBOR.Read 34 ( deserialiseFromBytes -- :: Decoder a -> ByteString -> Either String (ByteString, a) 35 , deserialiseFromBytesWithSize -- :: Decoder a -> ByteString -> Either String (ByteString, ByteOffset, a) 36 , deserialiseIncremental -- :: Decoder a -> ST s (IDecode s a) 37 , DeserialiseFailure(..) 38 , IDecode(..) 39 , ByteOffset 40 ) where 41 42#include "cbor.h" 43 44#if !MIN_VERSION_base(4,8,0) 45import Control.Applicative 46#endif 47import GHC.Int 48 49import Control.DeepSeq 50import Control.Monad (ap) 51import Control.Monad.ST 52import Data.Array.IArray 53import Data.Array.Unboxed 54import qualified Data.Array.Base as A 55import Data.Monoid 56import Data.Bits 57import Data.ByteString (ByteString) 58import qualified Data.ByteString as BS 59import qualified Data.ByteString.Unsafe as BS 60import qualified Data.ByteString.Lazy as LBS 61import qualified Data.ByteString.Lazy.Internal as LBS 62import qualified Data.Text as T 63import qualified Data.Text.Encoding as T 64import Data.Word 65import GHC.Word 66#if defined(ARCH_32bit) 67import GHC.IntWord64 68#endif 69import GHC.Exts 70import GHC.Float (float2Double) 71import Data.Typeable 72import Control.Exception 73 74-- We do all numeric conversions explicitly to be careful about overflows. 75import Prelude hiding (fromIntegral) 76 77import qualified Codec.CBOR.ByteArray as BA 78import Codec.CBOR.Decoding hiding (DecodeAction(Done, Fail)) 79import Codec.CBOR.Decoding (DecodeAction) 80import qualified Codec.CBOR.Decoding as D 81import Codec.CBOR.Magic 82 83-------------------------------------------------------------------------------- 84 85-- | An exception type that may be returned (by pure functions) or 86-- thrown (by IO actions) that fail to deserialise a given input. 87-- 88-- @since 0.2.0.0 89data DeserialiseFailure = DeserialiseFailure ByteOffset String 90 deriving (Eq, Show, Typeable) 91 92instance NFData DeserialiseFailure where 93 rnf (DeserialiseFailure offset msg) = rnf offset `seq` rnf msg `seq` () 94 95instance Exception DeserialiseFailure where 96#if MIN_VERSION_base(4,8,0) 97 displayException (DeserialiseFailure off msg) = 98 "Codec.CBOR: deserialising failed at offset " 99 ++ show off ++ " : " ++ msg 100#endif 101 102-- | An Incremental decoder, used to represent the result of 103-- attempting to run a decoder over a given input, and return a value 104-- of type @a@. 105data IDecode s a 106 = -- | The decoder has consumed the available input and needs more 107 -- to continue. Provide 'Just' if more input is available and 108 -- 'Nothing' otherwise, and you will get a new 'IDecode'. 109 Partial (Maybe BS.ByteString -> ST s (IDecode s a)) 110 111 -- | The decoder has successfully finished. Except for the output 112 -- value you also get any unused input as well as the number of 113 -- bytes consumed. 114 | Done !BS.ByteString {-# UNPACK #-} !ByteOffset a 115 116 -- | The decoder ran into an error. The decoder either used 117 -- 'fail' or was not provided enough input. Contains any 118 -- unconsumed input, the number of bytes consumed, and a 119 -- 'DeserialiseFailure' exception describing the reason why the 120 -- failure occurred. 121 | Fail !BS.ByteString {-# UNPACK #-} !ByteOffset DeserialiseFailure 122 123-- | Given a 'Decoder' and some 'LBS.ByteString' representing 124-- an encoded CBOR value, return 'Either' the decoded CBOR value 125-- or an error. In addition to the decoded value return any remaining input 126-- content. 127-- 128-- @since 0.2.0.0 129deserialiseFromBytes :: (forall s. Decoder s a) 130 -> LBS.ByteString 131 -> Either DeserialiseFailure (LBS.ByteString, a) 132deserialiseFromBytes d lbs = 133 fmap f $ runIDecode (deserialiseIncremental d) lbs 134 where f (rest, _, x) = (rest, x) 135 136-- | Given a 'Decoder' and some 'LBS.ByteString' representing 137-- an encoded CBOR value, return 'Either' the decoded CBOR value 138-- or an error. In addition to the decoded value return any remaining input 139-- content and the number of bytes consumed. 140-- 141-- @since 0.2.0.0 142deserialiseFromBytesWithSize :: (forall s. Decoder s a) 143 -> LBS.ByteString 144 -> Either DeserialiseFailure (LBS.ByteString, ByteOffset, a) 145deserialiseFromBytesWithSize d lbs = 146 runIDecode (deserialiseIncremental d) lbs 147 148runIDecode :: (forall s. ST s (IDecode s a)) 149 -> LBS.ByteString 150 -> Either DeserialiseFailure (LBS.ByteString, ByteOffset, a) 151runIDecode d lbs = 152 runST (go lbs =<< d) 153 where 154 go :: LBS.ByteString 155 -> IDecode s a 156 -> ST s (Either DeserialiseFailure (LBS.ByteString, ByteOffset, a)) 157 go _ (Fail _ _ err) = return (Left err) 158 go lbs' (Done bs off x) = let rest 159 | BS.null bs = lbs' 160 | otherwise = LBS.Chunk bs lbs' 161 in return (Right (rest, off, x)) 162 go LBS.Empty (Partial k) = k Nothing >>= go LBS.Empty 163 go (LBS.Chunk bs lbs') (Partial k) = k (Just bs) >>= go lbs' 164 165-- | Run a 'Decoder' incrementally, returning a continuation 166-- representing the result of the incremental decode. 167-- 168-- @since 0.2.0.0 169deserialiseIncremental :: Decoder s a -> ST s (IDecode s a) 170deserialiseIncremental decoder = do 171 da <- getDecodeAction decoder 172 runIncrementalDecoder (runDecodeAction da) 173 174---------------------------------------------- 175-- A monad for building incremental decoders 176-- 177 178newtype IncrementalDecoder s a = IncrementalDecoder { 179 unIncrementalDecoder :: 180 forall r. (a -> ST s (IDecode s r)) -> ST s (IDecode s r) 181 } 182 183instance Functor (IncrementalDecoder s) where 184 fmap f a = a >>= return . f 185 186instance Applicative (IncrementalDecoder s) where 187 pure x = IncrementalDecoder $ \k -> k x 188 (<*>) = ap 189 190instance Monad (IncrementalDecoder s) where 191 return = pure 192 193 {-# INLINE (>>=) #-} 194 m >>= f = IncrementalDecoder $ \k -> 195 unIncrementalDecoder m $ \x -> 196 unIncrementalDecoder (f x) k 197 198runIncrementalDecoder :: IncrementalDecoder s (ByteString, ByteOffset, a) 199 -> ST s (IDecode s a) 200runIncrementalDecoder (IncrementalDecoder f) = 201 f (\(trailing, off, x) -> return $ Done trailing off x) 202 203decodeFail :: ByteString -> ByteOffset -> String -> IncrementalDecoder s a 204decodeFail trailing off msg = IncrementalDecoder $ \_ -> return $ Fail trailing off exn 205 where exn = DeserialiseFailure off msg 206 207needChunk :: IncrementalDecoder s (Maybe ByteString) 208needChunk = IncrementalDecoder $ \k -> return $ Partial $ \mbs -> k mbs 209 210lift :: ST s a -> IncrementalDecoder s a 211lift action = IncrementalDecoder (\k -> action >>= k) 212 213-------------------------------------------- 214-- The main decoder 215-- 216 217-- The top level entry point 218runDecodeAction :: DecodeAction s a 219 -> IncrementalDecoder s (ByteString, ByteOffset, a) 220runDecodeAction (D.Fail msg) = decodeFail BS.empty 0 msg 221runDecodeAction (D.Done x) = return (BS.empty, 0, x) 222runDecodeAction (D.PeekAvailable k) = lift (k 0#) >>= runDecodeAction 223runDecodeAction da = do 224 mbs <- needChunk 225 case mbs of 226 Nothing -> decodeFail BS.empty 0 "end of input" 227 Just bs -> go_slow da bs 0 228 229-- The decoder is split into a fast path and a slow path. The fast path is 230-- used for a single input chunk. It decodes as far as it can, reading only 231-- whole tokens that fit within the input chunk. When it cannot read any 232-- further it returns control to the slow path. The slow path fixes up all the 233-- complicated corner cases with tokens that span chunk boundaries, gets more 234-- input and then goes back into the fast path. 235-- 236-- The idea is that chunks are usually large, and we can use simpler and 237-- faster code if we don't make it deal with the general case of tokens that 238-- span chunk boundaries. 239 240-- These are all the ways in which the fast path can finish, and return 241-- control to the slow path. In particular there are three different cases 242-- of tokens spanning a chunk boundary. 243-- 244data SlowPath s a 245 = FastDone {-# UNPACK #-} !ByteString a 246 | SlowConsumeTokenBytes {-# UNPACK #-} !ByteString (ByteString -> ST s (DecodeAction s a)) {-# UNPACK #-} !Int 247 | SlowConsumeTokenByteArray {-# UNPACK #-} !ByteString (BA.ByteArray -> ST s (DecodeAction s a)) {-# UNPACK #-} !Int 248 | SlowConsumeTokenString {-# UNPACK #-} !ByteString (T.Text -> ST s (DecodeAction s a)) {-# UNPACK #-} !Int 249 | SlowConsumeTokenUtf8ByteArray {-# UNPACK #-} !ByteString (BA.ByteArray -> ST s (DecodeAction s a)) {-# UNPACK #-} !Int 250#if defined(ARCH_32bit) 251 | SlowPeekByteOffset {-# UNPACK #-} !ByteString (Int64# -> ST s (DecodeAction s a)) 252#else 253 | SlowPeekByteOffset {-# UNPACK #-} !ByteString (Int# -> ST s (DecodeAction s a)) 254#endif 255 | SlowDecodeAction {-# UNPACK #-} !ByteString (DecodeAction s a) 256 | SlowFail {-# UNPACK #-} !ByteString String 257 258 259-- The main fast path. The fast path itself is actually split into two parts 260-- the main version 'go_fast' and a version used when we are near the end of 261-- the chunk, 'go_fast_end'. 262-- 263-- This version can then do fewer tests when we're not near the end of the 264-- chunk, in particular we just check if there's enough input buffer space 265-- left for the largest possible fixed-size token (8+1 bytes). 266-- 267go_fast :: ByteString -> DecodeAction s a -> ST s (SlowPath s a) 268 269go_fast !bs da | BS.length bs < 9 = go_fast_end bs da 270 271go_fast !bs da@(ConsumeWord k) = 272 case tryConsumeWord (BS.unsafeHead bs) bs of 273 DecodeFailure -> go_fast_end bs da 274 DecodedToken sz (W# w#) -> k w# >>= go_fast (BS.unsafeDrop sz bs) 275 276go_fast !bs da@(ConsumeWord8 k) = 277 case tryConsumeWord (BS.unsafeHead bs) bs of 278 DecodeFailure -> go_fast_end bs da 279 DecodedToken sz (W# w#) -> 280 case gtWord# w# 0xff## of 281 0# -> k w# >>= go_fast (BS.unsafeDrop sz bs) 282 _ -> go_fast_end bs da 283 284go_fast !bs da@(ConsumeWord16 k) = 285 case tryConsumeWord (BS.unsafeHead bs) bs of 286 DecodeFailure -> go_fast_end bs da 287 DecodedToken sz (W# w#) -> 288 case gtWord# w# 0xffff## of 289 0# -> k w# >>= go_fast (BS.unsafeDrop sz bs) 290 _ -> go_fast_end bs da 291 292go_fast !bs da@(ConsumeWord32 k) = 293 case tryConsumeWord (BS.unsafeHead bs) bs of 294 DecodeFailure -> go_fast_end bs da 295 DecodedToken sz (W# w#) -> 296#if defined(ARCH_32bit) 297 k w# >>= go_fast (BS.unsafeDrop sz bs) 298#else 299 case gtWord# w# 0xffffffff## of 300 0# -> k w# >>= go_fast (BS.unsafeDrop sz bs) 301 _ -> go_fast_end bs da 302#endif 303 304go_fast !bs da@(ConsumeNegWord k) = 305 case tryConsumeNegWord (BS.unsafeHead bs) bs of 306 DecodeFailure -> go_fast_end bs da 307 DecodedToken sz (W# w#) -> k w# >>= go_fast (BS.unsafeDrop sz bs) 308 309go_fast !bs da@(ConsumeInt k) = 310 case tryConsumeInt (BS.unsafeHead bs) bs of 311 DecodeFailure -> go_fast_end bs da 312 DecodedToken sz (I# n#) -> k n# >>= go_fast (BS.unsafeDrop sz bs) 313 314go_fast !bs da@(ConsumeInt8 k) = 315 case tryConsumeInt (BS.unsafeHead bs) bs of 316 DecodeFailure -> go_fast_end bs da 317 DecodedToken sz (I# n#) -> 318 case (n# ># 0x7f#) `orI#` (n# <# -0x80#) of 319 0# -> k n# >>= go_fast (BS.unsafeDrop sz bs) 320 _ -> go_fast_end bs da 321 322go_fast !bs da@(ConsumeInt16 k) = 323 case tryConsumeInt (BS.unsafeHead bs) bs of 324 DecodeFailure -> go_fast_end bs da 325 DecodedToken sz (I# n#) -> 326 case (n# ># 0x7fff#) `orI#` (n# <# -0x8000#) of 327 0# -> k n# >>= go_fast (BS.unsafeDrop sz bs) 328 _ -> go_fast_end bs da 329 330go_fast !bs da@(ConsumeInt32 k) = 331 case tryConsumeInt (BS.unsafeHead bs) bs of 332 DecodeFailure -> go_fast_end bs da 333 DecodedToken sz (I# n#) -> 334#if defined(ARCH_32bit) 335 k n# >>= go_fast (BS.unsafeDrop sz bs) 336#else 337 case (n# ># 0x7fffffff#) `orI#` (n# <# -0x80000000#) of 338 0# -> k n# >>= go_fast (BS.unsafeDrop sz bs) 339 _ -> go_fast_end bs da 340#endif 341 342go_fast !bs da@(ConsumeListLen k) = 343 case tryConsumeListLen (BS.unsafeHead bs) bs of 344 DecodeFailure -> go_fast_end bs da 345 DecodedToken sz (I# n#) -> k n# >>= go_fast (BS.unsafeDrop sz bs) 346 347go_fast !bs da@(ConsumeMapLen k) = 348 case tryConsumeMapLen (BS.unsafeHead bs) bs of 349 DecodeFailure -> go_fast_end bs da 350 DecodedToken sz (I# n#) -> k n# >>= go_fast (BS.unsafeDrop sz bs) 351 352go_fast !bs da@(ConsumeTag k) = 353 case tryConsumeTag (BS.unsafeHead bs) bs of 354 DecodeFailure -> go_fast_end bs da 355 DecodedToken sz (W# w#) -> k w# >>= go_fast (BS.unsafeDrop sz bs) 356 357go_fast !bs da@(ConsumeWordCanonical k) = 358 case tryConsumeWord (BS.unsafeHead bs) bs of 359 DecodeFailure -> go_fast_end bs da 360 DecodedToken sz (W# w#) 361 | isWordCanonical sz w# -> k w# >>= go_fast (BS.unsafeDrop sz bs) 362 | otherwise -> go_fast_end bs da 363 364go_fast !bs da@(ConsumeWord8Canonical k) = 365 case tryConsumeWord (BS.unsafeHead bs) bs of 366 DecodeFailure -> go_fast_end bs da 367 DecodedToken sz (W# w#) -> 368 case gtWord# w# 0xff## of 369 0# | isWordCanonical sz w# -> k w# >>= go_fast (BS.unsafeDrop sz bs) 370 _ -> go_fast_end bs da 371 372go_fast !bs da@(ConsumeWord16Canonical k) = 373 case tryConsumeWord (BS.unsafeHead bs) bs of 374 DecodeFailure -> go_fast_end bs da 375 DecodedToken sz (W# w#) -> 376 case gtWord# w# 0xffff## of 377 0# | isWordCanonical sz w# -> k w# >>= go_fast (BS.unsafeDrop sz bs) 378 _ -> go_fast_end bs da 379 380go_fast !bs da@(ConsumeWord32Canonical k) = 381 case tryConsumeWord (BS.unsafeHead bs) bs of 382 DecodeFailure -> go_fast_end bs da 383 DecodedToken sz (W# w#) -> 384 case w_out_of_range w# of 385 0# | isWordCanonical sz w# -> k w# >>= go_fast (BS.unsafeDrop sz bs) 386 _ -> go_fast_end bs da 387 where 388 w_out_of_range :: Word# -> Int# 389 w_out_of_range _w# = 390#if defined(ARCH_32bit) 391 0# 392#else 393 gtWord# _w# 0xffffffff## 394#endif 395 396go_fast !bs da@(ConsumeNegWordCanonical k) = 397 case tryConsumeNegWord (BS.unsafeHead bs) bs of 398 DecodeFailure -> go_fast_end bs da 399 DecodedToken sz (W# w#) 400 | isWordCanonical sz w# -> k w# >>= go_fast (BS.unsafeDrop sz bs) 401 | otherwise -> go_fast_end bs da 402 403go_fast !bs da@(ConsumeIntCanonical k) = 404 case tryConsumeInt (BS.unsafeHead bs) bs of 405 DecodeFailure -> go_fast_end bs da 406 DecodedToken sz (I# n#) 407 | isIntCanonical sz n# -> k n# >>= go_fast (BS.unsafeDrop sz bs) 408 | otherwise -> go_fast_end bs da 409 410go_fast !bs da@(ConsumeInt8Canonical k) = 411 case tryConsumeInt (BS.unsafeHead bs) bs of 412 DecodeFailure -> go_fast_end bs da 413 DecodedToken sz (I# n#) -> 414 case (n# ># 0x7f#) `orI#` (n# <# -0x80#) of 415 0# | isIntCanonical sz n# -> k n# >>= go_fast (BS.unsafeDrop sz bs) 416 _ -> go_fast_end bs da 417 418go_fast !bs da@(ConsumeInt16Canonical k) = 419 case tryConsumeInt (BS.unsafeHead bs) bs of 420 DecodeFailure -> go_fast_end bs da 421 DecodedToken sz (I# n#) -> 422 case (n# ># 0x7fff#) `orI#` (n# <# -0x8000#) of 423 0# | isIntCanonical sz n# -> k n# >>= go_fast (BS.unsafeDrop sz bs) 424 _ -> go_fast_end bs da 425 426go_fast !bs da@(ConsumeInt32Canonical k) = 427 case tryConsumeInt (BS.unsafeHead bs) bs of 428 DecodeFailure -> go_fast_end bs da 429 DecodedToken sz (I# n#) -> 430 case n_out_of_range n# of 431 0# | isIntCanonical sz n# -> k n# >>= go_fast (BS.unsafeDrop sz bs) 432 _ -> go_fast_end bs da 433 where 434 n_out_of_range :: Int# -> Int# 435 n_out_of_range _n# = 436#if defined(ARCH_32bit) 437 0# 438#else 439 (_n# ># 0x7fffffff#) `orI#` (_n# <# -0x80000000#) 440#endif 441 442go_fast !bs da@(ConsumeListLenCanonical k) = 443 case tryConsumeListLen (BS.unsafeHead bs) bs of 444 DecodeFailure -> go_fast_end bs da 445 DecodedToken sz (I# n#) 446 -- List length can't be negative, cast it to Word#. 447 | isWordCanonical sz (int2Word# n#) -> k n# >>= go_fast (BS.unsafeDrop sz bs) 448 | otherwise -> go_fast_end bs da 449 450go_fast !bs da@(ConsumeMapLenCanonical k) = 451 case tryConsumeMapLen (BS.unsafeHead bs) bs of 452 DecodeFailure -> go_fast_end bs da 453 DecodedToken sz (I# n#) 454 -- Map length can't be negative, cast it to Word#. 455 | isWordCanonical sz (int2Word# n#) -> k n# >>= go_fast (BS.unsafeDrop sz bs) 456 | otherwise -> go_fast_end bs da 457 458go_fast !bs da@(ConsumeTagCanonical k) = 459 case tryConsumeTag (BS.unsafeHead bs) bs of 460 DecodeFailure -> go_fast_end bs da 461 DecodedToken sz (W# w#) 462 | isWordCanonical sz w# -> k w# >>= go_fast (BS.unsafeDrop sz bs) 463 | otherwise -> go_fast_end bs da 464 465#if defined(ARCH_32bit) 466go_fast !bs da@(ConsumeWord64 k) = 467 case tryConsumeWord64 (BS.unsafeHead bs) bs of 468 DecodeFailure -> go_fast_end bs da 469 DecodedToken sz (W64# w#) -> k w# >>= go_fast (BS.unsafeDrop sz bs) 470 471go_fast !bs da@(ConsumeNegWord64 k) = 472 case tryConsumeNegWord64 (BS.unsafeHead bs) bs of 473 DecodeFailure -> go_fast_end bs da 474 DecodedToken sz (W64# w#) -> k w# >>= go_fast (BS.unsafeDrop sz bs) 475 476go_fast !bs da@(ConsumeInt64 k) = 477 case tryConsumeInt64 (BS.unsafeHead bs) bs of 478 DecodeFailure -> go_fast_end bs da 479 DecodedToken sz (I64# i#) -> k i# >>= go_fast (BS.unsafeDrop sz bs) 480 481go_fast !bs da@(ConsumeListLen64 k) = 482 case tryConsumeListLen64 (BS.unsafeHead bs) bs of 483 DecodeFailure -> go_fast_end bs da 484 DecodedToken sz (I64# i#) -> k i# >>= go_fast (BS.unsafeDrop sz bs) 485 486go_fast !bs da@(ConsumeMapLen64 k) = 487 case tryConsumeMapLen64 (BS.unsafeHead bs) bs of 488 DecodeFailure -> go_fast_end bs da 489 DecodedToken sz (I64# i#) -> k i# >>= go_fast (BS.unsafeDrop sz bs) 490 491go_fast !bs da@(ConsumeTag64 k) = 492 case tryConsumeTag64 (BS.unsafeHead bs) bs of 493 DecodeFailure -> go_fast_end bs da 494 DecodedToken sz (W64# w#) -> k w# >>= go_fast (BS.unsafeDrop sz bs) 495 496go_fast !bs da@(ConsumeWord64Canonical k) = 497 case tryConsumeWord64 (BS.unsafeHead bs) bs of 498 DecodeFailure -> go_fast_end bs da 499 DecodedToken sz (W64# w#) 500 | isWord64Canonical sz w# -> k w# >>= go_fast (BS.unsafeDrop sz bs) 501 | otherwise -> go_fast_end bs da 502 503go_fast !bs da@(ConsumeNegWord64Canonical k) = 504 case tryConsumeNegWord64 (BS.unsafeHead bs) bs of 505 DecodeFailure -> go_fast_end bs da 506 DecodedToken sz (W64# w#) 507 | isWord64Canonical sz w# -> k w# >>= go_fast (BS.unsafeDrop sz bs) 508 | otherwise -> go_fast_end bs da 509 510go_fast !bs da@(ConsumeInt64Canonical k) = 511 case tryConsumeInt64 (BS.unsafeHead bs) bs of 512 DecodeFailure -> go_fast_end bs da 513 DecodedToken sz (I64# i#) 514 | isInt64Canonical sz i# -> k i# >>= go_fast (BS.unsafeDrop sz bs) 515 | otherwise -> go_fast_end bs da 516 517go_fast !bs da@(ConsumeListLen64Canonical k) = 518 case tryConsumeListLen64 (BS.unsafeHead bs) bs of 519 DecodeFailure -> go_fast_end bs da 520 DecodedToken sz (I64# i#) 521 -- List length can't be negative, cast it to Word64#. 522 | isWord64Canonical sz (int64ToWord64# i#) -> k i# >>= go_fast (BS.unsafeDrop sz bs) 523 | otherwise -> go_fast_end bs da 524 525go_fast !bs da@(ConsumeMapLen64Canonical k) = 526 case tryConsumeMapLen64 (BS.unsafeHead bs) bs of 527 DecodeFailure -> go_fast_end bs da 528 DecodedToken sz (I64# i#) 529 -- Map length can't be negative, cast it to Word64#. 530 | isWord64Canonical sz (int64ToWord64# i#) -> k i# >>= go_fast (BS.unsafeDrop sz bs) 531 | otherwise -> go_fast_end bs da 532 533go_fast !bs da@(ConsumeTag64Canonical k) = 534 case tryConsumeTag64 (BS.unsafeHead bs) bs of 535 DecodeFailure -> go_fast_end bs da 536 DecodedToken sz (W64# w#) 537 | isWord64Canonical sz w# -> k w# >>= go_fast (BS.unsafeDrop sz bs) 538 | otherwise -> go_fast_end bs da 539#endif 540 541go_fast !bs da@(ConsumeInteger k) = 542 case tryConsumeInteger (BS.unsafeHead bs) bs of 543 DecodedToken sz (BigIntToken _ n) -> k n >>= go_fast (BS.unsafeDrop sz bs) 544 _ -> go_fast_end bs da 545 546go_fast !bs da@(ConsumeFloat k) = 547 case tryConsumeFloat (BS.unsafeHead bs) bs of 548 DecodeFailure -> go_fast_end bs da 549 DecodedToken sz (F# f#) -> k f# >>= go_fast (BS.unsafeDrop sz bs) 550 551go_fast !bs da@(ConsumeDouble k) = 552 case tryConsumeDouble (BS.unsafeHead bs) bs of 553 DecodeFailure -> go_fast_end bs da 554 DecodedToken sz (D# f#) -> k f# >>= go_fast (BS.unsafeDrop sz bs) 555 556go_fast !bs da@(ConsumeBytes k) = 557 case tryConsumeBytes (BS.unsafeHead bs) bs of 558 DecodeFailure -> go_fast_end bs da 559 DecodedToken sz (Fits _ bstr) -> k bstr >>= go_fast (BS.unsafeDrop sz bs) 560 DecodedToken sz (TooLong _ len) -> return $! SlowConsumeTokenBytes 561 (BS.unsafeDrop sz bs) k len 562 563go_fast !bs da@(ConsumeByteArray k) = 564 case tryConsumeBytes (BS.unsafeHead bs) bs of 565 DecodeFailure -> go_fast_end bs da 566 DecodedToken sz (Fits _ str) -> k (BA.fromByteString str) >>= go_fast (BS.unsafeDrop sz bs) 567 DecodedToken sz (TooLong _ len) -> return $! SlowConsumeTokenByteArray 568 (BS.unsafeDrop sz bs) k len 569 570go_fast !bs da@(ConsumeString k) = 571 case tryConsumeString (BS.unsafeHead bs) bs of 572 DecodeFailure -> go_fast_end bs da 573 DecodedToken sz (Fits _ str) -> case T.decodeUtf8' str of 574 Right t -> k t >>= go_fast (BS.unsafeDrop sz bs) 575 Left _e -> return $! SlowFail bs "invalid UTF8" 576 DecodedToken sz (TooLong _ len) -> return $! SlowConsumeTokenString 577 (BS.unsafeDrop sz bs) k len 578 579go_fast !bs da@(ConsumeUtf8ByteArray k) = 580 case tryConsumeString (BS.unsafeHead bs) bs of 581 DecodeFailure -> go_fast_end bs da 582 DecodedToken sz (Fits _ str) -> k (BA.fromByteString str) 583 >>= go_fast (BS.unsafeDrop sz bs) 584 DecodedToken sz (TooLong _ len) -> return $! SlowConsumeTokenUtf8ByteArray 585 (BS.unsafeDrop sz bs) k len 586 587go_fast !bs da@(ConsumeBool k) = 588 case tryConsumeBool (BS.unsafeHead bs) of 589 DecodeFailure -> go_fast_end bs da 590 DecodedToken sz b -> k b >>= go_fast (BS.unsafeDrop sz bs) 591 592go_fast !bs da@(ConsumeSimple k) = 593 case tryConsumeSimple (BS.unsafeHead bs) bs of 594 DecodeFailure -> go_fast_end bs da 595 DecodedToken sz (W# w#) -> k w# >>= go_fast (BS.unsafeDrop sz bs) 596 597go_fast !bs da@(ConsumeIntegerCanonical k) = 598 case tryConsumeInteger (BS.unsafeHead bs) bs of 599 DecodedToken sz (BigIntToken True n) -> k n >>= go_fast (BS.unsafeDrop sz bs) 600 _ -> go_fast_end bs da 601 602 603go_fast !bs da@(ConsumeFloat16Canonical k) = 604 case tryConsumeFloat (BS.unsafeHead bs) bs of 605 DecodeFailure -> go_fast_end bs da 606 DecodedToken sz f@(F# f#) 607 | isFloat16Canonical sz bs f -> k f# >>= go_fast (BS.unsafeDrop sz bs) 608 | otherwise -> go_fast_end bs da 609 610go_fast !bs da@(ConsumeFloatCanonical k) = 611 case tryConsumeFloat (BS.unsafeHead bs) bs of 612 DecodeFailure -> go_fast_end bs da 613 DecodedToken sz f@(F# f#) 614 | isFloatCanonical sz bs f -> k f# >>= go_fast (BS.unsafeDrop sz bs) 615 | otherwise -> go_fast_end bs da 616 617go_fast !bs da@(ConsumeDoubleCanonical k) = 618 case tryConsumeDouble (BS.unsafeHead bs) bs of 619 DecodeFailure -> go_fast_end bs da 620 DecodedToken sz f@(D# f#) 621 | isDoubleCanonical sz bs f -> k f# >>= go_fast (BS.unsafeDrop sz bs) 622 | otherwise -> go_fast_end bs da 623 624go_fast !bs da@(ConsumeBytesCanonical k) = 625 case tryConsumeBytes (BS.unsafeHead bs) bs of 626 DecodedToken sz (Fits True bstr) -> k bstr >>= go_fast (BS.unsafeDrop sz bs) 627 DecodedToken sz (TooLong True len) -> 628 return $! SlowConsumeTokenBytes (BS.unsafeDrop sz bs) k len 629 _ -> go_fast_end bs da 630 631go_fast !bs da@(ConsumeByteArrayCanonical k) = 632 case tryConsumeBytes (BS.unsafeHead bs) bs of 633 DecodedToken sz (Fits True str) -> 634 k (BA.fromByteString str) >>= go_fast (BS.unsafeDrop sz bs) 635 DecodedToken sz (TooLong True len) -> 636 return $! SlowConsumeTokenByteArray (BS.unsafeDrop sz bs) k len 637 _ -> go_fast_end bs da 638 639go_fast !bs da@(ConsumeStringCanonical k) = 640 case tryConsumeString (BS.unsafeHead bs) bs of 641 DecodedToken sz (Fits True str) -> case T.decodeUtf8' str of 642 Right t -> k t >>= go_fast (BS.unsafeDrop sz bs) 643 Left _e -> return $! SlowFail bs "invalid UTF8" 644 DecodedToken sz (TooLong True len) -> 645 return $! SlowConsumeTokenString (BS.unsafeDrop sz bs) k len 646 _ -> go_fast_end bs da 647 648go_fast !bs da@(ConsumeUtf8ByteArrayCanonical k) = 649 case tryConsumeString (BS.unsafeHead bs) bs of 650 DecodedToken sz (Fits True str) -> 651 k (BA.fromByteString str) >>= go_fast (BS.unsafeDrop sz bs) 652 DecodedToken sz (TooLong True len) -> 653 return $! SlowConsumeTokenUtf8ByteArray (BS.unsafeDrop sz bs) k len 654 _ -> go_fast_end bs da 655 656go_fast !bs da@(ConsumeSimpleCanonical k) = 657 case tryConsumeSimple (BS.unsafeHead bs) bs of 658 DecodeFailure -> go_fast_end bs da 659 DecodedToken sz (W# w#) 660 | isSimpleCanonical sz w# -> k w# >>= go_fast (BS.unsafeDrop sz bs) 661 | otherwise -> go_fast_end bs da 662 663go_fast !bs da@(ConsumeBytesIndef k) = 664 case tryConsumeBytesIndef (BS.unsafeHead bs) of 665 DecodeFailure -> go_fast_end bs da 666 DecodedToken sz _ -> k >>= go_fast (BS.unsafeDrop sz bs) 667 668go_fast !bs da@(ConsumeStringIndef k) = 669 case tryConsumeStringIndef (BS.unsafeHead bs) of 670 DecodeFailure -> go_fast_end bs da 671 DecodedToken sz _ -> k >>= go_fast (BS.unsafeDrop sz bs) 672 673go_fast !bs da@(ConsumeListLenIndef k) = 674 case tryConsumeListLenIndef (BS.unsafeHead bs) of 675 DecodeFailure -> go_fast_end bs da 676 DecodedToken sz _ -> k >>= go_fast (BS.unsafeDrop sz bs) 677 678go_fast !bs da@(ConsumeMapLenIndef k) = 679 case tryConsumeMapLenIndef (BS.unsafeHead bs) of 680 DecodeFailure -> go_fast_end bs da 681 DecodedToken sz _ -> k >>= go_fast (BS.unsafeDrop sz bs) 682 683go_fast !bs da@(ConsumeNull k) = 684 case tryConsumeNull (BS.unsafeHead bs) of 685 DecodeFailure -> go_fast_end bs da 686 DecodedToken sz _ -> k >>= go_fast (BS.unsafeDrop sz bs) 687 688go_fast !bs da@(ConsumeListLenOrIndef k) = 689 case tryConsumeListLenOrIndef (BS.unsafeHead bs) bs of 690 DecodeFailure -> go_fast_end bs da 691 DecodedToken sz (I# n#) -> k n# >>= go_fast (BS.unsafeDrop sz bs) 692 693go_fast !bs da@(ConsumeMapLenOrIndef k) = 694 case tryConsumeMapLenOrIndef (BS.unsafeHead bs) bs of 695 DecodeFailure -> go_fast_end bs da 696 DecodedToken sz (I# n#) -> k n# >>= go_fast (BS.unsafeDrop sz bs) 697 698go_fast !bs (ConsumeBreakOr k) = 699 case tryConsumeBreakOr (BS.unsafeHead bs) of 700 DecodeFailure -> k False >>= go_fast bs 701 DecodedToken sz _ -> k True >>= go_fast (BS.unsafeDrop sz bs) 702 703go_fast !bs (PeekTokenType k) = 704 let !hdr = BS.unsafeHead bs 705 !tkty = decodeTokenTypeTable `A.unsafeAt` word8ToInt hdr 706 in k tkty >>= go_fast bs 707 708go_fast !bs (PeekAvailable k) = k (case BS.length bs of I# len# -> len#) >>= go_fast bs 709 710go_fast !bs da@PeekByteOffset{} = go_fast_end bs da 711go_fast !bs da@D.Fail{} = go_fast_end bs da 712go_fast !bs da@D.Done{} = go_fast_end bs da 713 714 715-- This variant of the fast path has to do a few more checks because we're 716-- near the end of the chunk. The guarantee we provide here is that we will 717-- decode any tokens where the whole token fits within the input buffer. So 718-- if we return with input buffer space still unconsumed (and we're not done 719-- or failed) then there's one remaining token that spans the end of the 720-- input chunk (the slow path fixup code relies on this guarantee). 721-- 722go_fast_end :: ByteString -> DecodeAction s a -> ST s (SlowPath s a) 723 724-- these three cases don't need any input 725 726go_fast_end !bs (D.Fail msg) = return $! SlowFail bs msg 727go_fast_end !bs (D.Done x) = return $! FastDone bs x 728go_fast_end !bs (PeekAvailable k) = k (case BS.length bs of I# len# -> len#) >>= go_fast_end bs 729 730go_fast_end !bs (PeekByteOffset k) = return $! SlowPeekByteOffset bs k 731 732-- the next two cases only need the 1 byte token header 733go_fast_end !bs da | BS.null bs = return $! SlowDecodeAction bs da 734 735go_fast_end !bs (ConsumeBreakOr k) = 736 case tryConsumeBreakOr (BS.unsafeHead bs) of 737 DecodeFailure -> k False >>= go_fast_end bs 738 DecodedToken sz _ -> k True >>= go_fast_end (BS.unsafeDrop sz bs) 739 740go_fast_end !bs (PeekTokenType k) = 741 let !hdr = BS.unsafeHead bs 742 !tkty = decodeTokenTypeTable `A.unsafeAt` word8ToInt hdr 743 in k tkty >>= go_fast_end bs 744 745-- all the remaining cases have to decode the current token 746 747go_fast_end !bs da 748 | let !hdr = BS.unsafeHead bs 749 , BS.length bs < tokenSize hdr 750 = return $! SlowDecodeAction bs da 751 752go_fast_end !bs (ConsumeWord k) = 753 case tryConsumeWord (BS.unsafeHead bs) bs of 754 DecodeFailure -> return $! SlowFail bs "expected word" 755 DecodedToken sz (W# w#) -> k w# >>= go_fast_end (BS.unsafeDrop sz bs) 756 757go_fast_end !bs (ConsumeWord8 k) = 758 case tryConsumeWord (BS.unsafeHead bs) bs of 759 DecodeFailure -> return $! SlowFail bs "expected word8" 760 DecodedToken sz (W# w#) -> 761 case gtWord# w# 0xff## of 762 0# -> k w# >>= go_fast_end (BS.unsafeDrop sz bs) 763 _ -> return $! SlowFail bs "expected word8" 764 765go_fast_end !bs (ConsumeWord16 k) = 766 case tryConsumeWord (BS.unsafeHead bs) bs of 767 DecodeFailure -> return $! SlowFail bs "expected word16" 768 DecodedToken sz (W# w#) -> 769 case gtWord# w# 0xffff## of 770 0# -> k w# >>= go_fast_end (BS.unsafeDrop sz bs) 771 _ -> return $! SlowFail bs "expected word16" 772 773go_fast_end !bs (ConsumeWord32 k) = 774 case tryConsumeWord (BS.unsafeHead bs) bs of 775 DecodeFailure -> return $! SlowFail bs "expected word32" 776 DecodedToken sz (W# w#) -> 777#if defined(ARCH_32bit) 778 k w# >>= go_fast_end (BS.unsafeDrop sz bs) 779#else 780 case gtWord# w# 0xffffffff## of 781 0# -> k w# >>= go_fast_end (BS.unsafeDrop sz bs) 782 _ -> return $! SlowFail bs "expected word32" 783#endif 784 785go_fast_end !bs (ConsumeNegWord k) = 786 case tryConsumeNegWord (BS.unsafeHead bs) bs of 787 DecodeFailure -> return $! SlowFail bs "expected negative int" 788 DecodedToken sz (W# w#) -> k w# >>= go_fast_end (BS.unsafeDrop sz bs) 789 790go_fast_end !bs (ConsumeInt k) = 791 case tryConsumeInt (BS.unsafeHead bs) bs of 792 DecodeFailure -> return $! SlowFail bs "expected int" 793 DecodedToken sz (I# n#) -> k n# >>= go_fast_end (BS.unsafeDrop sz bs) 794 795go_fast_end !bs (ConsumeInt8 k) = 796 case tryConsumeInt (BS.unsafeHead bs) bs of 797 DecodeFailure -> return $! SlowFail bs "expected int8" 798 DecodedToken sz (I# n#) -> 799 case (n# ># 0x7f#) `orI#` (n# <# -0x80#) of 800 0# -> k n# >>= go_fast_end (BS.unsafeDrop sz bs) 801 _ -> return $! SlowFail bs "expected int8" 802 803go_fast_end !bs (ConsumeInt16 k) = 804 case tryConsumeInt (BS.unsafeHead bs) bs of 805 DecodeFailure -> return $! SlowFail bs "expected int16" 806 DecodedToken sz (I# n#) -> 807 case (n# ># 0x7fff#) `orI#` (n# <# -0x8000#) of 808 0# -> k n# >>= go_fast_end (BS.unsafeDrop sz bs) 809 _ -> return $! SlowFail bs "expected int16" 810 811go_fast_end !bs (ConsumeInt32 k) = 812 case tryConsumeInt (BS.unsafeHead bs) bs of 813 DecodeFailure -> return $! SlowFail bs "expected int32" 814 DecodedToken sz (I# n#) -> 815#if defined(ARCH_32bit) 816 k n# >>= go_fast_end (BS.unsafeDrop sz bs) 817#else 818 case (n# ># 0x7fffffff#) `orI#` (n# <# -0x80000000#) of 819 0# -> k n# >>= go_fast_end (BS.unsafeDrop sz bs) 820 _ -> return $! SlowFail bs "expected int32" 821#endif 822 823go_fast_end !bs (ConsumeListLen k) = 824 case tryConsumeListLen (BS.unsafeHead bs) bs of 825 DecodeFailure -> return $! SlowFail bs "expected list len" 826 DecodedToken sz (I# n#) -> k n# >>= go_fast_end (BS.unsafeDrop sz bs) 827 828go_fast_end !bs (ConsumeMapLen k) = 829 case tryConsumeMapLen (BS.unsafeHead bs) bs of 830 DecodeFailure -> return $! SlowFail bs "expected map len" 831 DecodedToken sz (I# n#) -> k n# >>= go_fast_end (BS.unsafeDrop sz bs) 832 833go_fast_end !bs (ConsumeTag k) = 834 case tryConsumeTag (BS.unsafeHead bs) bs of 835 DecodeFailure -> return $! SlowFail bs "expected tag" 836 DecodedToken sz (W# w#) -> k w# >>= go_fast_end (BS.unsafeDrop sz bs) 837 838go_fast_end !bs (ConsumeWordCanonical k) = 839 case tryConsumeWord (BS.unsafeHead bs) bs of 840 DecodeFailure -> return $! SlowFail bs "expected word" 841 DecodedToken sz (W# w#) 842 | isWordCanonical sz w# -> k w# >>= go_fast_end (BS.unsafeDrop sz bs) 843 | otherwise -> return $! SlowFail bs "non-canonical word" 844 845go_fast_end !bs (ConsumeWord8Canonical k) = 846 case tryConsumeWord (BS.unsafeHead bs) bs of 847 DecodeFailure -> return $! SlowFail bs "expected word8" 848 DecodedToken sz (W# w#) -> case gtWord# w# 0xff## of 849 0# | isWordCanonical sz w# -> k w# >>= go_fast_end (BS.unsafeDrop sz bs) 850 | otherwise -> return $! SlowFail bs "non-canonical word8" 851 _ -> return $! SlowFail bs "expected word8" 852 853go_fast_end !bs (ConsumeWord16Canonical k) = 854 case tryConsumeWord (BS.unsafeHead bs) bs of 855 DecodeFailure -> return $! SlowFail bs "expected word16" 856 DecodedToken sz (W# w#) -> case gtWord# w# 0xffff## of 857 0# | isWordCanonical sz w# -> k w# >>= go_fast_end (BS.unsafeDrop sz bs) 858 | otherwise -> return $! SlowFail bs "non-canonical word16" 859 _ -> return $! SlowFail bs "expected word16" 860 861go_fast_end !bs (ConsumeWord32Canonical k) = 862 case tryConsumeWord (BS.unsafeHead bs) bs of 863 DecodeFailure -> return $! SlowFail bs "expected word32" 864 DecodedToken sz (W# w#) -> case w_out_of_range w# of 865 0# | isWordCanonical sz w# -> k w# >>= go_fast_end (BS.unsafeDrop sz bs) 866 | otherwise -> return $! SlowFail bs "non-canonical word32" 867 _ -> return $! SlowFail bs "expected word32" 868 where 869 w_out_of_range :: Word# -> Int# 870 w_out_of_range _w# = 871#if defined(ARCH_32bit) 872 0# 873#else 874 gtWord# _w# 0xffffffff## 875#endif 876 877go_fast_end !bs (ConsumeNegWordCanonical k) = 878 case tryConsumeNegWord (BS.unsafeHead bs) bs of 879 DecodeFailure -> return $! SlowFail bs "expected negative int" 880 DecodedToken sz (W# w#) 881 | isWordCanonical sz w# -> k w# >>= go_fast_end (BS.unsafeDrop sz bs) 882 | otherwise -> return $! SlowFail bs "non-canonical negative int" 883 884go_fast_end !bs (ConsumeIntCanonical k) = 885 case tryConsumeInt (BS.unsafeHead bs) bs of 886 DecodeFailure -> return $! SlowFail bs "expected int" 887 DecodedToken sz (I# n#) 888 | isIntCanonical sz n# -> k n# >>= go_fast_end (BS.unsafeDrop sz bs) 889 | otherwise -> return $! SlowFail bs "non-canonical int" 890 891go_fast_end !bs (ConsumeInt8Canonical k) = 892 case tryConsumeInt (BS.unsafeHead bs) bs of 893 DecodeFailure -> return $! SlowFail bs "expected int8" 894 DecodedToken sz (I# n#) -> 895 case (n# ># 0x7f#) `orI#` (n# <# -0x80#) of 896 0# | isIntCanonical sz n# -> k n# >>= go_fast_end (BS.unsafeDrop sz bs) 897 | otherwise -> return $! SlowFail bs "non-canonical int8" 898 _ -> return $! SlowFail bs "expected int8" 899 900go_fast_end !bs (ConsumeInt16Canonical k) = 901 case tryConsumeInt (BS.unsafeHead bs) bs of 902 DecodeFailure -> return $! SlowFail bs "expected int16" 903 DecodedToken sz (I# n#) -> 904 case (n# ># 0x7fff#) `orI#` (n# <# -0x8000#) of 905 0# | isIntCanonical sz n# -> k n# >>= go_fast_end (BS.unsafeDrop sz bs) 906 | otherwise -> return $! SlowFail bs "non-canonical int16" 907 _ -> return $! SlowFail bs "expected int16" 908 909go_fast_end !bs (ConsumeInt32Canonical k) = 910 case tryConsumeInt (BS.unsafeHead bs) bs of 911 DecodeFailure -> return $! SlowFail bs "expected int32" 912 DecodedToken sz (I# n#) -> 913 case n_out_of_range n# of 914 0# | isIntCanonical sz n# -> k n# >>= go_fast_end (BS.unsafeDrop sz bs) 915 | otherwise -> return $! SlowFail bs "non-canonical int32" 916 _ -> return $! SlowFail bs "expected int32" 917 where 918 n_out_of_range :: Int# -> Int# 919 n_out_of_range _n# = 920#if defined(ARCH_32bit) 921 0# 922#else 923 (_n# ># 0x7fffffff#) `orI#` (_n# <# -0x80000000#) 924#endif 925 926go_fast_end !bs (ConsumeListLenCanonical k) = 927 case tryConsumeListLen (BS.unsafeHead bs) bs of 928 DecodeFailure -> return $! SlowFail bs "expected list len" 929 DecodedToken sz (I# n#) 930 -- List length can't be negative, cast it to Word#. 931 | isWordCanonical sz (int2Word# n#) -> k n# >>= go_fast_end (BS.unsafeDrop sz bs) 932 | otherwise -> return $! SlowFail bs "non-canonical list len" 933 934go_fast_end !bs (ConsumeMapLenCanonical k) = 935 case tryConsumeMapLen (BS.unsafeHead bs) bs of 936 DecodeFailure -> return $! SlowFail bs "expected map len" 937 DecodedToken sz (I# n#) 938 -- Map length can't be negative, cast it to Word#. 939 | isWordCanonical sz (int2Word# n#) -> k n# >>= go_fast_end (BS.unsafeDrop sz bs) 940 | otherwise -> return $! SlowFail bs "non-canonical map len" 941 942go_fast_end !bs (ConsumeTagCanonical k) = 943 case tryConsumeTag (BS.unsafeHead bs) bs of 944 DecodeFailure -> return $! SlowFail bs "expected tag" 945 DecodedToken sz (W# w#) 946 | isWordCanonical sz w# -> k w# >>= go_fast_end (BS.unsafeDrop sz bs) 947 | otherwise -> return $! SlowFail bs "non-canonical tag" 948 949#if defined(ARCH_32bit) 950go_fast_end !bs (ConsumeWord64 k) = 951 case tryConsumeWord64 (BS.unsafeHead bs) bs of 952 DecodeFailure -> return $! SlowFail bs "expected word64" 953 DecodedToken sz (W64# w#) -> k w# >>= go_fast_end (BS.unsafeDrop sz bs) 954 955go_fast_end !bs (ConsumeNegWord64 k) = 956 case tryConsumeNegWord64 (BS.unsafeHead bs) bs of 957 DecodeFailure -> return $! SlowFail bs "expected negative int" 958 DecodedToken sz (W64# w#) -> k w# >>= go_fast_end (BS.unsafeDrop sz bs) 959 960go_fast_end !bs (ConsumeInt64 k) = 961 case tryConsumeInt64 (BS.unsafeHead bs) bs of 962 DecodeFailure -> return $! SlowFail bs "expected int64" 963 DecodedToken sz (I64# i#) -> k i# >>= go_fast_end (BS.unsafeDrop sz bs) 964 965go_fast_end !bs (ConsumeListLen64 k) = 966 case tryConsumeListLen64 (BS.unsafeHead bs) bs of 967 DecodeFailure -> return $! SlowFail bs "expected list len 64" 968 DecodedToken sz (I64# i#) -> k i# >>= go_fast_end (BS.unsafeDrop sz bs) 969 970go_fast_end !bs (ConsumeMapLen64 k) = 971 case tryConsumeMapLen64 (BS.unsafeHead bs) bs of 972 DecodeFailure -> return $! SlowFail bs "expected map len 64" 973 DecodedToken sz (I64# i#) -> k i# >>= go_fast_end (BS.unsafeDrop sz bs) 974 975go_fast_end !bs (ConsumeTag64 k) = 976 case tryConsumeTag64 (BS.unsafeHead bs) bs of 977 DecodeFailure -> return $! SlowFail bs "expected tag64" 978 DecodedToken sz (W64# w#) -> k w# >>= go_fast_end (BS.unsafeDrop sz bs) 979 980go_fast_end !bs (ConsumeWord64Canonical k) = 981 case tryConsumeWord64 (BS.unsafeHead bs) bs of 982 DecodeFailure -> return $! SlowFail bs "expected word64" 983 DecodedToken sz (W64# w#) 984 | isWord64Canonical sz w# -> k w# >>= go_fast_end (BS.unsafeDrop sz bs) 985 | otherwise -> return $! SlowFail bs "non-canonical word64" 986 987go_fast_end !bs (ConsumeNegWord64Canonical k) = 988 case tryConsumeNegWord64 (BS.unsafeHead bs) bs of 989 DecodeFailure -> return $! SlowFail bs "expected negative int" 990 DecodedToken sz (W64# w#) 991 | isWord64Canonical sz w# -> k w# >>= go_fast_end (BS.unsafeDrop sz bs) 992 | otherwise -> return $! SlowFail bs "non-canonical negative int" 993 994go_fast_end !bs (ConsumeInt64Canonical k) = 995 case tryConsumeInt64 (BS.unsafeHead bs) bs of 996 DecodeFailure -> return $! SlowFail bs "expected int64" 997 DecodedToken sz (I64# i#) 998 | isInt64Canonical sz i# -> k i# >>= go_fast_end (BS.unsafeDrop sz bs) 999 | otherwise -> return $! SlowFail bs "non-canonical int64" 1000 1001go_fast_end !bs (ConsumeListLen64Canonical k) = 1002 case tryConsumeListLen64 (BS.unsafeHead bs) bs of 1003 DecodeFailure -> return $! SlowFail bs "expected list len 64" 1004 DecodedToken sz (I64# i#) 1005 -- List length can't be negative, cast it to Word64#. 1006 | isWord64Canonical sz (int64ToWord64# i#) -> 1007 k i# >>= go_fast_end (BS.unsafeDrop sz bs) 1008 | otherwise -> 1009 return $! SlowFail bs "non-canonical list len 64" 1010 1011go_fast_end !bs (ConsumeMapLen64Canonical k) = 1012 case tryConsumeMapLen64 (BS.unsafeHead bs) bs of 1013 DecodeFailure -> return $! SlowFail bs "expected map len 64" 1014 DecodedToken sz (I64# i#) 1015 -- Map length can't be negative, cast it to Word64#. 1016 | isWord64Canonical sz (int64ToWord64# i#) -> 1017 k i# >>= go_fast_end (BS.unsafeDrop sz bs) 1018 | otherwise -> 1019 return $! SlowFail bs "non-canonical map len 64" 1020 1021go_fast_end !bs (ConsumeTag64Canonical k) = 1022 case tryConsumeTag64 (BS.unsafeHead bs) bs of 1023 DecodeFailure -> return $! SlowFail bs "expected tag64" 1024 DecodedToken sz (W64# w#) 1025 | isWord64Canonical sz w# -> k w# >>= go_fast_end (BS.unsafeDrop sz bs) 1026 | otherwise -> return $! SlowFail bs "non-canonical tag64" 1027 1028#endif 1029 1030go_fast_end !bs (ConsumeInteger k) = 1031 case tryConsumeInteger (BS.unsafeHead bs) bs of 1032 DecodeFailure -> return $! SlowFail bs "expected integer" 1033 DecodedToken sz (BigIntToken _ n) -> k n >>= go_fast_end (BS.unsafeDrop sz bs) 1034 DecodedToken sz (BigUIntNeedBody _ len) -> return $! SlowConsumeTokenBytes (BS.unsafeDrop sz bs) (adjustContBigUIntNeedBody k) len 1035 DecodedToken sz (BigNIntNeedBody _ len) -> return $! SlowConsumeTokenBytes (BS.unsafeDrop sz bs) (adjustContBigNIntNeedBody k) len 1036 DecodedToken sz BigUIntNeedHeader -> return $! SlowDecodeAction (BS.unsafeDrop sz bs) (adjustContBigUIntNeedHeader k) 1037 DecodedToken sz BigNIntNeedHeader -> return $! SlowDecodeAction (BS.unsafeDrop sz bs) (adjustContBigNIntNeedHeader k) 1038 1039go_fast_end !bs (ConsumeFloat k) = 1040 case tryConsumeFloat (BS.unsafeHead bs) bs of 1041 DecodeFailure -> return $! SlowFail bs "expected float" 1042 DecodedToken sz (F# f#) -> k f# >>= go_fast_end (BS.unsafeDrop sz bs) 1043 1044go_fast_end !bs (ConsumeDouble k) = 1045 case tryConsumeDouble (BS.unsafeHead bs) bs of 1046 DecodeFailure -> return $! SlowFail bs "expected double" 1047 DecodedToken sz (D# f#) -> k f# >>= go_fast_end (BS.unsafeDrop sz bs) 1048 1049go_fast_end !bs (ConsumeBytes k) = 1050 case tryConsumeBytes (BS.unsafeHead bs) bs of 1051 DecodeFailure -> return $! SlowFail bs "expected bytes" 1052 DecodedToken sz (Fits _ bstr) -> k bstr >>= go_fast_end (BS.unsafeDrop sz bs) 1053 DecodedToken sz (TooLong _ len) -> return $! SlowConsumeTokenBytes 1054 (BS.unsafeDrop sz bs) k len 1055 1056go_fast_end !bs (ConsumeByteArray k) = 1057 case tryConsumeBytes (BS.unsafeHead bs) bs of 1058 DecodeFailure -> return $! SlowFail bs "expected string" 1059 DecodedToken sz (Fits _ str) -> (k $! BA.fromByteString str) 1060 >>= go_fast_end (BS.unsafeDrop sz bs) 1061 DecodedToken sz (TooLong _ len) -> return $! SlowConsumeTokenByteArray 1062 (BS.unsafeDrop sz bs) k len 1063 1064go_fast_end !bs (ConsumeString k) = 1065 case tryConsumeString (BS.unsafeHead bs) bs of 1066 DecodeFailure -> return $! SlowFail bs "expected string" 1067 DecodedToken sz (Fits _ str) -> case T.decodeUtf8' str of 1068 Right t -> k t >>= go_fast_end (BS.unsafeDrop sz bs) 1069 Left _e -> return $! SlowFail bs "invalid UTF8" 1070 DecodedToken sz (TooLong _ len) -> return $! SlowConsumeTokenString 1071 (BS.unsafeDrop sz bs) k len 1072 1073go_fast_end !bs (ConsumeUtf8ByteArray k) = 1074 case tryConsumeString (BS.unsafeHead bs) bs of 1075 DecodeFailure -> return $! SlowFail bs "expected string" 1076 DecodedToken sz (Fits _ str) -> (k $! BA.fromByteString str) 1077 >>= go_fast_end (BS.unsafeDrop sz bs) 1078 DecodedToken sz (TooLong _ len) -> return $! SlowConsumeTokenUtf8ByteArray 1079 (BS.unsafeDrop sz bs) k len 1080 1081go_fast_end !bs (ConsumeBool k) = 1082 case tryConsumeBool (BS.unsafeHead bs) of 1083 DecodeFailure -> return $! SlowFail bs "expected bool" 1084 DecodedToken sz b -> k b >>= go_fast_end (BS.unsafeDrop sz bs) 1085 1086go_fast_end !bs (ConsumeSimple k) = 1087 case tryConsumeSimple (BS.unsafeHead bs) bs of 1088 DecodeFailure -> return $! SlowFail bs "expected simple" 1089 DecodedToken sz (W# w#) -> k w# >>= go_fast_end (BS.unsafeDrop sz bs) 1090 1091go_fast_end !bs (ConsumeIntegerCanonical k) = 1092 case tryConsumeInteger (BS.unsafeHead bs) bs of 1093 DecodeFailure -> return $! SlowFail bs "expected integer" 1094 DecodedToken sz (BigIntToken True n) -> k n >>= go_fast_end (BS.unsafeDrop sz bs) 1095 DecodedToken sz (BigUIntNeedBody True len) -> return $! SlowConsumeTokenBytes 1096 (BS.unsafeDrop sz bs) (adjustContCanonicalBigUIntNeedBody k) len 1097 DecodedToken sz (BigNIntNeedBody True len) -> return $! SlowConsumeTokenBytes 1098 (BS.unsafeDrop sz bs) (adjustContCanonicalBigNIntNeedBody k) len 1099 DecodedToken sz BigUIntNeedHeader -> return $! SlowDecodeAction 1100 (BS.unsafeDrop sz bs) (adjustContCanonicalBigUIntNeedHeader k) 1101 DecodedToken sz BigNIntNeedHeader -> return $! SlowDecodeAction 1102 (BS.unsafeDrop sz bs) (adjustContCanonicalBigNIntNeedHeader k) 1103 _ -> return $! SlowFail bs "non-canonical integer" 1104 1105go_fast_end !bs (ConsumeFloat16Canonical k) = 1106 case tryConsumeFloat (BS.unsafeHead bs) bs of 1107 DecodeFailure -> return $! SlowFail bs "expected float" 1108 DecodedToken sz f@(F# f#) 1109 | isFloat16Canonical sz bs f -> k f# >>= go_fast_end (BS.unsafeDrop sz bs) 1110 | otherwise -> return $! SlowFail bs "non-canonical float16" 1111 1112go_fast_end !bs (ConsumeFloatCanonical k) = 1113 case tryConsumeFloat (BS.unsafeHead bs) bs of 1114 DecodeFailure -> return $! SlowFail bs "expected float" 1115 DecodedToken sz f@(F# f#) 1116 | isFloatCanonical sz bs f -> k f# >>= go_fast_end (BS.unsafeDrop sz bs) 1117 | otherwise -> return $! SlowFail bs "non-canonical float" 1118 1119go_fast_end !bs (ConsumeDoubleCanonical k) = 1120 case tryConsumeDouble (BS.unsafeHead bs) bs of 1121 DecodeFailure -> return $! SlowFail bs "expected double" 1122 DecodedToken sz f@(D# f#) 1123 | isDoubleCanonical sz bs f -> k f# >>= go_fast_end (BS.unsafeDrop sz bs) 1124 | otherwise -> return $! SlowFail bs "non-canonical double" 1125 1126go_fast_end !bs (ConsumeBytesCanonical k) = 1127 case tryConsumeBytes (BS.unsafeHead bs) bs of 1128 DecodeFailure -> return $! SlowFail bs "expected bytes" 1129 DecodedToken sz token -> case token of 1130 Fits True bstr -> k bstr >>= go_fast_end (BS.unsafeDrop sz bs) 1131 TooLong True len -> return $! SlowConsumeTokenBytes (BS.unsafeDrop sz bs) k len 1132 _ -> return $! SlowFail bs "non-canonical length prefix" 1133 1134go_fast_end !bs (ConsumeByteArrayCanonical k) = 1135 case tryConsumeBytes (BS.unsafeHead bs) bs of 1136 DecodeFailure -> return $! SlowFail bs "expected string" 1137 DecodedToken sz token -> case token of 1138 Fits True str -> 1139 (k $! BA.fromByteString str) >>= go_fast_end (BS.unsafeDrop sz bs) 1140 TooLong True len -> 1141 return $! SlowConsumeTokenByteArray (BS.unsafeDrop sz bs) k len 1142 _ -> return $! SlowFail bs "non-canonical length prefix" 1143 1144go_fast_end !bs (ConsumeStringCanonical k) = 1145 case tryConsumeString (BS.unsafeHead bs) bs of 1146 DecodeFailure -> return $! SlowFail bs "expected string" 1147 DecodedToken sz token -> case token of 1148 Fits True str -> case T.decodeUtf8' str of 1149 Right t -> k t >>= go_fast_end (BS.unsafeDrop sz bs) 1150 Left _e -> return $! SlowFail bs "invalid UTF8" 1151 TooLong True len -> return $! SlowConsumeTokenString (BS.unsafeDrop sz bs) k len 1152 _ -> return $! SlowFail bs "non-canonical length prefix" 1153 1154go_fast_end !bs (ConsumeUtf8ByteArrayCanonical k) = 1155 case tryConsumeString (BS.unsafeHead bs) bs of 1156 DecodeFailure -> return $! SlowFail bs "expected string" 1157 DecodedToken sz token -> case token of 1158 Fits True str -> 1159 (k $! BA.fromByteString str) >>= go_fast_end (BS.unsafeDrop sz bs) 1160 TooLong True len -> 1161 return $! SlowConsumeTokenUtf8ByteArray (BS.unsafeDrop sz bs) k len 1162 _ -> 1163 return $! SlowFail bs "non-canonical length prefix" 1164 1165go_fast_end !bs (ConsumeSimpleCanonical k) = 1166 case tryConsumeSimple (BS.unsafeHead bs) bs of 1167 DecodeFailure -> return $! SlowFail bs "expected simple" 1168 DecodedToken sz (W# w#) 1169 | isSimpleCanonical sz w# -> k w# >>= go_fast_end (BS.unsafeDrop sz bs) 1170 | otherwise -> return $! SlowFail bs "non-canonical simple" 1171 1172go_fast_end !bs (ConsumeBytesIndef k) = 1173 case tryConsumeBytesIndef (BS.unsafeHead bs) of 1174 DecodeFailure -> return $! SlowFail bs "expected bytes start" 1175 DecodedToken sz _ -> k >>= go_fast_end (BS.unsafeDrop sz bs) 1176 1177go_fast_end !bs (ConsumeStringIndef k) = 1178 case tryConsumeStringIndef (BS.unsafeHead bs) of 1179 DecodeFailure -> return $! SlowFail bs "expected string start" 1180 DecodedToken sz _ -> k >>= go_fast_end (BS.unsafeDrop sz bs) 1181 1182go_fast_end !bs (ConsumeListLenIndef k) = 1183 case tryConsumeListLenIndef (BS.unsafeHead bs) of 1184 DecodeFailure -> return $! SlowFail bs "expected list start" 1185 DecodedToken sz _ -> k >>= go_fast_end (BS.unsafeDrop sz bs) 1186 1187go_fast_end !bs (ConsumeMapLenIndef k) = 1188 case tryConsumeMapLenIndef (BS.unsafeHead bs) of 1189 DecodeFailure -> return $! SlowFail bs "expected map start" 1190 DecodedToken sz _ -> k >>= go_fast_end (BS.unsafeDrop sz bs) 1191 1192go_fast_end !bs (ConsumeNull k) = 1193 case tryConsumeNull (BS.unsafeHead bs) of 1194 DecodeFailure -> return $! SlowFail bs "expected null" 1195 DecodedToken sz _ -> k >>= go_fast_end (BS.unsafeDrop sz bs) 1196 1197go_fast_end !bs (ConsumeListLenOrIndef k) = 1198 case tryConsumeListLenOrIndef (BS.unsafeHead bs) bs of 1199 DecodeFailure -> return $! SlowFail bs "expected list len or indef" 1200 DecodedToken sz (I# n#) -> k n# >>= go_fast_end (BS.unsafeDrop sz bs) 1201 1202go_fast_end !bs (ConsumeMapLenOrIndef k) = 1203 case tryConsumeMapLenOrIndef (BS.unsafeHead bs) bs of 1204 DecodeFailure -> return $! SlowFail bs "expected map len or indef" 1205 DecodedToken sz (I# n#) -> k n# >>= go_fast_end (BS.unsafeDrop sz bs) 1206 1207 1208-- The slow path starts off by running the fast path on the current chunk 1209-- then looking at where it finished, fixing up the chunk boundary issues, 1210-- getting more input and going around again. 1211-- 1212-- The offset here is the offset after of all data consumed so far, 1213-- so not including the current chunk. 1214-- 1215go_slow :: DecodeAction s a -> ByteString -> ByteOffset 1216 -> IncrementalDecoder s (ByteString, ByteOffset, a) 1217go_slow da bs !offset = do 1218 slowpath <- lift $ go_fast bs da 1219 case slowpath of 1220 FastDone bs' x -> return (bs', offset', x) 1221 where 1222 !offset' = offset + intToInt64 (BS.length bs - BS.length bs') 1223 1224 SlowConsumeTokenBytes bs' k len -> do 1225 (bstr, bs'') <- getTokenVarLen len bs' offset' 1226 lift (k bstr) >>= \daz -> go_slow daz bs'' (offset' + intToInt64 len) 1227 where 1228 !offset' = offset + intToInt64 (BS.length bs - BS.length bs') 1229 1230 SlowConsumeTokenByteArray bs' k len -> do 1231 (bstr, bs'') <- getTokenVarLen len bs' offset' 1232 let !str = BA.fromByteString bstr 1233 lift (k str) >>= \daz -> go_slow daz bs'' (offset' + intToInt64 len) 1234 where 1235 !offset' = offset + intToInt64 (BS.length bs - BS.length bs') 1236 1237 SlowConsumeTokenString bs' k len -> do 1238 (bstr, bs'') <- getTokenVarLen len bs' offset' 1239 case T.decodeUtf8' bstr of 1240 Right str -> lift (k str) >>= \daz -> 1241 go_slow daz bs'' (offset' + intToInt64 len) 1242 Left _e -> decodeFail bs' offset' "invalid UTF8" 1243 where 1244 !offset' = offset + intToInt64 (BS.length bs - BS.length bs') 1245 1246 SlowConsumeTokenUtf8ByteArray bs' k len -> do 1247 (bstr, bs'') <- getTokenVarLen len bs' offset' 1248 let !str = BA.fromByteString bstr 1249 lift (k str) >>= \daz -> go_slow daz bs'' (offset' + intToInt64 len) 1250 where 1251 !offset' = offset + intToInt64 (BS.length bs - BS.length bs') 1252 1253 -- we didn't have enough input in the buffer 1254 SlowDecodeAction bs' da' | BS.null bs' -> do 1255 -- in this case we're exactly out of input 1256 -- so we can get more input and carry on 1257 mbs <- needChunk 1258 case mbs of 1259 Nothing -> decodeFail bs' offset' "end of input" 1260 Just bs'' -> go_slow da' bs'' offset' 1261 where 1262 !offset' = offset + intToInt64 (BS.length bs - BS.length bs') 1263 1264 SlowDecodeAction bs' da' -> 1265 -- of course we should only end up here when we really are out of 1266 -- input, otherwise go_fast_end could have continued 1267 assert (BS.length bs' < tokenSize (BS.head bs')) $ 1268 go_slow_fixup da' bs' offset' 1269 where 1270 !offset' = offset + intToInt64 (BS.length bs - BS.length bs') 1271 1272 SlowPeekByteOffset bs' k -> 1273 lift (k off#) >>= \daz -> go_slow daz bs' offset' 1274 where 1275 !offset'@(I64# off#) = offset + intToInt64 (BS.length bs - BS.length bs') 1276 1277 SlowFail bs' msg -> decodeFail bs' offset' msg 1278 where 1279 !offset' = offset + intToInt64 (BS.length bs - BS.length bs') 1280 1281-- The complicated case is when a token spans a chunk boundary. 1282-- 1283-- Our goal is to get enough input so that go_fast_end can consume exactly one 1284-- token without need for further fixups. 1285-- 1286go_slow_fixup :: DecodeAction s a -> ByteString -> ByteOffset 1287 -> IncrementalDecoder s (ByteString, ByteOffset, a) 1288go_slow_fixup da !bs !offset = do 1289 let !hdr = BS.head bs 1290 !sz = tokenSize hdr 1291 mbs <- needChunk 1292 case mbs of 1293 Nothing -> decodeFail bs offset "end of input" 1294 1295 Just bs' 1296 -- We have enough input now, try reading one final token 1297 | BS.length bs + BS.length bs' >= sz 1298 -> go_slow_overlapped da sz bs bs' offset 1299 1300 -- We still don't have enough input, get more 1301 | otherwise 1302 -> go_slow_fixup da (bs <> bs') offset 1303 1304-- We've now got more input, but we have one token that spanned the old and 1305-- new input buffers, so we have to decode that one before carrying on 1306go_slow_overlapped :: DecodeAction s a -> Int -> ByteString -> ByteString 1307 -> ByteOffset 1308 -> IncrementalDecoder s (ByteString, ByteOffset, a) 1309go_slow_overlapped da sz bs_cur bs_next !offset = 1310 1311 -- we have: 1312 -- sz the size of the pending input token 1313 -- bs_cur the tail end of the previous input buffer 1314 -- bs_next the next input chunk 1315 1316 -- we know the old buffer is too small, but the combo is enough 1317 assert (BS.length bs_cur < sz) $ 1318 assert (BS.length bs_cur + BS.length bs_next >= sz) $ 1319 1320 -- we make: 1321 -- bs_tok a buffer containing only the pending input token 1322 -- bs' the tail of the next input chunk, 1323 -- which will become the next input buffer 1324 1325 let bs_tok = bs_cur <> BS.unsafeTake (sz - BS.length bs_cur) bs_next 1326 bs' = BS.unsafeDrop (sz - BS.length bs_cur) bs_next 1327 offset' = offset + intToInt64 sz in 1328 1329 -- so the token chunk should be exactly the right size 1330 assert (BS.length bs_tok == sz) $ 1331 -- and overall we shouldn't loose any input 1332 assert (BS.length bs_cur + BS.length bs_next == sz + BS.length bs') $ do 1333 1334 -- so now we can run the fast path to consume just this one token 1335 slowpath <- lift $ go_fast_end bs_tok da 1336 case slowpath of 1337 1338 -- typically we'll fall out of the fast path having 1339 -- consumed exactly one token, now with no trailing data 1340 SlowDecodeAction bs_empty da' -> 1341 assert (BS.null bs_empty) $ 1342 go_slow da' bs' offset' 1343 1344 -- but the other possibilities can happen too 1345 FastDone bs_empty x -> 1346 assert (BS.null bs_empty) $ 1347 return (bs', offset', x) 1348 1349 SlowConsumeTokenBytes bs_empty k len -> 1350 assert (BS.null bs_empty) $ do 1351 (bstr, bs'') <- getTokenShortOrVarLen bs' offset' len 1352 lift (k bstr) >>= \daz -> go_slow daz bs'' (offset' + intToInt64 len) 1353 1354 SlowConsumeTokenByteArray bs_empty k len -> 1355 assert (BS.null bs_empty) $ do 1356 (bstr, bs'') <- getTokenShortOrVarLen bs' offset' len 1357 let !ba = BA.fromByteString bstr 1358 lift (k ba) >>= \daz -> go_slow daz bs'' (offset' + intToInt64 len) 1359 1360 SlowConsumeTokenString bs_empty k len -> 1361 assert (BS.null bs_empty) $ do 1362 (bstr, bs'') <- getTokenShortOrVarLen bs' offset' len 1363 case T.decodeUtf8' bstr of 1364 Right str -> lift (k str) >>= \daz -> 1365 go_slow daz bs'' (offset' + intToInt64 len) 1366 Left _e -> decodeFail bs' offset' "invalid UTF8" 1367 1368 SlowConsumeTokenUtf8ByteArray bs_empty k len -> 1369 assert (BS.null bs_empty) $ do 1370 (bstr, bs'') <- getTokenShortOrVarLen bs' offset' len 1371 let !ba = BA.fromByteString bstr 1372 lift (k ba) >>= \daz -> go_slow daz bs'' (offset' + intToInt64 len) 1373 1374 SlowPeekByteOffset bs_empty k -> 1375 assert (BS.null bs_empty) $ do 1376 lift (k off#) >>= \daz -> go_slow daz bs' offset' 1377 where 1378 !(I64# off#) = offset' 1379 1380 SlowFail bs_unconsumed msg -> 1381 decodeFail (bs_unconsumed <> bs') offset'' msg 1382 where 1383 !offset'' = offset + intToInt64 (sz - BS.length bs_unconsumed) 1384 where 1385 {-# INLINE getTokenShortOrVarLen #-} 1386 getTokenShortOrVarLen :: BS.ByteString 1387 -> ByteOffset 1388 -> Int 1389 -> IncrementalDecoder s (ByteString, ByteString) 1390 getTokenShortOrVarLen bs' offset' len 1391 | BS.length bs' < len = getTokenVarLen len bs' offset' 1392 | otherwise = let !bstr = BS.take len bs' 1393 !bs'' = BS.drop len bs' 1394 in return (bstr, bs'') 1395 1396 1397-- TODO FIXME: we can do slightly better here. If we're returning a 1398-- lazy string (String, lazy Text, lazy ByteString) then we don't have 1399-- to strictify here and if we're returning a strict string perhaps we 1400-- can still stream the utf8 validation/converstion 1401 1402-- TODO FIXME: also consider sharing or not sharing here, and possibly 1403-- rechunking. 1404 1405getTokenVarLen :: Int -> ByteString -> ByteOffset 1406 -> IncrementalDecoder s (ByteString, ByteString) 1407getTokenVarLen len bs offset = 1408 assert (len > BS.length bs) $ do 1409 mbs <- needChunk 1410 case mbs of 1411 Nothing -> decodeFail BS.empty offset "end of input" 1412 Just bs' 1413 | let n = len - BS.length bs 1414 , BS.length bs' >= n -> 1415 let !tok = bs <> BS.unsafeTake n bs' 1416 in return (tok, BS.drop n bs') 1417 1418 | otherwise -> getTokenVarLenSlow 1419 [bs',bs] 1420 (len - (BS.length bs + BS.length bs')) 1421 offset 1422 1423getTokenVarLenSlow :: [ByteString] -> Int -> ByteOffset 1424 -> IncrementalDecoder s (ByteString, ByteString) 1425getTokenVarLenSlow bss n offset = do 1426 mbs <- needChunk 1427 case mbs of 1428 Nothing -> decodeFail BS.empty offset "end of input" 1429 Just bs 1430 | BS.length bs >= n -> 1431 let !tok = BS.concat (reverse (BS.unsafeTake n bs : bss)) 1432 in return (tok, BS.drop n bs) 1433 | otherwise -> getTokenVarLenSlow (bs:bss) (n - BS.length bs) offset 1434 1435 1436 1437tokenSize :: Word8 -> Int 1438tokenSize hdr = 1439 word8ToInt $ 1440 decodeTableSz `A.unsafeAt` (word8ToInt hdr .&. 0x1f) 1441 1442decodeTableSz :: UArray Word8 Word8 1443decodeTableSz = 1444 array (0, 0x1f) $ 1445 [ (encodeHeader 0 n, 1) | n <- [0..0x1f] ] 1446 ++ [ (encodeHeader 0 n, s) | (n, s) <- zip [24..27] [2,3,5,9] ] 1447 1448decodeTokenTypeTable :: Array Word8 TokenType 1449decodeTokenTypeTable = 1450 array (minBound, maxBound) $ 1451 [ (encodeHeader 0 n, TypeUInt) | n <- [0..26] ] 1452 ++ [ (encodeHeader 0 27, TypeUInt64) 1453 , (encodeHeader 0 31, TypeInvalid) ] 1454 1455 ++ [ (encodeHeader 1 n, TypeNInt) | n <- [0..26] ] 1456 ++ [ (encodeHeader 1 27, TypeNInt64) 1457 , (encodeHeader 1 31, TypeInvalid) ] 1458 1459 ++ [ (encodeHeader 2 n, TypeBytes) | n <- [0..27] ] 1460 ++ [ (encodeHeader 2 31, TypeBytesIndef) ] 1461 1462 ++ [ (encodeHeader 3 n, TypeString) | n <- [0..27] ] 1463 ++ [ (encodeHeader 3 31, TypeStringIndef) ] 1464 1465 ++ [ (encodeHeader 4 n, TypeListLen) | n <- [0..26] ] 1466 ++ [ (encodeHeader 4 27, TypeListLen64) 1467 , (encodeHeader 4 31, TypeListLenIndef) ] 1468 1469 ++ [ (encodeHeader 5 n, TypeMapLen) | n <- [0..26] ] 1470 ++ [ (encodeHeader 5 27, TypeMapLen64) 1471 , (encodeHeader 5 31, TypeMapLenIndef) ] 1472 1473 ++ [ (encodeHeader 6 n, TypeTag) | n <- 0:1:[4..26] ] 1474 ++ [ (encodeHeader 6 2, TypeInteger) 1475 , (encodeHeader 6 3, TypeInteger) 1476 , (encodeHeader 6 27, TypeTag64) 1477 , (encodeHeader 6 31, TypeInvalid) ] 1478 1479 ++ [ (encodeHeader 7 n, TypeSimple) | n <- [0..19] ] 1480 ++ [ (encodeHeader 7 20, TypeBool) 1481 , (encodeHeader 7 21, TypeBool) 1482 , (encodeHeader 7 22, TypeNull) 1483 , (encodeHeader 7 23, TypeSimple) 1484 , (encodeHeader 7 24, TypeSimple) 1485 , (encodeHeader 7 25, TypeFloat16) 1486 , (encodeHeader 7 26, TypeFloat32) 1487 , (encodeHeader 7 27, TypeFloat64) 1488 , (encodeHeader 7 31, TypeBreak) ] 1489 1490 ++ [ (encodeHeader mt n, TypeInvalid) | mt <- [0..7], n <- [28..30] ] 1491 1492encodeHeader :: Word8 -> Word8 -> Word8 1493encodeHeader mt ai = mt `shiftL` 5 .|. ai 1494 1495data DecodedToken a = DecodedToken !Int !a | DecodeFailure 1496 deriving Show 1497-- TODO add classification for DecodeFailure 1498 1499-- | Note that canonicity information is calculated lazily. This way we don't 1500-- need to concern ourselves with two distinct paths, while according to 1501-- benchmarks it doesn't affect performance in the non-canonical case. 1502data LongToken a = Fits Bool {- canonical? -} !a 1503 | TooLong Bool {- canonical? -} !Int 1504 deriving Show 1505 1506-- Canoncal NaN floats: 1507-- 1508-- In these float/double canonical tests we check NaNs are canonical too. 1509-- There are lots of bit values representing NaN, for each of the flat types. 1510-- The rule from CBOR RFC 7049, section 3.9 is that the canonical NaN is the 1511-- CBOR term f97e00 which is the canonical half-float representation. We do 1512-- this by testing for the size being 3 (since tryConsumeFloat/Double only 1513-- return 3 when the header byte is 0xf9) and the 16 bytes being 0x7e00. 1514 1515{-# INLINE isFloat16Canonical #-} 1516isFloat16Canonical :: Int -> BS.ByteString -> Float -> Bool 1517isFloat16Canonical sz bs f 1518 | sz /= 3 = False 1519 | isNaN f = eatTailWord16 bs == 0x7e00 1520 | otherwise = True 1521 1522{-# INLINE isFloatCanonical #-} 1523isFloatCanonical :: Int -> BS.ByteString -> Float -> Bool 1524isFloatCanonical sz bs f 1525 | isNaN f = sz == 3 && eatTailWord16 bs == 0x7e00 1526 | otherwise = sz == 5 1527 1528{-# INLINE isDoubleCanonical #-} 1529isDoubleCanonical :: Int -> BS.ByteString -> Double -> Bool 1530isDoubleCanonical sz bs f 1531 | isNaN f = sz == 3 && eatTailWord16 bs == 0x7e00 1532 | otherwise = sz == 9 1533 1534{-# INLINE isWordCanonical #-} 1535isWordCanonical :: Int -> Word# -> Bool 1536isWordCanonical sz w# 1537 | sz == 2 = isTrue# (w# `gtWord#` 0x17##) 1538 | sz == 3 = isTrue# (w# `gtWord#` 0xff##) 1539 | sz == 5 = isTrue# (w# `gtWord#` 0xffff##) 1540 | sz == 9 = isTrue# (w# `gtWord#` 0xffffffff##) 1541 | otherwise = True 1542 1543{-# INLINE isIntCanonical #-} 1544isIntCanonical :: Int -> Int# -> Bool 1545isIntCanonical sz i# 1546 | isTrue# (i# <# 0#) = isWordCanonical sz (not# w#) 1547 | otherwise = isWordCanonical sz w# 1548 where 1549 w# = int2Word# i# 1550 1551#if defined(ARCH_32bit) 1552{-# INLINE isWord64Canonical #-} 1553isWord64Canonical :: Int -> Word64# -> Bool 1554isWord64Canonical sz w# 1555 | sz == 2 = isTrue# (w# `gtWord64#` wordToWord64# 0x17##) 1556 | sz == 3 = isTrue# (w# `gtWord64#` wordToWord64# 0xff##) 1557 | sz == 5 = isTrue# (w# `gtWord64#` wordToWord64# 0xffff##) 1558 | sz == 9 = isTrue# (w# `gtWord64#` wordToWord64# 0xffffffff##) 1559 | otherwise = True 1560 1561{-# INLINE isInt64Canonical #-} 1562isInt64Canonical :: Int -> Int64# -> Bool 1563isInt64Canonical sz i# 1564 | isTrue# (i# `ltInt64#` intToInt64# 0#) = isWord64Canonical sz (not64# w#) 1565 | otherwise = isWord64Canonical sz w# 1566 where 1567 w# = int64ToWord64# i# 1568#endif 1569 1570{-# INLINE isSimpleCanonical #-} 1571isSimpleCanonical :: Int -> Word# -> Bool 1572isSimpleCanonical 2 w# = isTrue# (w# `gtWord#` 0x17##) 1573isSimpleCanonical _ _ = True -- only size 1 and 2 are possible here 1574 1575 1576-- TODO FIXME: check with 7.10 and file ticket: 1577-- a case analysis against 0x00 .. 0xff :: Word8 turns into a huge chain 1578-- of >= tests. It could use a jump table, or at least it could use a binary 1579-- division. Whereas for Int or Word it does the right thing. 1580 1581{-# INLINE tryConsumeWord #-} 1582tryConsumeWord :: Word8 -> ByteString -> DecodedToken Word 1583tryConsumeWord hdr !bs = case word8ToWord hdr of 1584 -- Positive integers (type 0) 1585 0x00 -> DecodedToken 1 0 1586 0x01 -> DecodedToken 1 1 1587 0x02 -> DecodedToken 1 2 1588 0x03 -> DecodedToken 1 3 1589 0x04 -> DecodedToken 1 4 1590 0x05 -> DecodedToken 1 5 1591 0x06 -> DecodedToken 1 6 1592 0x07 -> DecodedToken 1 7 1593 0x08 -> DecodedToken 1 8 1594 0x09 -> DecodedToken 1 9 1595 0x0a -> DecodedToken 1 10 1596 0x0b -> DecodedToken 1 11 1597 0x0c -> DecodedToken 1 12 1598 0x0d -> DecodedToken 1 13 1599 0x0e -> DecodedToken 1 14 1600 0x0f -> DecodedToken 1 15 1601 0x10 -> DecodedToken 1 16 1602 0x11 -> DecodedToken 1 17 1603 0x12 -> DecodedToken 1 18 1604 0x13 -> DecodedToken 1 19 1605 0x14 -> DecodedToken 1 20 1606 0x15 -> DecodedToken 1 21 1607 0x16 -> DecodedToken 1 22 1608 0x17 -> DecodedToken 1 23 1609 0x18 -> DecodedToken 2 $! word8ToWord (eatTailWord8 bs) 1610 0x19 -> DecodedToken 3 $! word16ToWord (eatTailWord16 bs) 1611 0x1a -> DecodedToken 5 $! word32ToWord (eatTailWord32 bs) 1612#if defined(ARCH_64bit) 1613 0x1b -> DecodedToken 9 $! word64ToWord (eatTailWord64 bs) 1614#else 1615 0x1b -> case word64ToWord (eatTailWord64 bs) of 1616 Just n -> DecodedToken 9 n 1617 Nothing -> DecodeFailure 1618#endif 1619 _ -> DecodeFailure 1620 1621 1622{-# INLINE tryConsumeNegWord #-} 1623tryConsumeNegWord :: Word8 -> ByteString -> DecodedToken Word 1624tryConsumeNegWord hdr !bs = case word8ToWord hdr of 1625 -- Positive integers (type 0) 1626 0x20 -> DecodedToken 1 0 1627 0x21 -> DecodedToken 1 1 1628 0x22 -> DecodedToken 1 2 1629 0x23 -> DecodedToken 1 3 1630 0x24 -> DecodedToken 1 4 1631 0x25 -> DecodedToken 1 5 1632 0x26 -> DecodedToken 1 6 1633 0x27 -> DecodedToken 1 7 1634 0x28 -> DecodedToken 1 8 1635 0x29 -> DecodedToken 1 9 1636 0x2a -> DecodedToken 1 10 1637 0x2b -> DecodedToken 1 11 1638 0x2c -> DecodedToken 1 12 1639 0x2d -> DecodedToken 1 13 1640 0x2e -> DecodedToken 1 14 1641 0x2f -> DecodedToken 1 15 1642 0x30 -> DecodedToken 1 16 1643 0x31 -> DecodedToken 1 17 1644 0x32 -> DecodedToken 1 18 1645 0x33 -> DecodedToken 1 19 1646 0x34 -> DecodedToken 1 20 1647 0x35 -> DecodedToken 1 21 1648 0x36 -> DecodedToken 1 22 1649 0x37 -> DecodedToken 1 23 1650 0x38 -> DecodedToken 2 $! (word8ToWord (eatTailWord8 bs)) 1651 0x39 -> DecodedToken 3 $! (word16ToWord (eatTailWord16 bs)) 1652 0x3a -> DecodedToken 5 $! (word32ToWord (eatTailWord32 bs)) 1653#if defined(ARCH_64bit) 1654 0x3b -> DecodedToken 9 $! (word64ToWord (eatTailWord64 bs)) 1655#else 1656 0x3b -> case word64ToWord (eatTailWord64 bs) of 1657 Just n -> DecodedToken 9 n 1658 Nothing -> DecodeFailure 1659#endif 1660 _ -> DecodeFailure 1661 1662 1663{-# INLINE tryConsumeInt #-} 1664tryConsumeInt :: Word8 -> ByteString -> DecodedToken Int 1665tryConsumeInt hdr !bs = case word8ToWord hdr of 1666 -- Positive integers (type 0) 1667 0x00 -> DecodedToken 1 0 1668 0x01 -> DecodedToken 1 1 1669 0x02 -> DecodedToken 1 2 1670 0x03 -> DecodedToken 1 3 1671 0x04 -> DecodedToken 1 4 1672 0x05 -> DecodedToken 1 5 1673 0x06 -> DecodedToken 1 6 1674 0x07 -> DecodedToken 1 7 1675 0x08 -> DecodedToken 1 8 1676 0x09 -> DecodedToken 1 9 1677 0x0a -> DecodedToken 1 10 1678 0x0b -> DecodedToken 1 11 1679 0x0c -> DecodedToken 1 12 1680 0x0d -> DecodedToken 1 13 1681 0x0e -> DecodedToken 1 14 1682 0x0f -> DecodedToken 1 15 1683 0x10 -> DecodedToken 1 16 1684 0x11 -> DecodedToken 1 17 1685 0x12 -> DecodedToken 1 18 1686 0x13 -> DecodedToken 1 19 1687 0x14 -> DecodedToken 1 20 1688 0x15 -> DecodedToken 1 21 1689 0x16 -> DecodedToken 1 22 1690 0x17 -> DecodedToken 1 23 1691 0x18 -> DecodedToken 2 $! (word8ToInt (eatTailWord8 bs)) 1692 0x19 -> DecodedToken 3 $! (word16ToInt (eatTailWord16 bs)) 1693#if defined(ARCH_64bit) 1694 0x1a -> DecodedToken 5 $! (word32ToInt (eatTailWord32 bs)) 1695#else 1696 0x1a -> case word32ToInt (eatTailWord32 bs) of 1697 Just n -> DecodedToken 5 n 1698 Nothing -> DecodeFailure 1699#endif 1700 0x1b -> case word64ToInt (eatTailWord64 bs) of 1701 Just n -> DecodedToken 9 n 1702 Nothing -> DecodeFailure 1703 1704 -- Negative integers (type 1) 1705 0x20 -> DecodedToken 1 (-1) 1706 0x21 -> DecodedToken 1 (-2) 1707 0x22 -> DecodedToken 1 (-3) 1708 0x23 -> DecodedToken 1 (-4) 1709 0x24 -> DecodedToken 1 (-5) 1710 0x25 -> DecodedToken 1 (-6) 1711 0x26 -> DecodedToken 1 (-7) 1712 0x27 -> DecodedToken 1 (-8) 1713 0x28 -> DecodedToken 1 (-9) 1714 0x29 -> DecodedToken 1 (-10) 1715 0x2a -> DecodedToken 1 (-11) 1716 0x2b -> DecodedToken 1 (-12) 1717 0x2c -> DecodedToken 1 (-13) 1718 0x2d -> DecodedToken 1 (-14) 1719 0x2e -> DecodedToken 1 (-15) 1720 0x2f -> DecodedToken 1 (-16) 1721 0x30 -> DecodedToken 1 (-17) 1722 0x31 -> DecodedToken 1 (-18) 1723 0x32 -> DecodedToken 1 (-19) 1724 0x33 -> DecodedToken 1 (-20) 1725 0x34 -> DecodedToken 1 (-21) 1726 0x35 -> DecodedToken 1 (-22) 1727 0x36 -> DecodedToken 1 (-23) 1728 0x37 -> DecodedToken 1 (-24) 1729 0x38 -> DecodedToken 2 $! (-1 - word8ToInt (eatTailWord8 bs)) 1730 0x39 -> DecodedToken 3 $! (-1 - word16ToInt (eatTailWord16 bs)) 1731#if defined(ARCH_64bit) 1732 0x3a -> DecodedToken 5 $! (-1 - word32ToInt (eatTailWord32 bs)) 1733#else 1734 0x3a -> case word32ToInt (eatTailWord32 bs) of 1735 Just n -> DecodedToken 5 (-1 - n) 1736 Nothing -> DecodeFailure 1737#endif 1738 0x3b -> case word64ToInt (eatTailWord64 bs) of 1739 Just n -> DecodedToken 9 (-1 - n) 1740 Nothing -> DecodeFailure 1741 _ -> DecodeFailure 1742 1743 1744{-# INLINE tryConsumeInteger #-} 1745tryConsumeInteger :: Word8 -> ByteString -> DecodedToken (BigIntToken Integer) 1746tryConsumeInteger hdr !bs = case word8ToWord hdr of 1747 1748 -- Positive integers (type 0) 1749 0x00 -> DecodedToken 1 (BigIntToken True 0) 1750 0x01 -> DecodedToken 1 (BigIntToken True 1) 1751 0x02 -> DecodedToken 1 (BigIntToken True 2) 1752 0x03 -> DecodedToken 1 (BigIntToken True 3) 1753 0x04 -> DecodedToken 1 (BigIntToken True 4) 1754 0x05 -> DecodedToken 1 (BigIntToken True 5) 1755 0x06 -> DecodedToken 1 (BigIntToken True 6) 1756 0x07 -> DecodedToken 1 (BigIntToken True 7) 1757 0x08 -> DecodedToken 1 (BigIntToken True 8) 1758 0x09 -> DecodedToken 1 (BigIntToken True 9) 1759 0x0a -> DecodedToken 1 (BigIntToken True 10) 1760 0x0b -> DecodedToken 1 (BigIntToken True 11) 1761 0x0c -> DecodedToken 1 (BigIntToken True 12) 1762 0x0d -> DecodedToken 1 (BigIntToken True 13) 1763 0x0e -> DecodedToken 1 (BigIntToken True 14) 1764 0x0f -> DecodedToken 1 (BigIntToken True 15) 1765 0x10 -> DecodedToken 1 (BigIntToken True 16) 1766 0x11 -> DecodedToken 1 (BigIntToken True 17) 1767 0x12 -> DecodedToken 1 (BigIntToken True 18) 1768 0x13 -> DecodedToken 1 (BigIntToken True 19) 1769 0x14 -> DecodedToken 1 (BigIntToken True 20) 1770 0x15 -> DecodedToken 1 (BigIntToken True 21) 1771 0x16 -> DecodedToken 1 (BigIntToken True 22) 1772 0x17 -> DecodedToken 1 (BigIntToken True 23) 1773 1774 0x18 -> let !w@(W8# w#) = eatTailWord8 bs 1775 sz = 2 1776 in DecodedToken sz (BigIntToken (isWordCanonical sz w#) $! toInteger w) 1777 0x19 -> let !w@(W16# w#) = eatTailWord16 bs 1778 sz = 3 1779 in DecodedToken sz (BigIntToken (isWordCanonical sz w#) $! toInteger w) 1780 0x1a -> let !w@(W32# w#) = eatTailWord32 bs 1781 sz = 5 1782 in DecodedToken sz (BigIntToken (isWordCanonical sz w#) $! toInteger w) 1783 0x1b -> let !w@(W64# w#) = eatTailWord64 bs 1784 sz = 9 1785#if defined(ARCH_32bit) 1786 in DecodedToken sz (BigIntToken (isWord64Canonical sz w#) $! toInteger w) 1787#else 1788 in DecodedToken sz (BigIntToken (isWordCanonical sz w#) $! toInteger w) 1789#endif 1790 1791 -- Negative integers (type 1) 1792 0x20 -> DecodedToken 1 (BigIntToken True (-1)) 1793 0x21 -> DecodedToken 1 (BigIntToken True (-2)) 1794 0x22 -> DecodedToken 1 (BigIntToken True (-3)) 1795 0x23 -> DecodedToken 1 (BigIntToken True (-4)) 1796 0x24 -> DecodedToken 1 (BigIntToken True (-5)) 1797 0x25 -> DecodedToken 1 (BigIntToken True (-6)) 1798 0x26 -> DecodedToken 1 (BigIntToken True (-7)) 1799 0x27 -> DecodedToken 1 (BigIntToken True (-8)) 1800 0x28 -> DecodedToken 1 (BigIntToken True (-9)) 1801 0x29 -> DecodedToken 1 (BigIntToken True (-10)) 1802 0x2a -> DecodedToken 1 (BigIntToken True (-11)) 1803 0x2b -> DecodedToken 1 (BigIntToken True (-12)) 1804 0x2c -> DecodedToken 1 (BigIntToken True (-13)) 1805 0x2d -> DecodedToken 1 (BigIntToken True (-14)) 1806 0x2e -> DecodedToken 1 (BigIntToken True (-15)) 1807 0x2f -> DecodedToken 1 (BigIntToken True (-16)) 1808 0x30 -> DecodedToken 1 (BigIntToken True (-17)) 1809 0x31 -> DecodedToken 1 (BigIntToken True (-18)) 1810 0x32 -> DecodedToken 1 (BigIntToken True (-19)) 1811 0x33 -> DecodedToken 1 (BigIntToken True (-20)) 1812 0x34 -> DecodedToken 1 (BigIntToken True (-21)) 1813 0x35 -> DecodedToken 1 (BigIntToken True (-22)) 1814 0x36 -> DecodedToken 1 (BigIntToken True (-23)) 1815 0x37 -> DecodedToken 1 (BigIntToken True (-24)) 1816 0x38 -> let !w@(W8# w#) = eatTailWord8 bs 1817 sz = 2 1818 in DecodedToken sz (BigIntToken (isWordCanonical sz w#) $! (-1 - toInteger w)) 1819 0x39 -> let !w@(W16# w#) = eatTailWord16 bs 1820 sz = 3 1821 in DecodedToken sz (BigIntToken (isWordCanonical sz w#) $! (-1 - toInteger w)) 1822 0x3a -> let !w@(W32# w#) = eatTailWord32 bs 1823 sz = 5 1824 in DecodedToken sz (BigIntToken (isWordCanonical sz w#) $! (-1 - toInteger w)) 1825 0x3b -> let !w@(W64# w#) = eatTailWord64 bs 1826 sz = 9 1827#if defined(ARCH_32bit) 1828 in DecodedToken sz (BigIntToken (isWord64Canonical sz w#) $! (-1 - toInteger w)) 1829#else 1830 in DecodedToken sz (BigIntToken (isWordCanonical sz w#) $! (-1 - toInteger w)) 1831#endif 1832 1833 0xc2 -> readBigUInt bs 1834 0xc3 -> readBigNInt bs 1835 1836 _ -> DecodeFailure 1837 1838 1839{-# INLINE tryConsumeBytes #-} 1840tryConsumeBytes :: Word8 -> ByteString -> DecodedToken (LongToken ByteString) 1841tryConsumeBytes hdr !bs = case word8ToWord hdr of 1842 1843 -- Bytes (type 2) 1844 0x40 -> readBytesSmall 0 bs 1845 0x41 -> readBytesSmall 1 bs 1846 0x42 -> readBytesSmall 2 bs 1847 0x43 -> readBytesSmall 3 bs 1848 0x44 -> readBytesSmall 4 bs 1849 0x45 -> readBytesSmall 5 bs 1850 0x46 -> readBytesSmall 6 bs 1851 0x47 -> readBytesSmall 7 bs 1852 0x48 -> readBytesSmall 8 bs 1853 0x49 -> readBytesSmall 9 bs 1854 0x4a -> readBytesSmall 10 bs 1855 0x4b -> readBytesSmall 11 bs 1856 0x4c -> readBytesSmall 12 bs 1857 0x4d -> readBytesSmall 13 bs 1858 0x4e -> readBytesSmall 14 bs 1859 0x4f -> readBytesSmall 15 bs 1860 0x50 -> readBytesSmall 16 bs 1861 0x51 -> readBytesSmall 17 bs 1862 0x52 -> readBytesSmall 18 bs 1863 0x53 -> readBytesSmall 19 bs 1864 0x54 -> readBytesSmall 20 bs 1865 0x55 -> readBytesSmall 21 bs 1866 0x56 -> readBytesSmall 22 bs 1867 0x57 -> readBytesSmall 23 bs 1868 0x58 -> readBytes8 bs 1869 0x59 -> readBytes16 bs 1870 0x5a -> readBytes32 bs 1871 0x5b -> readBytes64 bs 1872 _ -> DecodeFailure 1873 1874 1875{-# INLINE tryConsumeString #-} 1876tryConsumeString :: Word8 -> ByteString -> DecodedToken (LongToken ByteString) 1877tryConsumeString hdr !bs = case word8ToWord hdr of 1878 1879 -- Strings (type 3) 1880 0x60 -> readBytesSmall 0 bs 1881 0x61 -> readBytesSmall 1 bs 1882 0x62 -> readBytesSmall 2 bs 1883 0x63 -> readBytesSmall 3 bs 1884 0x64 -> readBytesSmall 4 bs 1885 0x65 -> readBytesSmall 5 bs 1886 0x66 -> readBytesSmall 6 bs 1887 0x67 -> readBytesSmall 7 bs 1888 0x68 -> readBytesSmall 8 bs 1889 0x69 -> readBytesSmall 9 bs 1890 0x6a -> readBytesSmall 10 bs 1891 0x6b -> readBytesSmall 11 bs 1892 0x6c -> readBytesSmall 12 bs 1893 0x6d -> readBytesSmall 13 bs 1894 0x6e -> readBytesSmall 14 bs 1895 0x6f -> readBytesSmall 15 bs 1896 0x70 -> readBytesSmall 16 bs 1897 0x71 -> readBytesSmall 17 bs 1898 0x72 -> readBytesSmall 18 bs 1899 0x73 -> readBytesSmall 19 bs 1900 0x74 -> readBytesSmall 20 bs 1901 0x75 -> readBytesSmall 21 bs 1902 0x76 -> readBytesSmall 22 bs 1903 0x77 -> readBytesSmall 23 bs 1904 0x78 -> readBytes8 bs 1905 0x79 -> readBytes16 bs 1906 0x7a -> readBytes32 bs 1907 0x7b -> readBytes64 bs 1908 _ -> DecodeFailure 1909 1910 1911{-# INLINE tryConsumeListLen #-} 1912tryConsumeListLen :: Word8 -> ByteString -> DecodedToken Int 1913tryConsumeListLen hdr !bs = case word8ToWord hdr of 1914 -- List structures (type 4) 1915 0x80 -> DecodedToken 1 0 1916 0x81 -> DecodedToken 1 1 1917 0x82 -> DecodedToken 1 2 1918 0x83 -> DecodedToken 1 3 1919 0x84 -> DecodedToken 1 4 1920 0x85 -> DecodedToken 1 5 1921 0x86 -> DecodedToken 1 6 1922 0x87 -> DecodedToken 1 7 1923 0x88 -> DecodedToken 1 8 1924 0x89 -> DecodedToken 1 9 1925 0x8a -> DecodedToken 1 10 1926 0x8b -> DecodedToken 1 11 1927 0x8c -> DecodedToken 1 12 1928 0x8d -> DecodedToken 1 13 1929 0x8e -> DecodedToken 1 14 1930 0x8f -> DecodedToken 1 15 1931 0x90 -> DecodedToken 1 16 1932 0x91 -> DecodedToken 1 17 1933 0x92 -> DecodedToken 1 18 1934 0x93 -> DecodedToken 1 19 1935 0x94 -> DecodedToken 1 20 1936 0x95 -> DecodedToken 1 21 1937 0x96 -> DecodedToken 1 22 1938 0x97 -> DecodedToken 1 23 1939 0x98 -> DecodedToken 2 (word8ToInt (eatTailWord8 bs)) 1940 0x99 -> DecodedToken 3 (word16ToInt (eatTailWord16 bs)) 1941#if defined(ARCH_64bit) 1942 0x9a -> DecodedToken 5 (word32ToInt (eatTailWord32 bs)) 1943#else 1944 0x9a -> case word32ToInt (eatTailWord32 bs) of 1945 Just n -> DecodedToken 5 n 1946 Nothing -> DecodeFailure 1947#endif 1948 0x9b -> case word64ToInt (eatTailWord64 bs) of 1949 Just n -> DecodedToken 9 n 1950 Nothing -> DecodeFailure 1951 _ -> DecodeFailure 1952 1953 1954{-# INLINE tryConsumeMapLen #-} 1955tryConsumeMapLen :: Word8 -> ByteString -> DecodedToken Int 1956tryConsumeMapLen hdr !bs = case word8ToWord hdr of 1957 -- Map structures (type 5) 1958 0xa0 -> DecodedToken 1 0 1959 0xa1 -> DecodedToken 1 1 1960 0xa2 -> DecodedToken 1 2 1961 0xa3 -> DecodedToken 1 3 1962 0xa4 -> DecodedToken 1 4 1963 0xa5 -> DecodedToken 1 5 1964 0xa6 -> DecodedToken 1 6 1965 0xa7 -> DecodedToken 1 7 1966 0xa8 -> DecodedToken 1 8 1967 0xa9 -> DecodedToken 1 9 1968 0xaa -> DecodedToken 1 10 1969 0xab -> DecodedToken 1 11 1970 0xac -> DecodedToken 1 12 1971 0xad -> DecodedToken 1 13 1972 0xae -> DecodedToken 1 14 1973 0xaf -> DecodedToken 1 15 1974 0xb0 -> DecodedToken 1 16 1975 0xb1 -> DecodedToken 1 17 1976 0xb2 -> DecodedToken 1 18 1977 0xb3 -> DecodedToken 1 19 1978 0xb4 -> DecodedToken 1 20 1979 0xb5 -> DecodedToken 1 21 1980 0xb6 -> DecodedToken 1 22 1981 0xb7 -> DecodedToken 1 23 1982 0xb8 -> DecodedToken 2 $! (word8ToInt (eatTailWord8 bs)) 1983 0xb9 -> DecodedToken 3 $! (word16ToInt (eatTailWord16 bs)) 1984#if defined(ARCH_64bit) 1985 0xba -> DecodedToken 5 $! (word32ToInt (eatTailWord32 bs)) 1986#else 1987 0xba -> case word32ToInt (eatTailWord32 bs) of 1988 Just n -> DecodedToken 5 n 1989 Nothing -> DecodeFailure 1990#endif 1991 0xbb -> case word64ToInt (eatTailWord64 bs) of 1992 Just n -> DecodedToken 9 n 1993 Nothing -> DecodeFailure 1994 _ -> DecodeFailure 1995 1996 1997{-# INLINE tryConsumeListLenIndef #-} 1998tryConsumeListLenIndef :: Word8 -> DecodedToken () 1999tryConsumeListLenIndef hdr = case word8ToWord hdr of 2000 0x9f -> DecodedToken 1 () 2001 _ -> DecodeFailure 2002 2003 2004{-# INLINE tryConsumeMapLenIndef #-} 2005tryConsumeMapLenIndef :: Word8 -> DecodedToken () 2006tryConsumeMapLenIndef hdr = case word8ToWord hdr of 2007 0xbf -> DecodedToken 1 () 2008 _ -> DecodeFailure 2009 2010 2011{-# INLINE tryConsumeListLenOrIndef #-} 2012tryConsumeListLenOrIndef :: Word8 -> ByteString -> DecodedToken Int 2013tryConsumeListLenOrIndef hdr !bs = case word8ToWord hdr of 2014 2015 -- List structures (type 4) 2016 0x80 -> DecodedToken 1 0 2017 0x81 -> DecodedToken 1 1 2018 0x82 -> DecodedToken 1 2 2019 0x83 -> DecodedToken 1 3 2020 0x84 -> DecodedToken 1 4 2021 0x85 -> DecodedToken 1 5 2022 0x86 -> DecodedToken 1 6 2023 0x87 -> DecodedToken 1 7 2024 0x88 -> DecodedToken 1 8 2025 0x89 -> DecodedToken 1 9 2026 0x8a -> DecodedToken 1 10 2027 0x8b -> DecodedToken 1 11 2028 0x8c -> DecodedToken 1 12 2029 0x8d -> DecodedToken 1 13 2030 0x8e -> DecodedToken 1 14 2031 0x8f -> DecodedToken 1 15 2032 0x90 -> DecodedToken 1 16 2033 0x91 -> DecodedToken 1 17 2034 0x92 -> DecodedToken 1 18 2035 0x93 -> DecodedToken 1 19 2036 0x94 -> DecodedToken 1 20 2037 0x95 -> DecodedToken 1 21 2038 0x96 -> DecodedToken 1 22 2039 0x97 -> DecodedToken 1 23 2040 0x98 -> DecodedToken 2 $! (word8ToInt (eatTailWord8 bs)) 2041 0x99 -> DecodedToken 3 $! (word16ToInt (eatTailWord16 bs)) 2042#if defined(ARCH_64bit) 2043 0x9a -> DecodedToken 5 $! (word32ToInt (eatTailWord32 bs)) 2044#else 2045 0x9a -> case word32ToInt (eatTailWord32 bs) of 2046 Just n -> DecodedToken 5 n 2047 Nothing -> DecodeFailure 2048#endif 2049 0x9b -> case word64ToInt (eatTailWord64 bs) of 2050 Just n -> DecodedToken 9 n 2051 Nothing -> DecodeFailure 2052 0x9f -> DecodedToken 1 (-1) -- indefinite length 2053 _ -> DecodeFailure 2054 2055 2056{-# INLINE tryConsumeMapLenOrIndef #-} 2057tryConsumeMapLenOrIndef :: Word8 -> ByteString -> DecodedToken Int 2058tryConsumeMapLenOrIndef hdr !bs = case word8ToWord hdr of 2059 2060 -- Map structures (type 5) 2061 0xa0 -> DecodedToken 1 0 2062 0xa1 -> DecodedToken 1 1 2063 0xa2 -> DecodedToken 1 2 2064 0xa3 -> DecodedToken 1 3 2065 0xa4 -> DecodedToken 1 4 2066 0xa5 -> DecodedToken 1 5 2067 0xa6 -> DecodedToken 1 6 2068 0xa7 -> DecodedToken 1 7 2069 0xa8 -> DecodedToken 1 8 2070 0xa9 -> DecodedToken 1 9 2071 0xaa -> DecodedToken 1 10 2072 0xab -> DecodedToken 1 11 2073 0xac -> DecodedToken 1 12 2074 0xad -> DecodedToken 1 13 2075 0xae -> DecodedToken 1 14 2076 0xaf -> DecodedToken 1 15 2077 0xb0 -> DecodedToken 1 16 2078 0xb1 -> DecodedToken 1 17 2079 0xb2 -> DecodedToken 1 18 2080 0xb3 -> DecodedToken 1 19 2081 0xb4 -> DecodedToken 1 20 2082 0xb5 -> DecodedToken 1 21 2083 0xb6 -> DecodedToken 1 22 2084 0xb7 -> DecodedToken 1 23 2085 0xb8 -> DecodedToken 2 $! (word8ToInt (eatTailWord8 bs)) 2086 0xb9 -> DecodedToken 3 $! (word16ToInt (eatTailWord16 bs)) 2087#if defined(ARCH_64bit) 2088 0xba -> DecodedToken 5 $! (word32ToInt (eatTailWord32 bs)) 2089#else 2090 0xba -> case word32ToInt (eatTailWord32 bs) of 2091 Just n -> DecodedToken 5 n 2092 Nothing -> DecodeFailure 2093#endif 2094 0xbb -> case word64ToInt (eatTailWord64 bs) of 2095 Just n -> DecodedToken 9 n 2096 Nothing -> DecodeFailure 2097 0xbf -> DecodedToken 1 (-1) -- indefinite length 2098 _ -> DecodeFailure 2099 2100 2101{-# INLINE tryConsumeTag #-} 2102tryConsumeTag :: Word8 -> ByteString -> DecodedToken Word 2103tryConsumeTag hdr !bs = case word8ToWord hdr of 2104 2105 -- Tagged values (type 6) 2106 0xc0 -> DecodedToken 1 0 2107 0xc1 -> DecodedToken 1 1 2108 0xc2 -> DecodedToken 1 2 2109 0xc3 -> DecodedToken 1 3 2110 0xc4 -> DecodedToken 1 4 2111 0xc5 -> DecodedToken 1 5 2112 0xc6 -> DecodedToken 1 6 2113 0xc7 -> DecodedToken 1 7 2114 0xc8 -> DecodedToken 1 8 2115 0xc9 -> DecodedToken 1 9 2116 0xca -> DecodedToken 1 10 2117 0xcb -> DecodedToken 1 11 2118 0xcc -> DecodedToken 1 12 2119 0xcd -> DecodedToken 1 13 2120 0xce -> DecodedToken 1 14 2121 0xcf -> DecodedToken 1 15 2122 0xd0 -> DecodedToken 1 16 2123 0xd1 -> DecodedToken 1 17 2124 0xd2 -> DecodedToken 1 18 2125 0xd3 -> DecodedToken 1 19 2126 0xd4 -> DecodedToken 1 20 2127 0xd5 -> DecodedToken 1 21 2128 0xd6 -> DecodedToken 1 22 2129 0xd7 -> DecodedToken 1 23 2130 0xd8 -> DecodedToken 2 $! (word8ToWord (eatTailWord8 bs)) 2131 0xd9 -> DecodedToken 3 $! (word16ToWord (eatTailWord16 bs)) 2132 0xda -> DecodedToken 5 $! (word32ToWord (eatTailWord32 bs)) 2133#if defined(ARCH_64bit) 2134 0xdb -> DecodedToken 9 $! (word64ToWord (eatTailWord64 bs)) 2135#else 2136 0xdb -> case word64ToWord (eatTailWord64 bs) of 2137 Just n -> DecodedToken 9 n 2138 Nothing -> DecodeFailure 2139#endif 2140 _ -> DecodeFailure 2141 2142-- 2143-- 64-on-32 bit code paths 2144-- 2145 2146#if defined(ARCH_32bit) 2147tryConsumeWord64 :: Word8 -> ByteString -> DecodedToken Word64 2148tryConsumeWord64 hdr !bs = case word8ToWord hdr of 2149 -- Positive integers (type 0) 2150 0x00 -> DecodedToken 1 0 2151 0x01 -> DecodedToken 1 1 2152 0x02 -> DecodedToken 1 2 2153 0x03 -> DecodedToken 1 3 2154 0x04 -> DecodedToken 1 4 2155 0x05 -> DecodedToken 1 5 2156 0x06 -> DecodedToken 1 6 2157 0x07 -> DecodedToken 1 7 2158 0x08 -> DecodedToken 1 8 2159 0x09 -> DecodedToken 1 9 2160 0x0a -> DecodedToken 1 10 2161 0x0b -> DecodedToken 1 11 2162 0x0c -> DecodedToken 1 12 2163 0x0d -> DecodedToken 1 13 2164 0x0e -> DecodedToken 1 14 2165 0x0f -> DecodedToken 1 15 2166 0x10 -> DecodedToken 1 16 2167 0x11 -> DecodedToken 1 17 2168 0x12 -> DecodedToken 1 18 2169 0x13 -> DecodedToken 1 19 2170 0x14 -> DecodedToken 1 20 2171 0x15 -> DecodedToken 1 21 2172 0x16 -> DecodedToken 1 22 2173 0x17 -> DecodedToken 1 23 2174 0x18 -> DecodedToken 2 $! (word8ToWord64 (eatTailWord8 bs)) 2175 0x19 -> DecodedToken 3 $! (word16ToWord64 (eatTailWord16 bs)) 2176 0x1a -> DecodedToken 5 $! (word32ToWord64 (eatTailWord32 bs)) 2177 0x1b -> DecodedToken 9 $! (eatTailWord64 bs) 2178 _ -> DecodeFailure 2179{-# INLINE tryConsumeWord64 #-} 2180 2181tryConsumeNegWord64 :: Word8 -> ByteString -> DecodedToken Word64 2182tryConsumeNegWord64 hdr !bs = case word8ToWord hdr of 2183 -- Positive integers (type 0) 2184 0x20 -> DecodedToken 1 0 2185 0x21 -> DecodedToken 1 1 2186 0x22 -> DecodedToken 1 2 2187 0x23 -> DecodedToken 1 3 2188 0x24 -> DecodedToken 1 4 2189 0x25 -> DecodedToken 1 5 2190 0x26 -> DecodedToken 1 6 2191 0x27 -> DecodedToken 1 7 2192 0x28 -> DecodedToken 1 8 2193 0x29 -> DecodedToken 1 9 2194 0x2a -> DecodedToken 1 10 2195 0x2b -> DecodedToken 1 11 2196 0x2c -> DecodedToken 1 12 2197 0x2d -> DecodedToken 1 13 2198 0x2e -> DecodedToken 1 14 2199 0x2f -> DecodedToken 1 15 2200 0x30 -> DecodedToken 1 16 2201 0x31 -> DecodedToken 1 17 2202 0x32 -> DecodedToken 1 18 2203 0x33 -> DecodedToken 1 19 2204 0x34 -> DecodedToken 1 20 2205 0x35 -> DecodedToken 1 21 2206 0x36 -> DecodedToken 1 22 2207 0x37 -> DecodedToken 1 23 2208 0x38 -> DecodedToken 2 $! (word8ToWord64 (eatTailWord8 bs)) 2209 0x39 -> DecodedToken 3 $! (word16ToWord64 (eatTailWord16 bs)) 2210 0x3a -> DecodedToken 5 $! (word32ToWord64 (eatTailWord32 bs)) 2211 0x3b -> DecodedToken 9 $! (eatTailWord64 bs) 2212 _ -> DecodeFailure 2213{-# INLINE tryConsumeNegWord64 #-} 2214 2215tryConsumeInt64 :: Word8 -> ByteString -> DecodedToken Int64 2216tryConsumeInt64 hdr !bs = case word8ToWord hdr of 2217 -- Positive integers (type 0) 2218 0x00 -> DecodedToken 1 0 2219 0x01 -> DecodedToken 1 1 2220 0x02 -> DecodedToken 1 2 2221 0x03 -> DecodedToken 1 3 2222 0x04 -> DecodedToken 1 4 2223 0x05 -> DecodedToken 1 5 2224 0x06 -> DecodedToken 1 6 2225 0x07 -> DecodedToken 1 7 2226 0x08 -> DecodedToken 1 8 2227 0x09 -> DecodedToken 1 9 2228 0x0a -> DecodedToken 1 10 2229 0x0b -> DecodedToken 1 11 2230 0x0c -> DecodedToken 1 12 2231 0x0d -> DecodedToken 1 13 2232 0x0e -> DecodedToken 1 14 2233 0x0f -> DecodedToken 1 15 2234 0x10 -> DecodedToken 1 16 2235 0x11 -> DecodedToken 1 17 2236 0x12 -> DecodedToken 1 18 2237 0x13 -> DecodedToken 1 19 2238 0x14 -> DecodedToken 1 20 2239 0x15 -> DecodedToken 1 21 2240 0x16 -> DecodedToken 1 22 2241 0x17 -> DecodedToken 1 23 2242 0x18 -> DecodedToken 2 $! (word8ToInt64 (eatTailWord8 bs)) 2243 0x19 -> DecodedToken 3 $! (word16ToInt64 (eatTailWord16 bs)) 2244 0x1a -> DecodedToken 5 $! (word32ToInt64 (eatTailWord32 bs)) 2245 0x1b -> case word64ToInt64 (eatTailWord64 bs) of 2246 Just n -> DecodedToken 9 n 2247 Nothing -> DecodeFailure 2248 2249 -- Negative integers (type 1) 2250 0x20 -> DecodedToken 1 (-1) 2251 0x21 -> DecodedToken 1 (-2) 2252 0x22 -> DecodedToken 1 (-3) 2253 0x23 -> DecodedToken 1 (-4) 2254 0x24 -> DecodedToken 1 (-5) 2255 0x25 -> DecodedToken 1 (-6) 2256 0x26 -> DecodedToken 1 (-7) 2257 0x27 -> DecodedToken 1 (-8) 2258 0x28 -> DecodedToken 1 (-9) 2259 0x29 -> DecodedToken 1 (-10) 2260 0x2a -> DecodedToken 1 (-11) 2261 0x2b -> DecodedToken 1 (-12) 2262 0x2c -> DecodedToken 1 (-13) 2263 0x2d -> DecodedToken 1 (-14) 2264 0x2e -> DecodedToken 1 (-15) 2265 0x2f -> DecodedToken 1 (-16) 2266 0x30 -> DecodedToken 1 (-17) 2267 0x31 -> DecodedToken 1 (-18) 2268 0x32 -> DecodedToken 1 (-19) 2269 0x33 -> DecodedToken 1 (-20) 2270 0x34 -> DecodedToken 1 (-21) 2271 0x35 -> DecodedToken 1 (-22) 2272 0x36 -> DecodedToken 1 (-23) 2273 0x37 -> DecodedToken 1 (-24) 2274 0x38 -> DecodedToken 2 $! (-1 - word8ToInt64 (eatTailWord8 bs)) 2275 0x39 -> DecodedToken 3 $! (-1 - word16ToInt64 (eatTailWord16 bs)) 2276 0x3a -> DecodedToken 5 $! (-1 - word32ToInt64 (eatTailWord32 bs)) 2277 0x3b -> case word64ToInt64 (eatTailWord64 bs) of 2278 Just n -> DecodedToken 9 (-1 - n) 2279 Nothing -> DecodeFailure 2280 _ -> DecodeFailure 2281{-# INLINE tryConsumeInt64 #-} 2282 2283tryConsumeListLen64 :: Word8 -> ByteString -> DecodedToken Int64 2284tryConsumeListLen64 hdr !bs = case word8ToWord hdr of 2285 -- List structures (type 4) 2286 0x80 -> DecodedToken 1 0 2287 0x81 -> DecodedToken 1 1 2288 0x82 -> DecodedToken 1 2 2289 0x83 -> DecodedToken 1 3 2290 0x84 -> DecodedToken 1 4 2291 0x85 -> DecodedToken 1 5 2292 0x86 -> DecodedToken 1 6 2293 0x87 -> DecodedToken 1 7 2294 0x88 -> DecodedToken 1 8 2295 0x89 -> DecodedToken 1 9 2296 0x8a -> DecodedToken 1 10 2297 0x8b -> DecodedToken 1 11 2298 0x8c -> DecodedToken 1 12 2299 0x8d -> DecodedToken 1 13 2300 0x8e -> DecodedToken 1 14 2301 0x8f -> DecodedToken 1 15 2302 0x90 -> DecodedToken 1 16 2303 0x91 -> DecodedToken 1 17 2304 0x92 -> DecodedToken 1 18 2305 0x93 -> DecodedToken 1 19 2306 0x94 -> DecodedToken 1 20 2307 0x95 -> DecodedToken 1 21 2308 0x96 -> DecodedToken 1 22 2309 0x97 -> DecodedToken 1 23 2310 0x98 -> DecodedToken 2 $! (word8ToInt64 (eatTailWord8 bs)) 2311 0x99 -> DecodedToken 3 $! (word16ToInt64 (eatTailWord16 bs)) 2312 0x9a -> DecodedToken 5 $! (word32ToInt64 (eatTailWord32 bs)) 2313 0x9b -> case word64ToInt64 (eatTailWord64 bs) of 2314 Just n -> DecodedToken 9 n 2315 Nothing -> DecodeFailure 2316 _ -> DecodeFailure 2317{-# INLINE tryConsumeListLen64 #-} 2318 2319tryConsumeMapLen64 :: Word8 -> ByteString -> DecodedToken Int64 2320tryConsumeMapLen64 hdr !bs = case word8ToWord hdr of 2321 -- Map structures (type 5) 2322 0xa0 -> DecodedToken 1 0 2323 0xa1 -> DecodedToken 1 1 2324 0xa2 -> DecodedToken 1 2 2325 0xa3 -> DecodedToken 1 3 2326 0xa4 -> DecodedToken 1 4 2327 0xa5 -> DecodedToken 1 5 2328 0xa6 -> DecodedToken 1 6 2329 0xa7 -> DecodedToken 1 7 2330 0xa8 -> DecodedToken 1 8 2331 0xa9 -> DecodedToken 1 9 2332 0xaa -> DecodedToken 1 10 2333 0xab -> DecodedToken 1 11 2334 0xac -> DecodedToken 1 12 2335 0xad -> DecodedToken 1 13 2336 0xae -> DecodedToken 1 14 2337 0xaf -> DecodedToken 1 15 2338 0xb0 -> DecodedToken 1 16 2339 0xb1 -> DecodedToken 1 17 2340 0xb2 -> DecodedToken 1 18 2341 0xb3 -> DecodedToken 1 19 2342 0xb4 -> DecodedToken 1 20 2343 0xb5 -> DecodedToken 1 21 2344 0xb6 -> DecodedToken 1 22 2345 0xb7 -> DecodedToken 1 23 2346 0xb8 -> DecodedToken 2 $! (word8ToInt64 (eatTailWord8 bs)) 2347 0xb9 -> DecodedToken 3 $! (word16ToInt64 (eatTailWord16 bs)) 2348 0xba -> DecodedToken 5 $! (word32ToInt64 (eatTailWord32 bs)) 2349 0xbb -> case word64ToInt64 (eatTailWord64 bs) of 2350 Just n -> DecodedToken 9 n 2351 Nothing -> DecodeFailure 2352 _ -> DecodeFailure 2353{-# INLINE tryConsumeMapLen64 #-} 2354 2355tryConsumeTag64 :: Word8 -> ByteString -> DecodedToken Word64 2356tryConsumeTag64 hdr !bs = case word8ToWord hdr of 2357 -- Tagged values (type 6) 2358 0xc0 -> DecodedToken 1 0 2359 0xc1 -> DecodedToken 1 1 2360 0xc2 -> DecodedToken 1 2 2361 0xc3 -> DecodedToken 1 3 2362 0xc4 -> DecodedToken 1 4 2363 0xc5 -> DecodedToken 1 5 2364 0xc6 -> DecodedToken 1 6 2365 0xc7 -> DecodedToken 1 7 2366 0xc8 -> DecodedToken 1 8 2367 0xc9 -> DecodedToken 1 9 2368 0xca -> DecodedToken 1 10 2369 0xcb -> DecodedToken 1 11 2370 0xcc -> DecodedToken 1 12 2371 0xcd -> DecodedToken 1 13 2372 0xce -> DecodedToken 1 14 2373 0xcf -> DecodedToken 1 15 2374 0xd0 -> DecodedToken 1 16 2375 0xd1 -> DecodedToken 1 17 2376 0xd2 -> DecodedToken 1 18 2377 0xd3 -> DecodedToken 1 19 2378 0xd4 -> DecodedToken 1 20 2379 0xd5 -> DecodedToken 1 21 2380 0xd6 -> DecodedToken 1 22 2381 0xd7 -> DecodedToken 1 23 2382 0xd8 -> DecodedToken 2 $! (word8ToWord64 (eatTailWord8 bs)) 2383 0xd9 -> DecodedToken 3 $! (word16ToWord64 (eatTailWord16 bs)) 2384 0xda -> DecodedToken 5 $! (word32ToWord64 (eatTailWord32 bs)) 2385 0xdb -> DecodedToken 9 $! (eatTailWord64 bs) 2386 _ -> DecodeFailure 2387{-# INLINE tryConsumeTag64 #-} 2388#endif 2389 2390{-# INLINE tryConsumeFloat #-} 2391tryConsumeFloat :: Word8 -> ByteString -> DecodedToken Float 2392tryConsumeFloat hdr !bs = case word8ToWord hdr of 2393 0xf9 -> DecodedToken 3 $! (wordToFloat16 (eatTailWord16 bs)) 2394 0xfa -> DecodedToken 5 $! (wordToFloat32 (eatTailWord32 bs)) 2395 _ -> DecodeFailure 2396 2397 2398{-# INLINE tryConsumeDouble #-} 2399tryConsumeDouble :: Word8 -> ByteString -> DecodedToken Double 2400tryConsumeDouble hdr !bs = case word8ToWord hdr of 2401 0xf9 -> DecodedToken 3 $! (float2Double $ wordToFloat16 (eatTailWord16 bs)) 2402 0xfa -> DecodedToken 5 $! (float2Double $ wordToFloat32 (eatTailWord32 bs)) 2403 0xfb -> DecodedToken 9 $! (wordToFloat64 (eatTailWord64 bs)) 2404 _ -> DecodeFailure 2405 2406 2407{-# INLINE tryConsumeBool #-} 2408tryConsumeBool :: Word8 -> DecodedToken Bool 2409tryConsumeBool hdr = case word8ToWord hdr of 2410 0xf4 -> DecodedToken 1 False 2411 0xf5 -> DecodedToken 1 True 2412 _ -> DecodeFailure 2413 2414 2415{-# INLINE tryConsumeSimple #-} 2416tryConsumeSimple :: Word8 -> ByteString -> DecodedToken Word 2417tryConsumeSimple hdr !bs = case word8ToWord hdr of 2418 2419 -- Simple and floats (type 7) 2420 0xe0 -> DecodedToken 1 0 2421 0xe1 -> DecodedToken 1 1 2422 0xe2 -> DecodedToken 1 2 2423 0xe3 -> DecodedToken 1 3 2424 0xe4 -> DecodedToken 1 4 2425 0xe5 -> DecodedToken 1 5 2426 0xe6 -> DecodedToken 1 6 2427 0xe7 -> DecodedToken 1 7 2428 0xe8 -> DecodedToken 1 8 2429 0xe9 -> DecodedToken 1 9 2430 0xea -> DecodedToken 1 10 2431 0xeb -> DecodedToken 1 11 2432 0xec -> DecodedToken 1 12 2433 0xed -> DecodedToken 1 13 2434 0xee -> DecodedToken 1 14 2435 0xef -> DecodedToken 1 15 2436 0xf0 -> DecodedToken 1 16 2437 0xf1 -> DecodedToken 1 17 2438 0xf2 -> DecodedToken 1 18 2439 0xf3 -> DecodedToken 1 19 2440 0xf4 -> DecodedToken 1 20 2441 0xf5 -> DecodedToken 1 21 2442 0xf6 -> DecodedToken 1 22 2443 0xf7 -> DecodedToken 1 23 2444 0xf8 -> DecodedToken 2 $! (word8ToWord (eatTailWord8 bs)) 2445 _ -> DecodeFailure 2446 2447 2448{-# INLINE tryConsumeBytesIndef #-} 2449tryConsumeBytesIndef :: Word8 -> DecodedToken () 2450tryConsumeBytesIndef hdr = case word8ToWord hdr of 2451 0x5f -> DecodedToken 1 () 2452 _ -> DecodeFailure 2453 2454 2455{-# INLINE tryConsumeStringIndef #-} 2456tryConsumeStringIndef :: Word8 -> DecodedToken () 2457tryConsumeStringIndef hdr = case word8ToWord hdr of 2458 0x7f -> DecodedToken 1 () 2459 _ -> DecodeFailure 2460 2461 2462{-# INLINE tryConsumeNull #-} 2463tryConsumeNull :: Word8 -> DecodedToken () 2464tryConsumeNull hdr = case word8ToWord hdr of 2465 0xf6 -> DecodedToken 1 () 2466 _ -> DecodeFailure 2467 2468 2469{-# INLINE tryConsumeBreakOr #-} 2470tryConsumeBreakOr :: Word8 -> DecodedToken () 2471tryConsumeBreakOr hdr = case word8ToWord hdr of 2472 0xff -> DecodedToken 1 () 2473 _ -> DecodeFailure 2474 2475{-# INLINE readBytesSmall #-} 2476readBytesSmall :: Int -> ByteString -> DecodedToken (LongToken ByteString) 2477readBytesSmall n bs 2478 -- if n <= bound then ok return it all 2479 | n + hdrsz <= BS.length bs 2480 = DecodedToken (n+hdrsz) $ Fits True $ 2481 BS.unsafeTake n (BS.unsafeDrop hdrsz bs) 2482 2483 -- if n > bound then slow path, multi-chunk 2484 | otherwise 2485 = DecodedToken hdrsz $ TooLong True n 2486 where 2487 hdrsz = 1 2488 2489{-# INLINE readBytes8 #-} 2490{-# INLINE readBytes16 #-} 2491{-# INLINE readBytes32 #-} 2492{-# INLINE readBytes64 #-} 2493readBytes8, readBytes16, readBytes32, readBytes64 2494 :: ByteString -> DecodedToken (LongToken ByteString) 2495 2496readBytes8 bs 2497 | n <= BS.length bs - hdrsz 2498 = DecodedToken (n+hdrsz) $ Fits lengthCanonical $ 2499 BS.unsafeTake n (BS.unsafeDrop hdrsz bs) 2500 2501 -- if n > bound then slow path, multi-chunk 2502 | otherwise 2503 = DecodedToken hdrsz $ TooLong lengthCanonical n 2504 where 2505 hdrsz = 2 2506 !n@(I# n#) = word8ToInt (eatTailWord8 bs) 2507 lengthCanonical = isIntCanonical hdrsz n# 2508 2509readBytes16 bs 2510 | n <= BS.length bs - hdrsz 2511 = DecodedToken (n+hdrsz) $ Fits lengthCanonical $ 2512 BS.unsafeTake n (BS.unsafeDrop hdrsz bs) 2513 2514 -- if n > bound then slow path, multi-chunk 2515 | otherwise 2516 = DecodedToken hdrsz $ TooLong lengthCanonical n 2517 where 2518 hdrsz = 3 2519 !n@(I# n#) = word16ToInt (eatTailWord16 bs) 2520 lengthCanonical = isIntCanonical hdrsz n# 2521 2522readBytes32 bs = case word32ToInt (eatTailWord32 bs) of 2523#if defined(ARCH_32bit) 2524 Just n@(I# n#) 2525#else 2526 n@(I# n#) 2527#endif 2528 | n <= BS.length bs - hdrsz 2529 -> DecodedToken (n+hdrsz) $ Fits (isIntCanonical hdrsz n#) $ 2530 BS.unsafeTake n (BS.unsafeDrop hdrsz bs) 2531 2532 -- if n > bound then slow path, multi-chunk 2533 | otherwise -> DecodedToken hdrsz $ TooLong (isIntCanonical hdrsz n#) n 2534 2535#if defined(ARCH_32bit) 2536 Nothing -> DecodeFailure 2537#endif 2538 where 2539 hdrsz = 5 2540 2541readBytes64 bs = case word64ToInt (eatTailWord64 bs) of 2542 Just n@(I# n#) 2543 | n <= BS.length bs - hdrsz 2544 -> DecodedToken (n+hdrsz) $ Fits (isIntCanonical hdrsz n#) $ 2545 BS.unsafeTake n (BS.unsafeDrop hdrsz bs) 2546 2547 -- if n > bound then slow path, multi-chunk 2548 | otherwise -> DecodedToken hdrsz $ TooLong (isIntCanonical hdrsz n#) n 2549 2550 Nothing -> DecodeFailure 2551 where 2552 hdrsz = 9 2553 2554------------------------------------------------------------------------------ 2555-- Reading big integers 2556-- 2557 2558-- Big ints consist of two CBOR tokens: a tag token (2 for positive, 3 for 2559-- negative) followed by a bytes token. Our usual invariant (for go_fast and 2560-- go_fast_end) only guarantees that we've got enough space to decode the 2561-- first token. So given that there's two tokens and the second is variable 2562-- length then there are several points where we can discover we're out of 2563-- input buffer space. 2564-- 2565-- In those cases we need to break out of the fast path but we must arrange 2566-- things so that we can continue later once we've got more input buffer. 2567-- 2568-- In particular, we might run out of space when: 2569-- 1. trying to decode the header of the second token (bytes); or 2570-- 2. trying to read the bytes body 2571-- 2572--- The existing mechanisms we've got to drop out of the fast path are: 2573-- * SlowDecodeAction to re-read a whole token 2574-- * SlowConsumeTokenBytes to read the body of a bytes token 2575-- 2576-- Of course when we resume we need to convert the bytes into an integer. 2577-- Rather than making new fast path return mechanisms we can reuse the 2578-- existing ones, so long as we're prepared to allocate new continuation 2579-- closures. This seems a reasonable price to pay to reduce complexity since 2580-- decoding a big int across an input buffer boundary ought to be rare, and 2581-- allocating a new continuation closure isn't that expensive. 2582-- 2583-- Note that canonicity information is calculated lazily. This way we don't need 2584-- to concern ourselves with two distinct paths, while according to benchmarks 2585-- it doesn't affect performance in the non-canonical case. 2586 2587data BigIntToken a = BigIntToken Bool {- canonical? -} Integer 2588 | BigUIntNeedBody Bool {- canonical? -} Int 2589 | BigNIntNeedBody Bool {- canonical? -} Int 2590 | BigUIntNeedHeader 2591 | BigNIntNeedHeader 2592 2593-- So when we have to break out because we can't read the whole bytes body 2594-- in one go then we need to use SlowConsumeTokenBytes but we can adjust the 2595-- continuation so that when we get the ByteString back we convert it to an 2596-- Integer before calling the original continuation. 2597 2598adjustContBigUIntNeedBody, adjustContBigNIntNeedBody 2599 :: (Integer -> ST s (DecodeAction s a)) 2600 -> (ByteString -> ST s (DecodeAction s a)) 2601 2602adjustContBigUIntNeedBody k = \bs -> k $! uintegerFromBytes bs 2603adjustContBigNIntNeedBody k = \bs -> k $! nintegerFromBytes bs 2604 2605adjustContCanonicalBigUIntNeedBody, adjustContCanonicalBigNIntNeedBody 2606 :: (Integer -> ST s (DecodeAction s a)) 2607 -> (ByteString -> ST s (DecodeAction s a)) 2608 2609adjustContCanonicalBigUIntNeedBody k = \bs -> 2610 if isBigIntRepCanonical bs 2611 then k $! uintegerFromBytes bs 2612 else pure $! D.Fail ("non-canonical integer") 2613 2614adjustContCanonicalBigNIntNeedBody k = \bs -> 2615 if isBigIntRepCanonical bs 2616 then k $! nintegerFromBytes bs 2617 else pure $! D.Fail ("non-canonical integer") 2618 2619-- And when we have to break out because we can't read the bytes token header 2620-- in one go then we need to use SlowDecodeAction but we have to make two 2621-- adjustments. When we resume we need to read a bytes token, not a big int. 2622-- That is we don't want to re-read the tag token. Indeed we cannot even if we 2623-- wanted to because the slow path code only guarantees to arrange for one 2624-- complete token header in the input buffer. So we must pretend that we did 2625-- in fact want to read a bytes token using ConsumeBytes, and then we can 2626-- adjust the continuation for that in the same way as above. 2627 2628adjustContBigUIntNeedHeader, adjustContBigNIntNeedHeader 2629 :: (Integer -> ST s (DecodeAction s a)) 2630 -> DecodeAction s a 2631 2632adjustContBigUIntNeedHeader k = ConsumeBytes (\bs -> k $! uintegerFromBytes bs) 2633adjustContBigNIntNeedHeader k = ConsumeBytes (\bs -> k $! nintegerFromBytes bs) 2634 2635adjustContCanonicalBigUIntNeedHeader, adjustContCanonicalBigNIntNeedHeader 2636 :: (Integer -> ST s (DecodeAction s a)) 2637 -> DecodeAction s a 2638 2639adjustContCanonicalBigUIntNeedHeader k = ConsumeBytesCanonical $ \bs -> 2640 if isBigIntRepCanonical bs 2641 then k $! uintegerFromBytes bs 2642 else pure $! D.Fail ("non-canonical integer") 2643 2644adjustContCanonicalBigNIntNeedHeader k = ConsumeBytesCanonical $ \bs -> 2645 if isBigIntRepCanonical bs 2646 then k $! nintegerFromBytes bs 2647 else pure $! D.Fail ("non-canonical integer") 2648 2649-- So finally when reading the input buffer we check if we have enough space 2650-- to read the header of the bytes token and then try to read the bytes body, 2651-- using the appropriate break-out codes as above. 2652 2653{-# INLINE readBigUInt #-} 2654readBigUInt :: ByteString -> DecodedToken (BigIntToken a) 2655readBigUInt bs 2656 | let bs' = BS.unsafeTail bs 2657 , not (BS.null bs') 2658 , let !hdr = BS.unsafeHead bs' 2659 , BS.length bs' >= tokenSize hdr 2660 = case tryConsumeBytes hdr bs' of 2661 DecodeFailure -> DecodeFailure 2662 DecodedToken sz (Fits canonical bstr) -> DecodedToken (1+sz) 2663 (BigIntToken (canonical && isBigIntRepCanonical bstr) 2664 (uintegerFromBytes bstr)) 2665 DecodedToken sz (TooLong canonical len) -> 2666 DecodedToken (1+sz) (BigUIntNeedBody canonical len) 2667 2668 | otherwise 2669 = DecodedToken 1 BigUIntNeedHeader 2670 2671{-# INLINE readBigNInt #-} 2672readBigNInt :: ByteString -> DecodedToken (BigIntToken a) 2673readBigNInt bs 2674 | let bs' = BS.unsafeTail bs 2675 , not (BS.null bs') 2676 , let !hdr = BS.unsafeHead bs' 2677 , BS.length bs' >= tokenSize hdr 2678 = case tryConsumeBytes hdr bs' of 2679 DecodeFailure -> DecodeFailure 2680 DecodedToken sz (Fits canonical bstr) -> DecodedToken (1+sz) 2681 (BigIntToken (canonical && isBigIntRepCanonical bstr) 2682 (nintegerFromBytes bstr)) 2683 DecodedToken sz (TooLong canonical len) -> 2684 DecodedToken (1+sz) (BigNIntNeedBody canonical len) 2685 2686 | otherwise 2687 = DecodedToken 1 BigNIntNeedHeader 2688 2689-- Binary representation of a big integer is canonical if it's at least 9 bytes 2690-- long (as for smaller values the canonical representation is the same one as 2691-- for Int) and the leading byte is not zero (meaning that it's the smallest 2692-- representation for the number in question). 2693isBigIntRepCanonical :: ByteString -> Bool 2694isBigIntRepCanonical bstr = BS.length bstr > 8 && BS.unsafeHead bstr /= 0x00 2695