1{-# LANGUAGE BangPatterns           #-}
2{-# LANGUAGE CPP                    #-}
3{-# LANGUAGE DeriveDataTypeable     #-}
4{-# LANGUAGE FlexibleContexts       #-}
5{-# LANGUAGE FlexibleInstances      #-}
6{-# LANGUAGE FunctionalDependencies #-}
7{-# LANGUAGE MagicHash              #-}
8{-# LANGUAGE MultiParamTypeClasses  #-}
9{-# LANGUAGE OverloadedStrings      #-}
10{-# LANGUAGE RankNTypes             #-}
11
12#if __GLASGOW_HASKELL__ < 900
13-- Bump up from the default 1.5, otherwise our decoder fast path is no good.
14-- We went over the threshold when we switched to using ST.
15--
16-- However, this flag is not supported on GHC 9.0 and later and eye-balling the
17-- Core suggests that the new inlining heuristics don't require it.
18{-# OPTIONS_GHC -funfolding-keeness-factor=2.0 #-}
19#endif
20
21-- |
22-- Module      : Codec.CBOR.Read
23-- Copyright   : (c) Duncan Coutts 2015-2017
24-- License     : BSD3-style (see LICENSE.txt)
25--
26-- Maintainer  : duncan@community.haskell.org
27-- Stability   : experimental
28-- Portability : non-portable (GHC extensions)
29--
30-- Tools for reading values in a CBOR-encoded format
31-- back into ordinary values.
32--
33module Codec.CBOR.Read
34  ( deserialiseFromBytes         -- :: Decoder a -> ByteString -> Either String (ByteString, a)
35  , deserialiseFromBytesWithSize -- :: Decoder a -> ByteString -> Either String (ByteString, ByteOffset, a)
36  , deserialiseIncremental       -- :: Decoder a -> ST s (IDecode s a)
37  , DeserialiseFailure(..)
38  , IDecode(..)
39  , ByteOffset
40  ) where
41
42#include "cbor.h"
43
44#if !MIN_VERSION_base(4,8,0)
45import           Control.Applicative
46#endif
47import           GHC.Int
48
49import           Control.DeepSeq
50import           Control.Monad (ap)
51import           Control.Monad.ST
52import           Data.Array.IArray
53import           Data.Array.Unboxed
54import qualified Data.Array.Base as A
55import           Data.Monoid
56import           Data.Bits
57import           Data.ByteString                (ByteString)
58import qualified Data.ByteString                as BS
59import qualified Data.ByteString.Unsafe         as BS
60import qualified Data.ByteString.Lazy           as LBS
61import qualified Data.ByteString.Lazy.Internal  as LBS
62import qualified Data.Text          as T
63import qualified Data.Text.Encoding as T
64import           Data.Word
65import           GHC.Word
66#if defined(ARCH_32bit)
67import           GHC.IntWord64
68#endif
69import           GHC.Exts
70import           GHC.Float (float2Double)
71import           Data.Typeable
72import           Control.Exception
73
74-- We do all numeric conversions explicitly to be careful about overflows.
75import           Prelude hiding (fromIntegral)
76
77import qualified Codec.CBOR.ByteArray as BA
78import           Codec.CBOR.Decoding hiding (DecodeAction(Done, Fail))
79import           Codec.CBOR.Decoding (DecodeAction)
80import qualified Codec.CBOR.Decoding as D
81import           Codec.CBOR.Magic
82
83--------------------------------------------------------------------------------
84
85-- | An exception type that may be returned (by pure functions) or
86-- thrown (by IO actions) that fail to deserialise a given input.
87--
88-- @since 0.2.0.0
89data DeserialiseFailure = DeserialiseFailure ByteOffset String
90  deriving (Eq, Show, Typeable)
91
92instance NFData DeserialiseFailure where
93  rnf (DeserialiseFailure offset msg) = rnf offset `seq` rnf msg `seq` ()
94
95instance Exception DeserialiseFailure where
96#if MIN_VERSION_base(4,8,0)
97    displayException (DeserialiseFailure off msg) =
98      "Codec.CBOR: deserialising failed at offset "
99           ++ show off ++ " : " ++ msg
100#endif
101
102-- | An Incremental decoder, used to represent the result of
103-- attempting to run a decoder over a given input, and return a value
104-- of type @a@.
105data IDecode s a
106  = -- | The decoder has consumed the available input and needs more
107    -- to continue. Provide 'Just' if more input is available and
108    -- 'Nothing' otherwise, and you will get a new 'IDecode'.
109    Partial (Maybe BS.ByteString -> ST s (IDecode s a))
110
111    -- | The decoder has successfully finished. Except for the output
112    -- value you also get any unused input as well as the number of
113    -- bytes consumed.
114  | Done !BS.ByteString {-# UNPACK #-} !ByteOffset a
115
116    -- | The decoder ran into an error. The decoder either used
117    -- 'fail' or was not provided enough input. Contains any
118    -- unconsumed input, the number of bytes consumed, and a
119    -- 'DeserialiseFailure' exception describing the reason why the
120    -- failure occurred.
121  | Fail !BS.ByteString {-# UNPACK #-} !ByteOffset DeserialiseFailure
122
123-- | Given a 'Decoder' and some 'LBS.ByteString' representing
124-- an encoded CBOR value, return 'Either' the decoded CBOR value
125-- or an error. In addition to the decoded value return any remaining input
126-- content.
127--
128-- @since 0.2.0.0
129deserialiseFromBytes :: (forall s. Decoder s a)
130                     -> LBS.ByteString
131                     -> Either DeserialiseFailure (LBS.ByteString, a)
132deserialiseFromBytes d lbs =
133    fmap f $ runIDecode (deserialiseIncremental d) lbs
134  where f (rest, _, x) = (rest, x)
135
136-- | Given a 'Decoder' and some 'LBS.ByteString' representing
137-- an encoded CBOR value, return 'Either' the decoded CBOR value
138-- or an error. In addition to the decoded value return any remaining input
139-- content and the number of bytes consumed.
140--
141-- @since 0.2.0.0
142deserialiseFromBytesWithSize :: (forall s. Decoder s a)
143                             -> LBS.ByteString
144                             -> Either DeserialiseFailure (LBS.ByteString, ByteOffset, a)
145deserialiseFromBytesWithSize d lbs =
146    runIDecode (deserialiseIncremental d) lbs
147
148runIDecode :: (forall s. ST s (IDecode s a))
149           -> LBS.ByteString
150           -> Either DeserialiseFailure (LBS.ByteString, ByteOffset, a)
151runIDecode d lbs =
152    runST (go lbs =<< d)
153  where
154    go :: LBS.ByteString
155       -> IDecode s a
156       -> ST s (Either DeserialiseFailure (LBS.ByteString, ByteOffset, a))
157    go  _                  (Fail _ _ err)  = return (Left err)
158    go  lbs'               (Done bs off x) = let rest
159                                                   | BS.null bs = lbs'
160                                                   | otherwise  = LBS.Chunk bs lbs'
161                                             in return (Right (rest, off, x))
162    go  LBS.Empty          (Partial  k)    = k Nothing   >>= go LBS.Empty
163    go (LBS.Chunk bs lbs') (Partial  k)    = k (Just bs) >>= go lbs'
164
165-- | Run a 'Decoder' incrementally, returning a continuation
166-- representing the result of the incremental decode.
167--
168-- @since 0.2.0.0
169deserialiseIncremental :: Decoder s a -> ST s (IDecode s a)
170deserialiseIncremental decoder = do
171    da <- getDecodeAction decoder
172    runIncrementalDecoder (runDecodeAction da)
173
174----------------------------------------------
175-- A monad for building incremental decoders
176--
177
178newtype IncrementalDecoder s a = IncrementalDecoder {
179       unIncrementalDecoder ::
180         forall r. (a -> ST s (IDecode s r)) -> ST s (IDecode s r)
181     }
182
183instance Functor (IncrementalDecoder s) where
184    fmap f a = a >>= return . f
185
186instance Applicative (IncrementalDecoder s) where
187    pure x = IncrementalDecoder $ \k -> k x
188    (<*>) = ap
189
190instance Monad (IncrementalDecoder s) where
191    return = pure
192
193    {-# INLINE (>>=) #-}
194    m >>= f = IncrementalDecoder $ \k ->
195                unIncrementalDecoder m $ \x ->
196                  unIncrementalDecoder (f x) k
197
198runIncrementalDecoder :: IncrementalDecoder s (ByteString, ByteOffset, a)
199                      -> ST s (IDecode s a)
200runIncrementalDecoder (IncrementalDecoder f) =
201  f (\(trailing, off, x) -> return $ Done trailing off x)
202
203decodeFail :: ByteString -> ByteOffset -> String -> IncrementalDecoder s a
204decodeFail trailing off msg = IncrementalDecoder $ \_ -> return $ Fail trailing off exn
205  where exn = DeserialiseFailure off msg
206
207needChunk :: IncrementalDecoder s (Maybe ByteString)
208needChunk = IncrementalDecoder $ \k -> return $ Partial $ \mbs -> k mbs
209
210lift :: ST s a -> IncrementalDecoder s a
211lift action = IncrementalDecoder (\k -> action >>= k)
212
213--------------------------------------------
214-- The main decoder
215--
216
217-- The top level entry point
218runDecodeAction :: DecodeAction s a
219                -> IncrementalDecoder s (ByteString, ByteOffset, a)
220runDecodeAction (D.Fail msg)        = decodeFail BS.empty 0 msg
221runDecodeAction (D.Done x)          = return (BS.empty, 0, x)
222runDecodeAction (D.PeekAvailable k) = lift (k 0#) >>= runDecodeAction
223runDecodeAction da = do
224    mbs <- needChunk
225    case mbs of
226      Nothing -> decodeFail BS.empty 0 "end of input"
227      Just bs -> go_slow da bs 0
228
229-- The decoder is split into a fast path and a slow path. The fast path is
230-- used for a single input chunk. It decodes as far as it can, reading only
231-- whole tokens that fit within the input chunk. When it cannot read any
232-- further it returns control to the slow path. The slow path fixes up all the
233-- complicated corner cases with tokens that span chunk boundaries, gets more
234-- input and then goes back into the fast path.
235--
236-- The idea is that chunks are usually large, and we can use simpler and
237-- faster code if we don't make it deal with the general case of tokens that
238-- span chunk boundaries.
239
240-- These are all the ways in which the fast path can finish, and return
241-- control to the slow path. In particular there are three different cases
242-- of tokens spanning a chunk boundary.
243--
244data SlowPath s a
245   = FastDone                      {-# UNPACK #-} !ByteString a
246   | SlowConsumeTokenBytes         {-# UNPACK #-} !ByteString (ByteString   -> ST s (DecodeAction s a)) {-# UNPACK #-} !Int
247   | SlowConsumeTokenByteArray     {-# UNPACK #-} !ByteString (BA.ByteArray -> ST s (DecodeAction s a)) {-# UNPACK #-} !Int
248   | SlowConsumeTokenString        {-# UNPACK #-} !ByteString (T.Text       -> ST s (DecodeAction s a)) {-# UNPACK #-} !Int
249   | SlowConsumeTokenUtf8ByteArray {-# UNPACK #-} !ByteString (BA.ByteArray -> ST s (DecodeAction s a)) {-# UNPACK #-} !Int
250#if defined(ARCH_32bit)
251   | SlowPeekByteOffset            {-# UNPACK #-} !ByteString (Int64#       -> ST s (DecodeAction s a))
252#else
253   | SlowPeekByteOffset            {-# UNPACK #-} !ByteString (Int#         -> ST s (DecodeAction s a))
254#endif
255   | SlowDecodeAction              {-# UNPACK #-} !ByteString (DecodeAction s a)
256   | SlowFail                      {-# UNPACK #-} !ByteString String
257
258
259-- The main fast path. The fast path itself is actually split into two parts
260-- the main version 'go_fast' and a version used when we are near the end of
261-- the chunk, 'go_fast_end'.
262--
263-- This version can then do fewer tests when we're not near the end of the
264-- chunk, in particular we just check if there's enough input buffer space
265-- left for the largest possible fixed-size token (8+1 bytes).
266--
267go_fast :: ByteString -> DecodeAction s a -> ST s (SlowPath s a)
268
269go_fast !bs da | BS.length bs < 9 = go_fast_end bs da
270
271go_fast !bs da@(ConsumeWord k) =
272    case tryConsumeWord (BS.unsafeHead bs) bs of
273      DecodeFailure           -> go_fast_end bs da
274      DecodedToken sz (W# w#) -> k w# >>= go_fast (BS.unsafeDrop sz bs)
275
276go_fast !bs da@(ConsumeWord8 k) =
277    case tryConsumeWord (BS.unsafeHead bs) bs of
278      DecodeFailure           -> go_fast_end bs da
279      DecodedToken sz (W# w#) ->
280        case gtWord# w# 0xff## of
281          0#                  -> k w#  >>= go_fast (BS.unsafeDrop sz bs)
282          _                   -> go_fast_end bs da
283
284go_fast !bs da@(ConsumeWord16 k) =
285    case tryConsumeWord (BS.unsafeHead bs) bs of
286      DecodeFailure           -> go_fast_end bs da
287      DecodedToken sz (W# w#) ->
288        case gtWord# w# 0xffff## of
289          0#                  -> k w#  >>= go_fast (BS.unsafeDrop sz bs)
290          _                   -> go_fast_end bs da
291
292go_fast !bs da@(ConsumeWord32 k) =
293    case tryConsumeWord (BS.unsafeHead bs) bs of
294      DecodeFailure           -> go_fast_end bs da
295      DecodedToken sz (W# w#) ->
296#if defined(ARCH_32bit)
297                                 k w# >>= go_fast (BS.unsafeDrop sz bs)
298#else
299        case gtWord# w# 0xffffffff## of
300          0#                  -> k w# >>= go_fast (BS.unsafeDrop sz bs)
301          _                   -> go_fast_end bs da
302#endif
303
304go_fast !bs da@(ConsumeNegWord k) =
305    case tryConsumeNegWord (BS.unsafeHead bs) bs of
306      DecodeFailure           -> go_fast_end bs da
307      DecodedToken sz (W# w#) -> k w# >>= go_fast (BS.unsafeDrop sz bs)
308
309go_fast !bs da@(ConsumeInt k) =
310    case tryConsumeInt (BS.unsafeHead bs) bs of
311      DecodeFailure           -> go_fast_end bs da
312      DecodedToken sz (I# n#) -> k n# >>= go_fast (BS.unsafeDrop sz bs)
313
314go_fast !bs da@(ConsumeInt8 k) =
315    case tryConsumeInt (BS.unsafeHead bs) bs of
316      DecodeFailure           -> go_fast_end bs da
317      DecodedToken sz (I# n#) ->
318        case (n# ># 0x7f#) `orI#` (n# <# -0x80#) of
319          0#                  -> k n# >>= go_fast (BS.unsafeDrop sz bs)
320          _                   -> go_fast_end bs da
321
322go_fast !bs da@(ConsumeInt16 k) =
323    case tryConsumeInt (BS.unsafeHead bs) bs of
324      DecodeFailure           -> go_fast_end bs da
325      DecodedToken sz (I# n#) ->
326        case (n# ># 0x7fff#) `orI#` (n# <# -0x8000#) of
327          0#                  -> k n# >>= go_fast (BS.unsafeDrop sz bs)
328          _                   -> go_fast_end bs da
329
330go_fast !bs da@(ConsumeInt32 k) =
331    case tryConsumeInt (BS.unsafeHead bs) bs of
332      DecodeFailure           -> go_fast_end bs da
333      DecodedToken sz (I# n#) ->
334#if defined(ARCH_32bit)
335                                 k n# >>= go_fast (BS.unsafeDrop sz bs)
336#else
337        case (n# ># 0x7fffffff#) `orI#` (n# <# -0x80000000#) of
338          0#                  -> k n# >>= go_fast (BS.unsafeDrop sz bs)
339          _                   -> go_fast_end bs da
340#endif
341
342go_fast !bs da@(ConsumeListLen k) =
343    case tryConsumeListLen (BS.unsafeHead bs) bs of
344      DecodeFailure           -> go_fast_end bs da
345      DecodedToken sz (I# n#) -> k n# >>= go_fast (BS.unsafeDrop sz bs)
346
347go_fast !bs da@(ConsumeMapLen k) =
348    case tryConsumeMapLen (BS.unsafeHead bs) bs of
349      DecodeFailure           -> go_fast_end bs da
350      DecodedToken sz (I# n#) -> k n# >>= go_fast (BS.unsafeDrop sz bs)
351
352go_fast !bs da@(ConsumeTag k) =
353    case tryConsumeTag (BS.unsafeHead bs) bs of
354      DecodeFailure           -> go_fast_end bs da
355      DecodedToken sz (W# w#) -> k w# >>= go_fast (BS.unsafeDrop sz bs)
356
357go_fast !bs da@(ConsumeWordCanonical k) =
358    case tryConsumeWord (BS.unsafeHead bs) bs of
359      DecodeFailure           -> go_fast_end bs da
360      DecodedToken sz (W# w#)
361        | isWordCanonical sz w# -> k w# >>= go_fast (BS.unsafeDrop sz bs)
362        | otherwise             -> go_fast_end bs da
363
364go_fast !bs da@(ConsumeWord8Canonical k) =
365    case tryConsumeWord (BS.unsafeHead bs) bs of
366      DecodeFailure           -> go_fast_end bs da
367      DecodedToken sz (W# w#) ->
368        case gtWord# w# 0xff## of
369          0# | isWordCanonical sz w# -> k w# >>= go_fast (BS.unsafeDrop sz bs)
370          _                          -> go_fast_end bs da
371
372go_fast !bs da@(ConsumeWord16Canonical k) =
373    case tryConsumeWord (BS.unsafeHead bs) bs of
374      DecodeFailure           -> go_fast_end bs da
375      DecodedToken sz (W# w#) ->
376        case gtWord# w# 0xffff## of
377          0# | isWordCanonical sz w# -> k w# >>= go_fast (BS.unsafeDrop sz bs)
378          _                          -> go_fast_end bs da
379
380go_fast !bs da@(ConsumeWord32Canonical k) =
381    case tryConsumeWord (BS.unsafeHead bs) bs of
382      DecodeFailure           -> go_fast_end bs da
383      DecodedToken sz (W# w#) ->
384        case w_out_of_range w# of
385          0# | isWordCanonical sz w# -> k w# >>= go_fast (BS.unsafeDrop sz bs)
386          _                          -> go_fast_end bs da
387  where
388    w_out_of_range :: Word# -> Int#
389    w_out_of_range _w# =
390#if defined(ARCH_32bit)
391      0#
392#else
393      gtWord# _w# 0xffffffff##
394#endif
395
396go_fast !bs da@(ConsumeNegWordCanonical k) =
397    case tryConsumeNegWord (BS.unsafeHead bs) bs of
398      DecodeFailure           -> go_fast_end bs da
399      DecodedToken sz (W# w#)
400        | isWordCanonical sz w# -> k w# >>= go_fast (BS.unsafeDrop sz bs)
401        | otherwise             -> go_fast_end bs da
402
403go_fast !bs da@(ConsumeIntCanonical k) =
404    case tryConsumeInt (BS.unsafeHead bs) bs of
405      DecodeFailure           -> go_fast_end bs da
406      DecodedToken sz (I# n#)
407        | isIntCanonical sz n# -> k n# >>= go_fast (BS.unsafeDrop sz bs)
408        | otherwise            -> go_fast_end bs da
409
410go_fast !bs da@(ConsumeInt8Canonical k) =
411    case tryConsumeInt (BS.unsafeHead bs) bs of
412      DecodeFailure           -> go_fast_end bs da
413      DecodedToken sz (I# n#) ->
414        case (n# ># 0x7f#) `orI#` (n# <# -0x80#) of
415          0# | isIntCanonical sz n# -> k n# >>= go_fast (BS.unsafeDrop sz bs)
416          _                         -> go_fast_end bs da
417
418go_fast !bs da@(ConsumeInt16Canonical k) =
419    case tryConsumeInt (BS.unsafeHead bs) bs of
420      DecodeFailure           -> go_fast_end bs da
421      DecodedToken sz (I# n#) ->
422        case (n# ># 0x7fff#) `orI#` (n# <# -0x8000#) of
423          0# | isIntCanonical sz n# -> k n# >>= go_fast (BS.unsafeDrop sz bs)
424          _                         -> go_fast_end bs da
425
426go_fast !bs da@(ConsumeInt32Canonical k) =
427    case tryConsumeInt (BS.unsafeHead bs) bs of
428      DecodeFailure           -> go_fast_end bs da
429      DecodedToken sz (I# n#) ->
430        case n_out_of_range n# of
431          0# | isIntCanonical sz n# -> k n# >>= go_fast (BS.unsafeDrop sz bs)
432          _                         -> go_fast_end bs da
433  where
434    n_out_of_range :: Int# -> Int#
435    n_out_of_range _n# =
436#if defined(ARCH_32bit)
437      0#
438#else
439      (_n# ># 0x7fffffff#) `orI#` (_n# <# -0x80000000#)
440#endif
441
442go_fast !bs da@(ConsumeListLenCanonical k) =
443    case tryConsumeListLen (BS.unsafeHead bs) bs of
444      DecodeFailure           -> go_fast_end bs da
445      DecodedToken sz (I# n#)
446          -- List length can't be negative, cast it to Word#.
447        | isWordCanonical sz (int2Word# n#) -> k n# >>= go_fast (BS.unsafeDrop sz bs)
448        | otherwise                         -> go_fast_end bs da
449
450go_fast !bs da@(ConsumeMapLenCanonical k) =
451    case tryConsumeMapLen (BS.unsafeHead bs) bs of
452      DecodeFailure           -> go_fast_end bs da
453      DecodedToken sz (I# n#)
454          -- Map length can't be negative, cast it to Word#.
455        | isWordCanonical sz (int2Word# n#) -> k n# >>= go_fast (BS.unsafeDrop sz bs)
456        | otherwise                         -> go_fast_end bs da
457
458go_fast !bs da@(ConsumeTagCanonical k) =
459    case tryConsumeTag (BS.unsafeHead bs) bs of
460      DecodeFailure           -> go_fast_end bs da
461      DecodedToken sz (W# w#)
462        | isWordCanonical sz w# -> k w# >>= go_fast (BS.unsafeDrop sz bs)
463        | otherwise             -> go_fast_end bs da
464
465#if defined(ARCH_32bit)
466go_fast !bs da@(ConsumeWord64 k) =
467  case tryConsumeWord64 (BS.unsafeHead bs) bs of
468    DecodeFailure             -> go_fast_end bs da
469    DecodedToken sz (W64# w#) -> k w# >>= go_fast (BS.unsafeDrop sz bs)
470
471go_fast !bs da@(ConsumeNegWord64 k) =
472  case tryConsumeNegWord64 (BS.unsafeHead bs) bs of
473    DecodeFailure             -> go_fast_end bs da
474    DecodedToken sz (W64# w#) -> k w# >>= go_fast (BS.unsafeDrop sz bs)
475
476go_fast !bs da@(ConsumeInt64 k) =
477  case tryConsumeInt64 (BS.unsafeHead bs) bs of
478    DecodeFailure             -> go_fast_end bs da
479    DecodedToken sz (I64# i#) -> k i# >>= go_fast (BS.unsafeDrop sz bs)
480
481go_fast !bs da@(ConsumeListLen64 k) =
482  case tryConsumeListLen64 (BS.unsafeHead bs) bs of
483    DecodeFailure             -> go_fast_end bs da
484    DecodedToken sz (I64# i#) -> k i# >>= go_fast (BS.unsafeDrop sz bs)
485
486go_fast !bs da@(ConsumeMapLen64 k) =
487  case tryConsumeMapLen64 (BS.unsafeHead bs) bs of
488    DecodeFailure             -> go_fast_end bs da
489    DecodedToken sz (I64# i#) -> k i# >>= go_fast (BS.unsafeDrop sz bs)
490
491go_fast !bs da@(ConsumeTag64 k) =
492  case tryConsumeTag64 (BS.unsafeHead bs) bs of
493    DecodeFailure             -> go_fast_end bs da
494    DecodedToken sz (W64# w#) -> k w# >>= go_fast (BS.unsafeDrop sz bs)
495
496go_fast !bs da@(ConsumeWord64Canonical k) =
497  case tryConsumeWord64 (BS.unsafeHead bs) bs of
498    DecodeFailure             -> go_fast_end bs da
499    DecodedToken sz (W64# w#)
500      | isWord64Canonical sz w# -> k w# >>= go_fast (BS.unsafeDrop sz bs)
501      | otherwise               -> go_fast_end bs da
502
503go_fast !bs da@(ConsumeNegWord64Canonical k) =
504  case tryConsumeNegWord64 (BS.unsafeHead bs) bs of
505    DecodeFailure             -> go_fast_end bs da
506    DecodedToken sz (W64# w#)
507      | isWord64Canonical sz w# -> k w# >>= go_fast (BS.unsafeDrop sz bs)
508      | otherwise               -> go_fast_end bs da
509
510go_fast !bs da@(ConsumeInt64Canonical k) =
511  case tryConsumeInt64 (BS.unsafeHead bs) bs of
512    DecodeFailure             -> go_fast_end bs da
513    DecodedToken sz (I64# i#)
514      | isInt64Canonical sz i# -> k i# >>= go_fast (BS.unsafeDrop sz bs)
515      | otherwise              -> go_fast_end bs da
516
517go_fast !bs da@(ConsumeListLen64Canonical k) =
518  case tryConsumeListLen64 (BS.unsafeHead bs) bs of
519    DecodeFailure             -> go_fast_end bs da
520    DecodedToken sz (I64# i#)
521        -- List length can't be negative, cast it to Word64#.
522      | isWord64Canonical sz (int64ToWord64# i#) -> k i# >>= go_fast (BS.unsafeDrop sz bs)
523      | otherwise                                -> go_fast_end bs da
524
525go_fast !bs da@(ConsumeMapLen64Canonical k) =
526  case tryConsumeMapLen64 (BS.unsafeHead bs) bs of
527    DecodeFailure             -> go_fast_end bs da
528    DecodedToken sz (I64# i#)
529        -- Map length can't be negative, cast it to Word64#.
530      | isWord64Canonical sz (int64ToWord64# i#) -> k i# >>= go_fast (BS.unsafeDrop sz bs)
531      | otherwise                                -> go_fast_end bs da
532
533go_fast !bs da@(ConsumeTag64Canonical k) =
534  case tryConsumeTag64 (BS.unsafeHead bs) bs of
535    DecodeFailure             -> go_fast_end bs da
536    DecodedToken sz (W64# w#)
537      | isWord64Canonical sz w# -> k w# >>= go_fast (BS.unsafeDrop sz bs)
538      | otherwise               -> go_fast_end bs da
539#endif
540
541go_fast !bs da@(ConsumeInteger k) =
542    case tryConsumeInteger (BS.unsafeHead bs) bs of
543      DecodedToken sz (BigIntToken _ n) -> k n >>= go_fast (BS.unsafeDrop sz bs)
544      _                                 -> go_fast_end bs da
545
546go_fast !bs da@(ConsumeFloat k) =
547    case tryConsumeFloat (BS.unsafeHead bs) bs of
548      DecodeFailure     -> go_fast_end bs da
549      DecodedToken sz (F# f#) -> k f# >>= go_fast (BS.unsafeDrop sz bs)
550
551go_fast !bs da@(ConsumeDouble k) =
552    case tryConsumeDouble (BS.unsafeHead bs) bs of
553      DecodeFailure     -> go_fast_end bs da
554      DecodedToken sz (D# f#) -> k f# >>= go_fast (BS.unsafeDrop sz bs)
555
556go_fast !bs da@(ConsumeBytes k) =
557    case tryConsumeBytes (BS.unsafeHead bs) bs of
558      DecodeFailure                   -> go_fast_end bs da
559      DecodedToken sz (Fits _ bstr)   -> k bstr >>= go_fast (BS.unsafeDrop sz bs)
560      DecodedToken sz (TooLong _ len) -> return $! SlowConsumeTokenBytes
561                                                   (BS.unsafeDrop sz bs) k len
562
563go_fast !bs da@(ConsumeByteArray k) =
564    case tryConsumeBytes (BS.unsafeHead bs) bs of
565      DecodeFailure                 -> go_fast_end bs da
566      DecodedToken sz (Fits _ str)  -> k (BA.fromByteString str) >>= go_fast (BS.unsafeDrop sz bs)
567      DecodedToken sz (TooLong _ len) -> return $! SlowConsumeTokenByteArray
568                                                   (BS.unsafeDrop sz bs) k len
569
570go_fast !bs da@(ConsumeString k) =
571    case tryConsumeString (BS.unsafeHead bs) bs of
572      DecodeFailure                   -> go_fast_end bs da
573      DecodedToken sz (Fits _ str)    -> case T.decodeUtf8' str of
574        Right t -> k t >>= go_fast (BS.unsafeDrop sz bs)
575        Left _e -> return $! SlowFail bs "invalid UTF8"
576      DecodedToken sz (TooLong _ len) -> return $! SlowConsumeTokenString
577                                                   (BS.unsafeDrop sz bs) k len
578
579go_fast !bs da@(ConsumeUtf8ByteArray k) =
580    case tryConsumeString (BS.unsafeHead bs) bs of
581      DecodeFailure                   -> go_fast_end bs da
582      DecodedToken sz (Fits _ str)    -> k (BA.fromByteString str)
583                                         >>= go_fast (BS.unsafeDrop sz bs)
584      DecodedToken sz (TooLong _ len) -> return $! SlowConsumeTokenUtf8ByteArray
585                                                   (BS.unsafeDrop sz bs) k len
586
587go_fast !bs da@(ConsumeBool k) =
588    case tryConsumeBool (BS.unsafeHead bs) of
589      DecodeFailure     -> go_fast_end bs da
590      DecodedToken sz b -> k b >>= go_fast (BS.unsafeDrop sz bs)
591
592go_fast !bs da@(ConsumeSimple k) =
593    case tryConsumeSimple (BS.unsafeHead bs) bs of
594      DecodeFailure           -> go_fast_end bs da
595      DecodedToken sz (W# w#) -> k w# >>= go_fast (BS.unsafeDrop sz bs)
596
597go_fast !bs da@(ConsumeIntegerCanonical k) =
598    case tryConsumeInteger (BS.unsafeHead bs) bs of
599      DecodedToken sz (BigIntToken True n) -> k n >>= go_fast (BS.unsafeDrop sz bs)
600      _                                    -> go_fast_end bs da
601
602
603go_fast !bs da@(ConsumeFloat16Canonical k) =
604    case tryConsumeFloat (BS.unsafeHead bs) bs of
605      DecodeFailure     -> go_fast_end bs da
606      DecodedToken sz f@(F# f#)
607        | isFloat16Canonical sz bs f -> k f# >>= go_fast (BS.unsafeDrop sz bs)
608        | otherwise                  -> go_fast_end bs da
609
610go_fast !bs da@(ConsumeFloatCanonical k) =
611    case tryConsumeFloat (BS.unsafeHead bs) bs of
612      DecodeFailure     -> go_fast_end bs da
613      DecodedToken sz f@(F# f#)
614        | isFloatCanonical sz bs f -> k f# >>= go_fast (BS.unsafeDrop sz bs)
615        | otherwise                -> go_fast_end bs da
616
617go_fast !bs da@(ConsumeDoubleCanonical k) =
618    case tryConsumeDouble (BS.unsafeHead bs) bs of
619      DecodeFailure     -> go_fast_end bs da
620      DecodedToken sz f@(D# f#)
621        | isDoubleCanonical sz bs f -> k f# >>= go_fast (BS.unsafeDrop sz bs)
622        | otherwise                 -> go_fast_end bs da
623
624go_fast !bs da@(ConsumeBytesCanonical k) =
625    case tryConsumeBytes (BS.unsafeHead bs) bs of
626      DecodedToken sz (Fits    True bstr) -> k bstr >>= go_fast (BS.unsafeDrop sz bs)
627      DecodedToken sz (TooLong True len)  ->
628        return $! SlowConsumeTokenBytes (BS.unsafeDrop sz bs) k len
629      _                                   -> go_fast_end bs da
630
631go_fast !bs da@(ConsumeByteArrayCanonical k) =
632    case tryConsumeBytes (BS.unsafeHead bs) bs of
633      DecodedToken sz (Fits True str)    ->
634        k (BA.fromByteString str) >>= go_fast (BS.unsafeDrop sz bs)
635      DecodedToken sz (TooLong True len) ->
636        return $! SlowConsumeTokenByteArray (BS.unsafeDrop sz bs) k len
637      _                                  -> go_fast_end bs da
638
639go_fast !bs da@(ConsumeStringCanonical k) =
640    case tryConsumeString (BS.unsafeHead bs) bs of
641      DecodedToken sz (Fits True str)    -> case T.decodeUtf8' str of
642        Right t -> k t >>= go_fast (BS.unsafeDrop sz bs)
643        Left _e -> return $! SlowFail bs "invalid UTF8"
644      DecodedToken sz (TooLong True len) ->
645        return $! SlowConsumeTokenString (BS.unsafeDrop sz bs) k len
646      _                                  -> go_fast_end bs da
647
648go_fast !bs da@(ConsumeUtf8ByteArrayCanonical k) =
649    case tryConsumeString (BS.unsafeHead bs) bs of
650      DecodedToken sz (Fits True str)    ->
651        k (BA.fromByteString str) >>= go_fast (BS.unsafeDrop sz bs)
652      DecodedToken sz (TooLong True len) ->
653        return $! SlowConsumeTokenUtf8ByteArray (BS.unsafeDrop sz bs) k len
654      _                                  -> go_fast_end bs da
655
656go_fast !bs da@(ConsumeSimpleCanonical k) =
657    case tryConsumeSimple (BS.unsafeHead bs) bs of
658      DecodeFailure           -> go_fast_end bs da
659      DecodedToken sz (W# w#)
660        | isSimpleCanonical sz w# -> k w# >>= go_fast (BS.unsafeDrop sz bs)
661        | otherwise               -> go_fast_end bs da
662
663go_fast !bs da@(ConsumeBytesIndef k) =
664    case tryConsumeBytesIndef (BS.unsafeHead bs) of
665      DecodeFailure     -> go_fast_end bs da
666      DecodedToken sz _ -> k >>= go_fast (BS.unsafeDrop sz bs)
667
668go_fast !bs da@(ConsumeStringIndef k) =
669    case tryConsumeStringIndef (BS.unsafeHead bs) of
670      DecodeFailure     -> go_fast_end bs da
671      DecodedToken sz _ -> k >>= go_fast (BS.unsafeDrop sz bs)
672
673go_fast !bs da@(ConsumeListLenIndef k) =
674    case tryConsumeListLenIndef (BS.unsafeHead bs) of
675      DecodeFailure     -> go_fast_end bs da
676      DecodedToken sz _ -> k >>= go_fast (BS.unsafeDrop sz bs)
677
678go_fast !bs da@(ConsumeMapLenIndef k) =
679    case tryConsumeMapLenIndef (BS.unsafeHead bs) of
680      DecodeFailure     -> go_fast_end bs da
681      DecodedToken sz _ -> k >>= go_fast (BS.unsafeDrop sz bs)
682
683go_fast !bs da@(ConsumeNull k) =
684    case tryConsumeNull (BS.unsafeHead bs) of
685      DecodeFailure     -> go_fast_end bs da
686      DecodedToken sz _ -> k >>= go_fast (BS.unsafeDrop sz bs)
687
688go_fast !bs da@(ConsumeListLenOrIndef k) =
689    case tryConsumeListLenOrIndef (BS.unsafeHead bs) bs of
690      DecodeFailure           -> go_fast_end bs da
691      DecodedToken sz (I# n#) -> k n# >>= go_fast (BS.unsafeDrop sz bs)
692
693go_fast !bs da@(ConsumeMapLenOrIndef k) =
694    case tryConsumeMapLenOrIndef (BS.unsafeHead bs) bs of
695      DecodeFailure           -> go_fast_end bs da
696      DecodedToken sz (I# n#) -> k n# >>= go_fast (BS.unsafeDrop sz bs)
697
698go_fast !bs (ConsumeBreakOr k) =
699    case tryConsumeBreakOr (BS.unsafeHead bs) of
700      DecodeFailure     -> k False >>= go_fast bs
701      DecodedToken sz _ -> k True >>= go_fast (BS.unsafeDrop sz bs)
702
703go_fast !bs (PeekTokenType k) =
704    let !hdr  = BS.unsafeHead bs
705        !tkty = decodeTokenTypeTable `A.unsafeAt` word8ToInt hdr
706    in k tkty >>= go_fast bs
707
708go_fast !bs (PeekAvailable k) = k (case BS.length bs of I# len# -> len#) >>= go_fast bs
709
710go_fast !bs da@PeekByteOffset{} = go_fast_end bs da
711go_fast !bs da@D.Fail{} = go_fast_end bs da
712go_fast !bs da@D.Done{} = go_fast_end bs da
713
714
715-- This variant of the fast path has to do a few more checks because we're
716-- near the end of the chunk. The guarantee we provide here is that we will
717-- decode any tokens where the whole token fits within the input buffer. So
718-- if we return with input buffer space still unconsumed (and we're not done
719-- or failed) then there's one remaining token that spans the end of the
720-- input chunk (the slow path fixup code relies on this guarantee).
721--
722go_fast_end :: ByteString -> DecodeAction s a -> ST s (SlowPath s a)
723
724-- these three cases don't need any input
725
726go_fast_end !bs (D.Fail msg)      = return $! SlowFail bs msg
727go_fast_end !bs (D.Done x)        = return $! FastDone bs x
728go_fast_end !bs (PeekAvailable k) = k (case BS.length bs of I# len# -> len#) >>= go_fast_end bs
729
730go_fast_end !bs (PeekByteOffset k) = return $! SlowPeekByteOffset bs k
731
732-- the next two cases only need the 1 byte token header
733go_fast_end !bs da | BS.null bs = return $! SlowDecodeAction bs da
734
735go_fast_end !bs (ConsumeBreakOr k) =
736    case tryConsumeBreakOr (BS.unsafeHead bs) of
737      DecodeFailure     -> k False >>= go_fast_end bs
738      DecodedToken sz _ -> k True  >>= go_fast_end (BS.unsafeDrop sz bs)
739
740go_fast_end !bs (PeekTokenType k) =
741    let !hdr  = BS.unsafeHead bs
742        !tkty = decodeTokenTypeTable `A.unsafeAt` word8ToInt hdr
743    in k tkty >>= go_fast_end bs
744
745-- all the remaining cases have to decode the current token
746
747go_fast_end !bs da
748    | let !hdr = BS.unsafeHead bs
749    , BS.length bs < tokenSize hdr
750    = return $! SlowDecodeAction bs da
751
752go_fast_end !bs (ConsumeWord k) =
753    case tryConsumeWord (BS.unsafeHead bs) bs of
754      DecodeFailure           -> return $! SlowFail bs "expected word"
755      DecodedToken sz (W# w#) -> k w# >>= go_fast_end (BS.unsafeDrop sz bs)
756
757go_fast_end !bs (ConsumeWord8 k) =
758    case tryConsumeWord (BS.unsafeHead bs) bs of
759      DecodeFailure           -> return $! SlowFail bs "expected word8"
760      DecodedToken sz (W# w#) ->
761        case gtWord# w# 0xff## of
762          0#                  -> k w# >>= go_fast_end (BS.unsafeDrop sz bs)
763          _                   -> return $! SlowFail bs "expected word8"
764
765go_fast_end !bs (ConsumeWord16 k) =
766    case tryConsumeWord (BS.unsafeHead bs) bs of
767      DecodeFailure           -> return $! SlowFail bs "expected word16"
768      DecodedToken sz (W# w#) ->
769        case gtWord# w# 0xffff## of
770          0#                  -> k w# >>= go_fast_end (BS.unsafeDrop sz bs)
771          _                   -> return $! SlowFail bs "expected word16"
772
773go_fast_end !bs (ConsumeWord32 k) =
774    case tryConsumeWord (BS.unsafeHead bs) bs of
775      DecodeFailure           -> return $! SlowFail bs "expected word32"
776      DecodedToken sz (W# w#) ->
777#if defined(ARCH_32bit)
778                                 k w# >>= go_fast_end (BS.unsafeDrop sz bs)
779#else
780        case gtWord# w# 0xffffffff## of
781          0#                  -> k w# >>= go_fast_end (BS.unsafeDrop sz bs)
782          _                   -> return $! SlowFail bs "expected word32"
783#endif
784
785go_fast_end !bs (ConsumeNegWord k) =
786    case tryConsumeNegWord (BS.unsafeHead bs) bs of
787      DecodeFailure           -> return $! SlowFail bs "expected negative int"
788      DecodedToken sz (W# w#) -> k w# >>= go_fast_end (BS.unsafeDrop sz bs)
789
790go_fast_end !bs (ConsumeInt k) =
791    case tryConsumeInt (BS.unsafeHead bs) bs of
792      DecodeFailure           -> return $! SlowFail bs "expected int"
793      DecodedToken sz (I# n#) -> k n# >>= go_fast_end (BS.unsafeDrop sz bs)
794
795go_fast_end !bs (ConsumeInt8 k) =
796    case tryConsumeInt (BS.unsafeHead bs) bs of
797      DecodeFailure           -> return $! SlowFail bs "expected int8"
798      DecodedToken sz (I# n#) ->
799        case (n# ># 0x7f#) `orI#` (n# <# -0x80#) of
800          0#                  -> k n# >>= go_fast_end (BS.unsafeDrop sz bs)
801          _                   -> return $! SlowFail bs "expected int8"
802
803go_fast_end !bs (ConsumeInt16 k) =
804    case tryConsumeInt (BS.unsafeHead bs) bs of
805      DecodeFailure           -> return $! SlowFail bs "expected int16"
806      DecodedToken sz (I# n#) ->
807        case (n# ># 0x7fff#) `orI#` (n# <# -0x8000#) of
808          0#                  -> k n# >>= go_fast_end (BS.unsafeDrop sz bs)
809          _                   -> return $! SlowFail bs "expected int16"
810
811go_fast_end !bs (ConsumeInt32 k) =
812    case tryConsumeInt (BS.unsafeHead bs) bs of
813      DecodeFailure           -> return $! SlowFail bs "expected int32"
814      DecodedToken sz (I# n#) ->
815#if defined(ARCH_32bit)
816                                 k n# >>= go_fast_end (BS.unsafeDrop sz bs)
817#else
818        case (n# ># 0x7fffffff#) `orI#` (n# <# -0x80000000#) of
819          0#                  -> k n# >>= go_fast_end (BS.unsafeDrop sz bs)
820          _                   -> return $! SlowFail bs "expected int32"
821#endif
822
823go_fast_end !bs (ConsumeListLen k) =
824    case tryConsumeListLen (BS.unsafeHead bs) bs of
825      DecodeFailure           -> return $! SlowFail bs "expected list len"
826      DecodedToken sz (I# n#) -> k n# >>= go_fast_end (BS.unsafeDrop sz bs)
827
828go_fast_end !bs (ConsumeMapLen k) =
829    case tryConsumeMapLen (BS.unsafeHead bs) bs of
830      DecodeFailure           -> return $! SlowFail bs "expected map len"
831      DecodedToken sz (I# n#) -> k n# >>= go_fast_end (BS.unsafeDrop sz bs)
832
833go_fast_end !bs (ConsumeTag k) =
834    case tryConsumeTag (BS.unsafeHead bs) bs of
835      DecodeFailure           -> return $! SlowFail bs "expected tag"
836      DecodedToken sz (W# w#) -> k w# >>= go_fast_end (BS.unsafeDrop sz bs)
837
838go_fast_end !bs (ConsumeWordCanonical k) =
839    case tryConsumeWord (BS.unsafeHead bs) bs of
840      DecodeFailure           -> return $! SlowFail bs "expected word"
841      DecodedToken sz (W# w#)
842        | isWordCanonical sz w# -> k w# >>= go_fast_end (BS.unsafeDrop sz bs)
843        | otherwise             -> return $! SlowFail bs "non-canonical word"
844
845go_fast_end !bs (ConsumeWord8Canonical k) =
846    case tryConsumeWord (BS.unsafeHead bs) bs of
847      DecodeFailure           -> return $! SlowFail bs "expected word8"
848      DecodedToken sz (W# w#) -> case gtWord# w# 0xff## of
849          0# | isWordCanonical sz w# -> k w# >>= go_fast_end (BS.unsafeDrop sz bs)
850             | otherwise             -> return $! SlowFail bs "non-canonical word8"
851          _                          -> return $! SlowFail bs "expected word8"
852
853go_fast_end !bs (ConsumeWord16Canonical k) =
854    case tryConsumeWord (BS.unsafeHead bs) bs of
855      DecodeFailure           -> return $! SlowFail bs "expected word16"
856      DecodedToken sz (W# w#) -> case gtWord# w# 0xffff## of
857        0# | isWordCanonical sz w# -> k w# >>= go_fast_end (BS.unsafeDrop sz bs)
858           | otherwise             -> return $! SlowFail bs "non-canonical word16"
859        _                          -> return $! SlowFail bs "expected word16"
860
861go_fast_end !bs (ConsumeWord32Canonical k) =
862    case tryConsumeWord (BS.unsafeHead bs) bs of
863      DecodeFailure           -> return $! SlowFail bs "expected word32"
864      DecodedToken sz (W# w#) -> case w_out_of_range w# of
865        0# | isWordCanonical sz w# -> k w# >>= go_fast_end (BS.unsafeDrop sz bs)
866           | otherwise             -> return $! SlowFail bs "non-canonical word32"
867        _                          -> return $! SlowFail bs "expected word32"
868  where
869    w_out_of_range :: Word# -> Int#
870    w_out_of_range _w# =
871#if defined(ARCH_32bit)
872      0#
873#else
874      gtWord# _w# 0xffffffff##
875#endif
876
877go_fast_end !bs (ConsumeNegWordCanonical k) =
878    case tryConsumeNegWord (BS.unsafeHead bs) bs of
879      DecodeFailure           -> return $! SlowFail bs "expected negative int"
880      DecodedToken sz (W# w#)
881        | isWordCanonical sz w# -> k w# >>= go_fast_end (BS.unsafeDrop sz bs)
882        | otherwise             -> return $! SlowFail bs "non-canonical negative int"
883
884go_fast_end !bs (ConsumeIntCanonical k) =
885    case tryConsumeInt (BS.unsafeHead bs) bs of
886      DecodeFailure           -> return $! SlowFail bs "expected int"
887      DecodedToken sz (I# n#)
888        | isIntCanonical sz n# -> k n# >>= go_fast_end (BS.unsafeDrop sz bs)
889        | otherwise            -> return $! SlowFail bs "non-canonical int"
890
891go_fast_end !bs (ConsumeInt8Canonical k) =
892    case tryConsumeInt (BS.unsafeHead bs) bs of
893      DecodeFailure           -> return $! SlowFail bs "expected int8"
894      DecodedToken sz (I# n#) ->
895        case (n# ># 0x7f#) `orI#` (n# <# -0x80#) of
896          0# | isIntCanonical sz n# -> k n# >>= go_fast_end (BS.unsafeDrop sz bs)
897             | otherwise            -> return $! SlowFail bs "non-canonical int8"
898          _                         -> return $! SlowFail bs "expected int8"
899
900go_fast_end !bs (ConsumeInt16Canonical k) =
901    case tryConsumeInt (BS.unsafeHead bs) bs of
902      DecodeFailure           -> return $! SlowFail bs "expected int16"
903      DecodedToken sz (I# n#) ->
904        case (n# ># 0x7fff#) `orI#` (n# <# -0x8000#) of
905          0# | isIntCanonical sz n# -> k n# >>= go_fast_end (BS.unsafeDrop sz bs)
906             | otherwise            -> return $! SlowFail bs "non-canonical int16"
907          _                         -> return $! SlowFail bs "expected int16"
908
909go_fast_end !bs (ConsumeInt32Canonical k) =
910    case tryConsumeInt (BS.unsafeHead bs) bs of
911      DecodeFailure           -> return $! SlowFail bs "expected int32"
912      DecodedToken sz (I# n#) ->
913        case n_out_of_range n# of
914          0# | isIntCanonical sz n# -> k n# >>= go_fast_end (BS.unsafeDrop sz bs)
915             | otherwise            -> return $! SlowFail bs "non-canonical int32"
916          _                         -> return $! SlowFail bs "expected int32"
917  where
918    n_out_of_range :: Int# -> Int#
919    n_out_of_range _n# =
920#if defined(ARCH_32bit)
921      0#
922#else
923      (_n# ># 0x7fffffff#) `orI#` (_n# <# -0x80000000#)
924#endif
925
926go_fast_end !bs (ConsumeListLenCanonical k) =
927    case tryConsumeListLen (BS.unsafeHead bs) bs of
928      DecodeFailure           -> return $! SlowFail bs "expected list len"
929      DecodedToken sz (I# n#)
930          -- List length can't be negative, cast it to Word#.
931        | isWordCanonical sz (int2Word# n#) -> k n# >>= go_fast_end (BS.unsafeDrop sz bs)
932        | otherwise                         -> return $! SlowFail bs "non-canonical list len"
933
934go_fast_end !bs (ConsumeMapLenCanonical k) =
935    case tryConsumeMapLen (BS.unsafeHead bs) bs of
936      DecodeFailure           -> return $! SlowFail bs "expected map len"
937      DecodedToken sz (I# n#)
938          -- Map length can't be negative, cast it to Word#.
939        | isWordCanonical sz (int2Word# n#) -> k n# >>= go_fast_end (BS.unsafeDrop sz bs)
940        | otherwise                         -> return $! SlowFail bs "non-canonical map len"
941
942go_fast_end !bs (ConsumeTagCanonical k) =
943    case tryConsumeTag (BS.unsafeHead bs) bs of
944      DecodeFailure           -> return $! SlowFail bs "expected tag"
945      DecodedToken sz (W# w#)
946        | isWordCanonical sz w# -> k w# >>= go_fast_end (BS.unsafeDrop sz bs)
947        | otherwise             -> return $! SlowFail bs "non-canonical tag"
948
949#if defined(ARCH_32bit)
950go_fast_end !bs (ConsumeWord64 k) =
951  case tryConsumeWord64 (BS.unsafeHead bs) bs of
952    DecodeFailure             -> return $! SlowFail bs "expected word64"
953    DecodedToken sz (W64# w#) -> k w# >>= go_fast_end (BS.unsafeDrop sz bs)
954
955go_fast_end !bs (ConsumeNegWord64 k) =
956  case tryConsumeNegWord64 (BS.unsafeHead bs) bs of
957    DecodeFailure             -> return $! SlowFail bs "expected negative int"
958    DecodedToken sz (W64# w#) -> k w# >>= go_fast_end (BS.unsafeDrop sz bs)
959
960go_fast_end !bs (ConsumeInt64 k) =
961  case tryConsumeInt64 (BS.unsafeHead bs) bs of
962    DecodeFailure             -> return $! SlowFail bs "expected int64"
963    DecodedToken sz (I64# i#) -> k i# >>= go_fast_end (BS.unsafeDrop sz bs)
964
965go_fast_end !bs (ConsumeListLen64 k) =
966  case tryConsumeListLen64 (BS.unsafeHead bs) bs of
967    DecodeFailure             -> return $! SlowFail bs "expected list len 64"
968    DecodedToken sz (I64# i#) -> k i# >>= go_fast_end (BS.unsafeDrop sz bs)
969
970go_fast_end !bs (ConsumeMapLen64 k) =
971  case tryConsumeMapLen64 (BS.unsafeHead bs) bs of
972    DecodeFailure             -> return $! SlowFail bs "expected map len 64"
973    DecodedToken sz (I64# i#) -> k i# >>= go_fast_end (BS.unsafeDrop sz bs)
974
975go_fast_end !bs (ConsumeTag64 k) =
976  case tryConsumeTag64 (BS.unsafeHead bs) bs of
977    DecodeFailure             -> return $! SlowFail bs "expected tag64"
978    DecodedToken sz (W64# w#) -> k w# >>= go_fast_end (BS.unsafeDrop sz bs)
979
980go_fast_end !bs (ConsumeWord64Canonical k) =
981  case tryConsumeWord64 (BS.unsafeHead bs) bs of
982    DecodeFailure             -> return $! SlowFail bs "expected word64"
983    DecodedToken sz (W64# w#)
984      | isWord64Canonical sz w# -> k w# >>= go_fast_end (BS.unsafeDrop sz bs)
985      | otherwise               -> return $! SlowFail bs "non-canonical word64"
986
987go_fast_end !bs (ConsumeNegWord64Canonical k) =
988  case tryConsumeNegWord64 (BS.unsafeHead bs) bs of
989    DecodeFailure             -> return $! SlowFail bs "expected negative int"
990    DecodedToken sz (W64# w#)
991      | isWord64Canonical sz w# -> k w# >>= go_fast_end (BS.unsafeDrop sz bs)
992      | otherwise               -> return $! SlowFail bs "non-canonical negative int"
993
994go_fast_end !bs (ConsumeInt64Canonical k) =
995  case tryConsumeInt64 (BS.unsafeHead bs) bs of
996    DecodeFailure             -> return $! SlowFail bs "expected int64"
997    DecodedToken sz (I64# i#)
998      | isInt64Canonical sz i# -> k i# >>= go_fast_end (BS.unsafeDrop sz bs)
999      | otherwise              -> return $! SlowFail bs "non-canonical int64"
1000
1001go_fast_end !bs (ConsumeListLen64Canonical k) =
1002  case tryConsumeListLen64 (BS.unsafeHead bs) bs of
1003    DecodeFailure             -> return $! SlowFail bs "expected list len 64"
1004    DecodedToken sz (I64# i#)
1005        -- List length can't be negative, cast it to Word64#.
1006      | isWord64Canonical sz (int64ToWord64# i#) ->
1007          k i# >>= go_fast_end (BS.unsafeDrop sz bs)
1008      | otherwise ->
1009          return $! SlowFail bs "non-canonical list len 64"
1010
1011go_fast_end !bs (ConsumeMapLen64Canonical k) =
1012  case tryConsumeMapLen64 (BS.unsafeHead bs) bs of
1013    DecodeFailure             -> return $! SlowFail bs "expected map len 64"
1014    DecodedToken sz (I64# i#)
1015        -- Map length can't be negative, cast it to Word64#.
1016      | isWord64Canonical sz (int64ToWord64# i#) ->
1017          k i# >>= go_fast_end (BS.unsafeDrop sz bs)
1018      | otherwise ->
1019          return $! SlowFail bs "non-canonical map len 64"
1020
1021go_fast_end !bs (ConsumeTag64Canonical k) =
1022  case tryConsumeTag64 (BS.unsafeHead bs) bs of
1023    DecodeFailure             -> return $! SlowFail bs "expected tag64"
1024    DecodedToken sz (W64# w#)
1025      | isWord64Canonical sz w# -> k w# >>= go_fast_end (BS.unsafeDrop sz bs)
1026      | otherwise               -> return $! SlowFail bs "non-canonical tag64"
1027
1028#endif
1029
1030go_fast_end !bs (ConsumeInteger k) =
1031    case tryConsumeInteger (BS.unsafeHead bs) bs of
1032      DecodeFailure                         -> return $! SlowFail bs "expected integer"
1033      DecodedToken sz (BigIntToken _ n)     -> k n >>= go_fast_end (BS.unsafeDrop sz bs)
1034      DecodedToken sz (BigUIntNeedBody _ len) -> return $! SlowConsumeTokenBytes (BS.unsafeDrop sz bs) (adjustContBigUIntNeedBody k) len
1035      DecodedToken sz (BigNIntNeedBody _ len) -> return $! SlowConsumeTokenBytes (BS.unsafeDrop sz bs) (adjustContBigNIntNeedBody k) len
1036      DecodedToken sz  BigUIntNeedHeader    -> return $! SlowDecodeAction      (BS.unsafeDrop sz bs) (adjustContBigUIntNeedHeader k)
1037      DecodedToken sz  BigNIntNeedHeader    -> return $! SlowDecodeAction      (BS.unsafeDrop sz bs) (adjustContBigNIntNeedHeader k)
1038
1039go_fast_end !bs (ConsumeFloat k) =
1040    case tryConsumeFloat (BS.unsafeHead bs) bs of
1041      DecodeFailure     -> return $! SlowFail bs "expected float"
1042      DecodedToken sz (F# f#) -> k f# >>= go_fast_end (BS.unsafeDrop sz bs)
1043
1044go_fast_end !bs (ConsumeDouble k) =
1045    case tryConsumeDouble (BS.unsafeHead bs) bs of
1046      DecodeFailure           -> return $! SlowFail bs "expected double"
1047      DecodedToken sz (D# f#) -> k f# >>= go_fast_end (BS.unsafeDrop sz bs)
1048
1049go_fast_end !bs (ConsumeBytes k) =
1050    case tryConsumeBytes (BS.unsafeHead bs) bs of
1051      DecodeFailure                   -> return $! SlowFail bs "expected bytes"
1052      DecodedToken sz (Fits _ bstr)   -> k bstr >>= go_fast_end (BS.unsafeDrop sz bs)
1053      DecodedToken sz (TooLong _ len) -> return $! SlowConsumeTokenBytes
1054                                                   (BS.unsafeDrop sz bs) k len
1055
1056go_fast_end !bs (ConsumeByteArray k) =
1057    case tryConsumeBytes (BS.unsafeHead bs) bs of
1058      DecodeFailure                   -> return $! SlowFail bs "expected string"
1059      DecodedToken sz (Fits _ str)    -> (k $! BA.fromByteString str)
1060                                         >>= go_fast_end (BS.unsafeDrop sz bs)
1061      DecodedToken sz (TooLong _ len) -> return $! SlowConsumeTokenByteArray
1062                                                   (BS.unsafeDrop sz bs) k len
1063
1064go_fast_end !bs (ConsumeString k) =
1065    case tryConsumeString (BS.unsafeHead bs) bs of
1066      DecodeFailure                   -> return $! SlowFail bs "expected string"
1067      DecodedToken sz (Fits _ str)    -> case T.decodeUtf8' str of
1068        Right t -> k t >>= go_fast_end (BS.unsafeDrop sz bs)
1069        Left _e -> return $! SlowFail bs "invalid UTF8"
1070      DecodedToken sz (TooLong _ len) -> return $! SlowConsumeTokenString
1071                                                   (BS.unsafeDrop sz bs) k len
1072
1073go_fast_end !bs (ConsumeUtf8ByteArray k) =
1074    case tryConsumeString (BS.unsafeHead bs) bs of
1075      DecodeFailure                   -> return $! SlowFail bs "expected string"
1076      DecodedToken sz (Fits _ str)    -> (k $! BA.fromByteString str)
1077                                         >>= go_fast_end (BS.unsafeDrop sz bs)
1078      DecodedToken sz (TooLong _ len) -> return $! SlowConsumeTokenUtf8ByteArray
1079                                                   (BS.unsafeDrop sz bs) k len
1080
1081go_fast_end !bs (ConsumeBool k) =
1082    case tryConsumeBool (BS.unsafeHead bs) of
1083      DecodeFailure     -> return $! SlowFail bs "expected bool"
1084      DecodedToken sz b -> k b >>= go_fast_end (BS.unsafeDrop sz bs)
1085
1086go_fast_end !bs (ConsumeSimple k) =
1087    case tryConsumeSimple (BS.unsafeHead bs) bs of
1088      DecodeFailure           -> return $! SlowFail bs "expected simple"
1089      DecodedToken sz (W# w#) -> k w# >>= go_fast_end (BS.unsafeDrop sz bs)
1090
1091go_fast_end !bs (ConsumeIntegerCanonical k) =
1092    case tryConsumeInteger (BS.unsafeHead bs) bs of
1093      DecodeFailure                         -> return $! SlowFail bs "expected integer"
1094      DecodedToken sz (BigIntToken True n)  -> k n >>= go_fast_end (BS.unsafeDrop sz bs)
1095      DecodedToken sz (BigUIntNeedBody True len) -> return $! SlowConsumeTokenBytes
1096        (BS.unsafeDrop sz bs) (adjustContCanonicalBigUIntNeedBody k) len
1097      DecodedToken sz (BigNIntNeedBody True len) -> return $! SlowConsumeTokenBytes
1098        (BS.unsafeDrop sz bs) (adjustContCanonicalBigNIntNeedBody k) len
1099      DecodedToken sz  BigUIntNeedHeader -> return $! SlowDecodeAction
1100        (BS.unsafeDrop sz bs) (adjustContCanonicalBigUIntNeedHeader k)
1101      DecodedToken sz  BigNIntNeedHeader -> return $! SlowDecodeAction
1102        (BS.unsafeDrop sz bs) (adjustContCanonicalBigNIntNeedHeader k)
1103      _ -> return $! SlowFail bs "non-canonical integer"
1104
1105go_fast_end !bs (ConsumeFloat16Canonical k) =
1106    case tryConsumeFloat (BS.unsafeHead bs) bs of
1107      DecodeFailure     -> return $! SlowFail bs "expected float"
1108      DecodedToken sz f@(F# f#)
1109        | isFloat16Canonical sz bs f -> k f# >>= go_fast_end (BS.unsafeDrop sz bs)
1110        | otherwise                  -> return $! SlowFail bs "non-canonical float16"
1111
1112go_fast_end !bs (ConsumeFloatCanonical k) =
1113    case tryConsumeFloat (BS.unsafeHead bs) bs of
1114      DecodeFailure     -> return $! SlowFail bs "expected float"
1115      DecodedToken sz f@(F# f#)
1116        | isFloatCanonical sz bs f -> k f# >>= go_fast_end (BS.unsafeDrop sz bs)
1117        | otherwise                -> return $! SlowFail bs "non-canonical float"
1118
1119go_fast_end !bs (ConsumeDoubleCanonical k) =
1120    case tryConsumeDouble (BS.unsafeHead bs) bs of
1121      DecodeFailure           -> return $! SlowFail bs "expected double"
1122      DecodedToken sz f@(D# f#)
1123        | isDoubleCanonical sz bs f -> k f# >>= go_fast_end (BS.unsafeDrop sz bs)
1124        | otherwise                 -> return $! SlowFail bs "non-canonical double"
1125
1126go_fast_end !bs (ConsumeBytesCanonical k) =
1127    case tryConsumeBytes (BS.unsafeHead bs) bs of
1128      DecodeFailure         -> return $! SlowFail bs "expected bytes"
1129      DecodedToken sz token -> case token of
1130        Fits True bstr   -> k bstr >>= go_fast_end (BS.unsafeDrop sz bs)
1131        TooLong True len -> return $! SlowConsumeTokenBytes (BS.unsafeDrop sz bs) k len
1132        _                -> return $! SlowFail bs "non-canonical length prefix"
1133
1134go_fast_end !bs (ConsumeByteArrayCanonical k) =
1135    case tryConsumeBytes (BS.unsafeHead bs) bs of
1136      DecodeFailure         -> return $! SlowFail bs "expected string"
1137      DecodedToken sz token -> case token of
1138        Fits True str    ->
1139          (k $! BA.fromByteString str) >>= go_fast_end (BS.unsafeDrop sz bs)
1140        TooLong True len ->
1141           return $! SlowConsumeTokenByteArray (BS.unsafeDrop sz bs) k len
1142        _                -> return $! SlowFail bs "non-canonical length prefix"
1143
1144go_fast_end !bs (ConsumeStringCanonical k) =
1145    case tryConsumeString (BS.unsafeHead bs) bs of
1146      DecodeFailure         -> return $! SlowFail bs "expected string"
1147      DecodedToken sz token -> case token of
1148        Fits True str    -> case T.decodeUtf8' str of
1149          Right t -> k t >>= go_fast_end (BS.unsafeDrop sz bs)
1150          Left _e -> return $! SlowFail bs "invalid UTF8"
1151        TooLong True len -> return $! SlowConsumeTokenString (BS.unsafeDrop sz bs) k len
1152        _                -> return $! SlowFail bs "non-canonical length prefix"
1153
1154go_fast_end !bs (ConsumeUtf8ByteArrayCanonical k) =
1155    case tryConsumeString (BS.unsafeHead bs) bs of
1156      DecodeFailure                 -> return $! SlowFail bs "expected string"
1157      DecodedToken sz token -> case token of
1158        Fits True str    ->
1159          (k $! BA.fromByteString str) >>= go_fast_end (BS.unsafeDrop sz bs)
1160        TooLong True len ->
1161          return $! SlowConsumeTokenUtf8ByteArray (BS.unsafeDrop sz bs) k len
1162        _                ->
1163          return $! SlowFail bs "non-canonical length prefix"
1164
1165go_fast_end !bs (ConsumeSimpleCanonical k) =
1166    case tryConsumeSimple (BS.unsafeHead bs) bs of
1167      DecodeFailure           -> return $! SlowFail bs "expected simple"
1168      DecodedToken sz (W# w#)
1169        | isSimpleCanonical sz w# -> k w# >>= go_fast_end (BS.unsafeDrop sz bs)
1170        | otherwise               -> return $! SlowFail bs "non-canonical simple"
1171
1172go_fast_end !bs (ConsumeBytesIndef k) =
1173    case tryConsumeBytesIndef (BS.unsafeHead bs) of
1174      DecodeFailure     -> return $! SlowFail bs "expected bytes start"
1175      DecodedToken sz _ -> k >>= go_fast_end (BS.unsafeDrop sz bs)
1176
1177go_fast_end !bs (ConsumeStringIndef k) =
1178    case tryConsumeStringIndef (BS.unsafeHead bs) of
1179      DecodeFailure     -> return $! SlowFail bs "expected string start"
1180      DecodedToken sz _ -> k >>= go_fast_end (BS.unsafeDrop sz bs)
1181
1182go_fast_end !bs (ConsumeListLenIndef k) =
1183    case tryConsumeListLenIndef (BS.unsafeHead bs) of
1184      DecodeFailure     -> return $! SlowFail bs "expected list start"
1185      DecodedToken sz _ -> k >>= go_fast_end (BS.unsafeDrop sz bs)
1186
1187go_fast_end !bs (ConsumeMapLenIndef k) =
1188    case tryConsumeMapLenIndef (BS.unsafeHead bs) of
1189      DecodeFailure     -> return $! SlowFail bs "expected map start"
1190      DecodedToken sz _ -> k >>= go_fast_end (BS.unsafeDrop sz bs)
1191
1192go_fast_end !bs (ConsumeNull k) =
1193    case tryConsumeNull (BS.unsafeHead bs) of
1194      DecodeFailure     -> return $! SlowFail bs "expected null"
1195      DecodedToken sz _ -> k >>= go_fast_end (BS.unsafeDrop sz bs)
1196
1197go_fast_end !bs (ConsumeListLenOrIndef k) =
1198    case tryConsumeListLenOrIndef (BS.unsafeHead bs) bs of
1199      DecodeFailure           -> return $! SlowFail bs "expected list len or indef"
1200      DecodedToken sz (I# n#) -> k n# >>= go_fast_end (BS.unsafeDrop sz bs)
1201
1202go_fast_end !bs (ConsumeMapLenOrIndef k) =
1203    case tryConsumeMapLenOrIndef (BS.unsafeHead bs) bs of
1204      DecodeFailure           -> return $! SlowFail bs "expected map len or indef"
1205      DecodedToken sz (I# n#) -> k n# >>= go_fast_end (BS.unsafeDrop sz bs)
1206
1207
1208-- The slow path starts off by running the fast path on the current chunk
1209-- then looking at where it finished, fixing up the chunk boundary issues,
1210-- getting more input and going around again.
1211--
1212-- The offset here is the offset after of all data consumed so far,
1213-- so not including the current chunk.
1214--
1215go_slow :: DecodeAction s a -> ByteString -> ByteOffset
1216        -> IncrementalDecoder s (ByteString, ByteOffset, a)
1217go_slow da bs !offset = do
1218  slowpath <- lift $ go_fast bs da
1219  case slowpath of
1220    FastDone bs' x -> return (bs', offset', x)
1221      where
1222        !offset' = offset + intToInt64 (BS.length bs - BS.length bs')
1223
1224    SlowConsumeTokenBytes bs' k len -> do
1225      (bstr, bs'') <- getTokenVarLen len bs' offset'
1226      lift (k bstr) >>= \daz -> go_slow daz bs'' (offset' + intToInt64 len)
1227      where
1228        !offset' = offset + intToInt64 (BS.length bs - BS.length bs')
1229
1230    SlowConsumeTokenByteArray bs' k len -> do
1231      (bstr, bs'') <- getTokenVarLen len bs' offset'
1232      let !str = BA.fromByteString bstr
1233      lift (k str) >>= \daz -> go_slow daz bs'' (offset' + intToInt64 len)
1234      where
1235        !offset' = offset + intToInt64 (BS.length bs - BS.length bs')
1236
1237    SlowConsumeTokenString bs' k len -> do
1238      (bstr, bs'') <- getTokenVarLen len bs' offset'
1239      case T.decodeUtf8' bstr of
1240        Right str -> lift (k str) >>= \daz ->
1241                     go_slow daz bs'' (offset' + intToInt64 len)
1242        Left _e   -> decodeFail bs' offset' "invalid UTF8"
1243      where
1244        !offset' = offset + intToInt64 (BS.length bs - BS.length bs')
1245
1246    SlowConsumeTokenUtf8ByteArray bs' k len -> do
1247      (bstr, bs'') <- getTokenVarLen len bs' offset'
1248      let !str = BA.fromByteString bstr
1249      lift (k str) >>= \daz -> go_slow daz bs'' (offset' + intToInt64 len)
1250      where
1251        !offset' = offset + intToInt64 (BS.length bs - BS.length bs')
1252
1253    -- we didn't have enough input in the buffer
1254    SlowDecodeAction bs' da' | BS.null bs' -> do
1255      -- in this case we're exactly out of input
1256      -- so we can get more input and carry on
1257      mbs <- needChunk
1258      case mbs of
1259        Nothing   -> decodeFail bs' offset' "end of input"
1260        Just bs'' -> go_slow da' bs'' offset'
1261      where
1262        !offset' = offset + intToInt64 (BS.length bs - BS.length bs')
1263
1264    SlowDecodeAction bs' da' ->
1265      -- of course we should only end up here when we really are out of
1266      -- input, otherwise go_fast_end could have continued
1267      assert (BS.length bs' < tokenSize (BS.head bs')) $
1268      go_slow_fixup da' bs' offset'
1269      where
1270        !offset' = offset + intToInt64 (BS.length bs - BS.length bs')
1271
1272    SlowPeekByteOffset bs' k ->
1273      lift (k off#) >>= \daz -> go_slow daz bs' offset'
1274      where
1275        !offset'@(I64# off#) = offset + intToInt64 (BS.length bs - BS.length bs')
1276
1277    SlowFail bs' msg -> decodeFail bs' offset' msg
1278      where
1279        !offset' = offset + intToInt64 (BS.length bs - BS.length bs')
1280
1281-- The complicated case is when a token spans a chunk boundary.
1282--
1283-- Our goal is to get enough input so that go_fast_end can consume exactly one
1284-- token without need for further fixups.
1285--
1286go_slow_fixup :: DecodeAction s a -> ByteString -> ByteOffset
1287              -> IncrementalDecoder s (ByteString, ByteOffset, a)
1288go_slow_fixup da !bs !offset = do
1289    let !hdr = BS.head bs
1290        !sz  = tokenSize hdr
1291    mbs <- needChunk
1292    case mbs of
1293      Nothing -> decodeFail bs offset "end of input"
1294
1295      Just bs'
1296          -- We have enough input now, try reading one final token
1297        | BS.length bs + BS.length bs' >= sz
1298       -> go_slow_overlapped da sz bs bs' offset
1299
1300          -- We still don't have enough input, get more
1301        | otherwise
1302       -> go_slow_fixup da (bs <> bs') offset
1303
1304-- We've now got more input, but we have one token that spanned the old and
1305-- new input buffers, so we have to decode that one before carrying on
1306go_slow_overlapped :: DecodeAction s a -> Int -> ByteString -> ByteString
1307                   -> ByteOffset
1308                   -> IncrementalDecoder s (ByteString, ByteOffset, a)
1309go_slow_overlapped da sz bs_cur bs_next !offset =
1310
1311    -- we have:
1312    --   sz            the size of the pending input token
1313    --   bs_cur        the tail end of the previous input buffer
1314    --   bs_next       the next input chunk
1315
1316    -- we know the old buffer is too small, but the combo is enough
1317    assert (BS.length bs_cur < sz) $
1318    assert (BS.length bs_cur + BS.length bs_next >= sz) $
1319
1320    -- we make:
1321    --   bs_tok        a buffer containing only the pending input token
1322    --   bs'           the tail of the next input chunk,
1323    --                   which will become the next input buffer
1324
1325    let bs_tok   = bs_cur <> BS.unsafeTake (sz - BS.length bs_cur) bs_next
1326        bs'      =           BS.unsafeDrop (sz - BS.length bs_cur) bs_next
1327        offset'  = offset + intToInt64 sz in
1328
1329    -- so the token chunk should be exactly the right size
1330    assert (BS.length bs_tok == sz) $
1331    -- and overall we shouldn't loose any input
1332    assert (BS.length bs_cur + BS.length bs_next == sz + BS.length bs') $ do
1333
1334    -- so now we can run the fast path to consume just this one token
1335    slowpath <- lift $ go_fast_end bs_tok da
1336    case slowpath of
1337
1338      -- typically we'll fall out of the fast path having
1339      -- consumed exactly one token, now with no trailing data
1340      SlowDecodeAction bs_empty da' ->
1341        assert (BS.null bs_empty) $
1342        go_slow da' bs' offset'
1343
1344      -- but the other possibilities can happen too
1345      FastDone bs_empty x ->
1346        assert (BS.null bs_empty) $
1347        return (bs', offset', x)
1348
1349      SlowConsumeTokenBytes bs_empty k len ->
1350        assert (BS.null bs_empty) $ do
1351        (bstr, bs'') <- getTokenShortOrVarLen bs' offset' len
1352        lift (k bstr) >>= \daz -> go_slow daz bs'' (offset' + intToInt64 len)
1353
1354      SlowConsumeTokenByteArray bs_empty k len ->
1355        assert (BS.null bs_empty) $ do
1356        (bstr, bs'') <- getTokenShortOrVarLen bs' offset' len
1357        let !ba = BA.fromByteString bstr
1358        lift (k ba) >>= \daz -> go_slow daz bs'' (offset' + intToInt64 len)
1359
1360      SlowConsumeTokenString bs_empty k len ->
1361        assert (BS.null bs_empty) $ do
1362        (bstr, bs'') <- getTokenShortOrVarLen bs' offset' len
1363        case T.decodeUtf8' bstr of
1364          Right str -> lift (k str) >>= \daz ->
1365                       go_slow daz bs'' (offset' + intToInt64 len)
1366          Left _e   -> decodeFail bs' offset' "invalid UTF8"
1367
1368      SlowConsumeTokenUtf8ByteArray bs_empty k len ->
1369        assert (BS.null bs_empty) $ do
1370        (bstr, bs'') <- getTokenShortOrVarLen bs' offset' len
1371        let !ba = BA.fromByteString bstr
1372        lift (k ba) >>= \daz -> go_slow daz bs'' (offset' + intToInt64 len)
1373
1374      SlowPeekByteOffset bs_empty k ->
1375        assert (BS.null bs_empty) $ do
1376        lift (k off#) >>= \daz -> go_slow daz bs' offset'
1377        where
1378          !(I64# off#) = offset'
1379
1380      SlowFail bs_unconsumed msg ->
1381        decodeFail (bs_unconsumed <> bs') offset'' msg
1382        where
1383          !offset'' = offset + intToInt64 (sz - BS.length bs_unconsumed)
1384  where
1385    {-# INLINE getTokenShortOrVarLen #-}
1386    getTokenShortOrVarLen :: BS.ByteString
1387                          -> ByteOffset
1388                          -> Int
1389                          -> IncrementalDecoder s (ByteString, ByteString)
1390    getTokenShortOrVarLen bs' offset' len
1391      | BS.length bs' < len = getTokenVarLen len bs' offset'
1392      | otherwise           = let !bstr = BS.take len bs'
1393                                  !bs'' = BS.drop len bs'
1394                               in return (bstr, bs'')
1395
1396
1397-- TODO FIXME: we can do slightly better here. If we're returning a
1398-- lazy string (String, lazy Text, lazy ByteString) then we don't have
1399-- to strictify here and if we're returning a strict string perhaps we
1400-- can still stream the utf8 validation/converstion
1401
1402-- TODO FIXME: also consider sharing or not sharing here, and possibly
1403-- rechunking.
1404
1405getTokenVarLen :: Int -> ByteString -> ByteOffset
1406               -> IncrementalDecoder s (ByteString, ByteString)
1407getTokenVarLen len bs offset =
1408    assert (len > BS.length bs) $ do
1409    mbs <- needChunk
1410    case mbs of
1411      Nothing -> decodeFail BS.empty offset "end of input"
1412      Just bs'
1413        | let n = len - BS.length bs
1414        , BS.length bs' >= n ->
1415            let !tok = bs <> BS.unsafeTake n bs'
1416             in return (tok, BS.drop n bs')
1417
1418        | otherwise -> getTokenVarLenSlow
1419                         [bs',bs]
1420                         (len - (BS.length bs + BS.length bs'))
1421                         offset
1422
1423getTokenVarLenSlow :: [ByteString] -> Int -> ByteOffset
1424                   -> IncrementalDecoder s (ByteString, ByteString)
1425getTokenVarLenSlow bss n offset = do
1426    mbs <- needChunk
1427    case mbs of
1428      Nothing -> decodeFail BS.empty offset "end of input"
1429      Just bs
1430        | BS.length bs >= n ->
1431            let !tok = BS.concat (reverse (BS.unsafeTake n bs : bss))
1432             in return (tok, BS.drop n bs)
1433        | otherwise -> getTokenVarLenSlow (bs:bss) (n - BS.length bs) offset
1434
1435
1436
1437tokenSize :: Word8 -> Int
1438tokenSize hdr =
1439    word8ToInt $
1440      decodeTableSz `A.unsafeAt` (word8ToInt hdr .&. 0x1f)
1441
1442decodeTableSz :: UArray Word8 Word8
1443decodeTableSz =
1444  array (0, 0x1f) $
1445      [ (encodeHeader 0 n, 1) | n <- [0..0x1f] ]
1446   ++ [ (encodeHeader 0 n, s) | (n, s) <- zip [24..27] [2,3,5,9] ]
1447
1448decodeTokenTypeTable :: Array Word8 TokenType
1449decodeTokenTypeTable =
1450  array (minBound, maxBound) $
1451    [ (encodeHeader 0 n,  TypeUInt) | n <-  [0..26] ]
1452 ++ [ (encodeHeader 0 27, TypeUInt64)
1453    , (encodeHeader 0 31, TypeInvalid) ]
1454
1455 ++ [ (encodeHeader 1 n,  TypeNInt) | n <-  [0..26] ]
1456 ++ [ (encodeHeader 1 27, TypeNInt64)
1457    , (encodeHeader 1 31, TypeInvalid) ]
1458
1459 ++ [ (encodeHeader 2 n,  TypeBytes) | n <-  [0..27] ]
1460 ++ [ (encodeHeader 2 31, TypeBytesIndef) ]
1461
1462 ++ [ (encodeHeader 3 n,  TypeString) | n <-  [0..27] ]
1463 ++ [ (encodeHeader 3 31, TypeStringIndef) ]
1464
1465 ++ [ (encodeHeader 4 n,  TypeListLen) | n <-  [0..26] ]
1466 ++ [ (encodeHeader 4 27, TypeListLen64)
1467    , (encodeHeader 4 31, TypeListLenIndef) ]
1468
1469 ++ [ (encodeHeader 5 n,  TypeMapLen) | n <-  [0..26] ]
1470 ++ [ (encodeHeader 5 27, TypeMapLen64)
1471    , (encodeHeader 5 31, TypeMapLenIndef) ]
1472
1473 ++ [ (encodeHeader 6 n,  TypeTag) | n <- 0:1:[4..26] ]
1474 ++ [ (encodeHeader 6 2,  TypeInteger)
1475    , (encodeHeader 6 3,  TypeInteger)
1476    , (encodeHeader 6 27, TypeTag64)
1477    , (encodeHeader 6 31, TypeInvalid) ]
1478
1479 ++ [ (encodeHeader 7 n,  TypeSimple) | n <-  [0..19] ]
1480 ++ [ (encodeHeader 7 20, TypeBool)
1481    , (encodeHeader 7 21, TypeBool)
1482    , (encodeHeader 7 22, TypeNull)
1483    , (encodeHeader 7 23, TypeSimple)
1484    , (encodeHeader 7 24, TypeSimple)
1485    , (encodeHeader 7 25, TypeFloat16)
1486    , (encodeHeader 7 26, TypeFloat32)
1487    , (encodeHeader 7 27, TypeFloat64)
1488    , (encodeHeader 7 31, TypeBreak) ]
1489
1490 ++ [ (encodeHeader mt n, TypeInvalid) | mt <- [0..7], n <- [28..30] ]
1491
1492encodeHeader :: Word8 -> Word8 -> Word8
1493encodeHeader mt ai = mt `shiftL` 5 .|. ai
1494
1495data DecodedToken a = DecodedToken !Int !a | DecodeFailure
1496  deriving Show
1497-- TODO add classification for DecodeFailure
1498
1499-- | Note that canonicity information is calculated lazily. This way we don't
1500-- need to concern ourselves with two distinct paths, while according to
1501-- benchmarks it doesn't affect performance in the non-canonical case.
1502data LongToken a = Fits Bool {- canonical? -} !a
1503                 | TooLong Bool {- canonical? -} !Int
1504  deriving Show
1505
1506-- Canoncal NaN floats:
1507--
1508-- In these float/double canonical tests we check NaNs are canonical too.
1509-- There are lots of bit values representing NaN, for each of the flat types.
1510-- The rule from CBOR RFC 7049, section 3.9 is that the canonical NaN is the
1511-- CBOR term f97e00 which is the canonical half-float representation. We do
1512-- this by testing for the size being 3 (since tryConsumeFloat/Double only
1513-- return 3 when the header byte is 0xf9) and the 16 bytes being 0x7e00.
1514
1515{-# INLINE isFloat16Canonical #-}
1516isFloat16Canonical :: Int -> BS.ByteString -> Float -> Bool
1517isFloat16Canonical sz bs f
1518  | sz /= 3   = False
1519  | isNaN f   = eatTailWord16 bs == 0x7e00
1520  | otherwise = True
1521
1522{-# INLINE isFloatCanonical #-}
1523isFloatCanonical :: Int -> BS.ByteString -> Float -> Bool
1524isFloatCanonical sz bs f
1525  | isNaN f   = sz == 3 && eatTailWord16 bs == 0x7e00
1526  | otherwise = sz == 5
1527
1528{-# INLINE isDoubleCanonical #-}
1529isDoubleCanonical :: Int -> BS.ByteString -> Double -> Bool
1530isDoubleCanonical sz bs f
1531  | isNaN f   = sz == 3 && eatTailWord16 bs == 0x7e00
1532  | otherwise = sz == 9
1533
1534{-# INLINE isWordCanonical #-}
1535isWordCanonical :: Int -> Word# -> Bool
1536isWordCanonical sz w#
1537  | sz == 2   = isTrue# (w# `gtWord#` 0x17##)
1538  | sz == 3   = isTrue# (w# `gtWord#` 0xff##)
1539  | sz == 5   = isTrue# (w# `gtWord#` 0xffff##)
1540  | sz == 9   = isTrue# (w# `gtWord#` 0xffffffff##)
1541  | otherwise = True
1542
1543{-# INLINE isIntCanonical #-}
1544isIntCanonical :: Int -> Int# -> Bool
1545isIntCanonical sz i#
1546  | isTrue# (i# <# 0#) = isWordCanonical sz (not# w#)
1547  | otherwise          = isWordCanonical sz       w#
1548  where
1549    w# = int2Word# i#
1550
1551#if defined(ARCH_32bit)
1552{-# INLINE isWord64Canonical #-}
1553isWord64Canonical :: Int -> Word64# -> Bool
1554isWord64Canonical sz w#
1555  | sz == 2   = isTrue# (w# `gtWord64#` wordToWord64# 0x17##)
1556  | sz == 3   = isTrue# (w# `gtWord64#` wordToWord64# 0xff##)
1557  | sz == 5   = isTrue# (w# `gtWord64#` wordToWord64# 0xffff##)
1558  | sz == 9   = isTrue# (w# `gtWord64#` wordToWord64# 0xffffffff##)
1559  | otherwise = True
1560
1561{-# INLINE isInt64Canonical #-}
1562isInt64Canonical :: Int -> Int64# -> Bool
1563isInt64Canonical sz i#
1564  | isTrue# (i# `ltInt64#` intToInt64# 0#) = isWord64Canonical sz (not64# w#)
1565  | otherwise                              = isWord64Canonical sz         w#
1566  where
1567    w# = int64ToWord64# i#
1568#endif
1569
1570{-# INLINE isSimpleCanonical #-}
1571isSimpleCanonical :: Int -> Word# -> Bool
1572isSimpleCanonical 2 w# = isTrue# (w# `gtWord#` 0x17##)
1573isSimpleCanonical _ _  = True -- only size 1 and 2 are possible here
1574
1575
1576-- TODO FIXME: check with 7.10 and file ticket:
1577-- a case analysis against 0x00 .. 0xff :: Word8 turns into a huge chain
1578-- of >= tests. It could use a jump table, or at least it could use a binary
1579-- division. Whereas for Int or Word it does the right thing.
1580
1581{-# INLINE tryConsumeWord #-}
1582tryConsumeWord :: Word8 -> ByteString -> DecodedToken Word
1583tryConsumeWord hdr !bs = case word8ToWord hdr of
1584  -- Positive integers (type 0)
1585  0x00 -> DecodedToken 1 0
1586  0x01 -> DecodedToken 1 1
1587  0x02 -> DecodedToken 1 2
1588  0x03 -> DecodedToken 1 3
1589  0x04 -> DecodedToken 1 4
1590  0x05 -> DecodedToken 1 5
1591  0x06 -> DecodedToken 1 6
1592  0x07 -> DecodedToken 1 7
1593  0x08 -> DecodedToken 1 8
1594  0x09 -> DecodedToken 1 9
1595  0x0a -> DecodedToken 1 10
1596  0x0b -> DecodedToken 1 11
1597  0x0c -> DecodedToken 1 12
1598  0x0d -> DecodedToken 1 13
1599  0x0e -> DecodedToken 1 14
1600  0x0f -> DecodedToken 1 15
1601  0x10 -> DecodedToken 1 16
1602  0x11 -> DecodedToken 1 17
1603  0x12 -> DecodedToken 1 18
1604  0x13 -> DecodedToken 1 19
1605  0x14 -> DecodedToken 1 20
1606  0x15 -> DecodedToken 1 21
1607  0x16 -> DecodedToken 1 22
1608  0x17 -> DecodedToken 1 23
1609  0x18 -> DecodedToken 2 $! word8ToWord  (eatTailWord8 bs)
1610  0x19 -> DecodedToken 3 $! word16ToWord (eatTailWord16 bs)
1611  0x1a -> DecodedToken 5 $! word32ToWord (eatTailWord32 bs)
1612#if defined(ARCH_64bit)
1613  0x1b -> DecodedToken 9 $! word64ToWord (eatTailWord64 bs)
1614#else
1615  0x1b -> case word64ToWord (eatTailWord64 bs) of
1616            Just n  -> DecodedToken 9 n
1617            Nothing -> DecodeFailure
1618#endif
1619  _    -> DecodeFailure
1620
1621
1622{-# INLINE tryConsumeNegWord #-}
1623tryConsumeNegWord :: Word8 -> ByteString -> DecodedToken Word
1624tryConsumeNegWord hdr !bs = case word8ToWord hdr of
1625  -- Positive integers (type 0)
1626  0x20 -> DecodedToken 1 0
1627  0x21 -> DecodedToken 1 1
1628  0x22 -> DecodedToken 1 2
1629  0x23 -> DecodedToken 1 3
1630  0x24 -> DecodedToken 1 4
1631  0x25 -> DecodedToken 1 5
1632  0x26 -> DecodedToken 1 6
1633  0x27 -> DecodedToken 1 7
1634  0x28 -> DecodedToken 1 8
1635  0x29 -> DecodedToken 1 9
1636  0x2a -> DecodedToken 1 10
1637  0x2b -> DecodedToken 1 11
1638  0x2c -> DecodedToken 1 12
1639  0x2d -> DecodedToken 1 13
1640  0x2e -> DecodedToken 1 14
1641  0x2f -> DecodedToken 1 15
1642  0x30 -> DecodedToken 1 16
1643  0x31 -> DecodedToken 1 17
1644  0x32 -> DecodedToken 1 18
1645  0x33 -> DecodedToken 1 19
1646  0x34 -> DecodedToken 1 20
1647  0x35 -> DecodedToken 1 21
1648  0x36 -> DecodedToken 1 22
1649  0x37 -> DecodedToken 1 23
1650  0x38 -> DecodedToken 2 $! (word8ToWord  (eatTailWord8 bs))
1651  0x39 -> DecodedToken 3 $! (word16ToWord (eatTailWord16 bs))
1652  0x3a -> DecodedToken 5 $! (word32ToWord (eatTailWord32 bs))
1653#if defined(ARCH_64bit)
1654  0x3b -> DecodedToken 9 $! (word64ToWord (eatTailWord64 bs))
1655#else
1656  0x3b -> case word64ToWord (eatTailWord64 bs) of
1657            Just n  -> DecodedToken 9 n
1658            Nothing -> DecodeFailure
1659#endif
1660  _    -> DecodeFailure
1661
1662
1663{-# INLINE tryConsumeInt #-}
1664tryConsumeInt :: Word8 -> ByteString -> DecodedToken Int
1665tryConsumeInt hdr !bs = case word8ToWord hdr of
1666  -- Positive integers (type 0)
1667  0x00 -> DecodedToken 1 0
1668  0x01 -> DecodedToken 1 1
1669  0x02 -> DecodedToken 1 2
1670  0x03 -> DecodedToken 1 3
1671  0x04 -> DecodedToken 1 4
1672  0x05 -> DecodedToken 1 5
1673  0x06 -> DecodedToken 1 6
1674  0x07 -> DecodedToken 1 7
1675  0x08 -> DecodedToken 1 8
1676  0x09 -> DecodedToken 1 9
1677  0x0a -> DecodedToken 1 10
1678  0x0b -> DecodedToken 1 11
1679  0x0c -> DecodedToken 1 12
1680  0x0d -> DecodedToken 1 13
1681  0x0e -> DecodedToken 1 14
1682  0x0f -> DecodedToken 1 15
1683  0x10 -> DecodedToken 1 16
1684  0x11 -> DecodedToken 1 17
1685  0x12 -> DecodedToken 1 18
1686  0x13 -> DecodedToken 1 19
1687  0x14 -> DecodedToken 1 20
1688  0x15 -> DecodedToken 1 21
1689  0x16 -> DecodedToken 1 22
1690  0x17 -> DecodedToken 1 23
1691  0x18 -> DecodedToken 2 $! (word8ToInt  (eatTailWord8 bs))
1692  0x19 -> DecodedToken 3 $! (word16ToInt (eatTailWord16 bs))
1693#if defined(ARCH_64bit)
1694  0x1a -> DecodedToken 5 $! (word32ToInt (eatTailWord32 bs))
1695#else
1696  0x1a -> case word32ToInt (eatTailWord32 bs) of
1697            Just n  -> DecodedToken 5 n
1698            Nothing -> DecodeFailure
1699#endif
1700  0x1b -> case word64ToInt (eatTailWord64 bs) of
1701            Just n  -> DecodedToken 9 n
1702            Nothing -> DecodeFailure
1703
1704  -- Negative integers (type 1)
1705  0x20 -> DecodedToken 1 (-1)
1706  0x21 -> DecodedToken 1 (-2)
1707  0x22 -> DecodedToken 1 (-3)
1708  0x23 -> DecodedToken 1 (-4)
1709  0x24 -> DecodedToken 1 (-5)
1710  0x25 -> DecodedToken 1 (-6)
1711  0x26 -> DecodedToken 1 (-7)
1712  0x27 -> DecodedToken 1 (-8)
1713  0x28 -> DecodedToken 1 (-9)
1714  0x29 -> DecodedToken 1 (-10)
1715  0x2a -> DecodedToken 1 (-11)
1716  0x2b -> DecodedToken 1 (-12)
1717  0x2c -> DecodedToken 1 (-13)
1718  0x2d -> DecodedToken 1 (-14)
1719  0x2e -> DecodedToken 1 (-15)
1720  0x2f -> DecodedToken 1 (-16)
1721  0x30 -> DecodedToken 1 (-17)
1722  0x31 -> DecodedToken 1 (-18)
1723  0x32 -> DecodedToken 1 (-19)
1724  0x33 -> DecodedToken 1 (-20)
1725  0x34 -> DecodedToken 1 (-21)
1726  0x35 -> DecodedToken 1 (-22)
1727  0x36 -> DecodedToken 1 (-23)
1728  0x37 -> DecodedToken 1 (-24)
1729  0x38 -> DecodedToken 2 $! (-1 - word8ToInt  (eatTailWord8 bs))
1730  0x39 -> DecodedToken 3 $! (-1 - word16ToInt (eatTailWord16 bs))
1731#if defined(ARCH_64bit)
1732  0x3a -> DecodedToken 5 $! (-1 - word32ToInt (eatTailWord32 bs))
1733#else
1734  0x3a -> case word32ToInt (eatTailWord32 bs) of
1735            Just n  -> DecodedToken 5 (-1 - n)
1736            Nothing -> DecodeFailure
1737#endif
1738  0x3b -> case word64ToInt (eatTailWord64 bs) of
1739            Just n  -> DecodedToken 9 (-1 - n)
1740            Nothing -> DecodeFailure
1741  _    -> DecodeFailure
1742
1743
1744{-# INLINE tryConsumeInteger #-}
1745tryConsumeInteger :: Word8 -> ByteString -> DecodedToken (BigIntToken Integer)
1746tryConsumeInteger hdr !bs = case word8ToWord hdr of
1747
1748  -- Positive integers (type 0)
1749  0x00 -> DecodedToken 1 (BigIntToken True 0)
1750  0x01 -> DecodedToken 1 (BigIntToken True 1)
1751  0x02 -> DecodedToken 1 (BigIntToken True 2)
1752  0x03 -> DecodedToken 1 (BigIntToken True 3)
1753  0x04 -> DecodedToken 1 (BigIntToken True 4)
1754  0x05 -> DecodedToken 1 (BigIntToken True 5)
1755  0x06 -> DecodedToken 1 (BigIntToken True 6)
1756  0x07 -> DecodedToken 1 (BigIntToken True 7)
1757  0x08 -> DecodedToken 1 (BigIntToken True 8)
1758  0x09 -> DecodedToken 1 (BigIntToken True 9)
1759  0x0a -> DecodedToken 1 (BigIntToken True 10)
1760  0x0b -> DecodedToken 1 (BigIntToken True 11)
1761  0x0c -> DecodedToken 1 (BigIntToken True 12)
1762  0x0d -> DecodedToken 1 (BigIntToken True 13)
1763  0x0e -> DecodedToken 1 (BigIntToken True 14)
1764  0x0f -> DecodedToken 1 (BigIntToken True 15)
1765  0x10 -> DecodedToken 1 (BigIntToken True 16)
1766  0x11 -> DecodedToken 1 (BigIntToken True 17)
1767  0x12 -> DecodedToken 1 (BigIntToken True 18)
1768  0x13 -> DecodedToken 1 (BigIntToken True 19)
1769  0x14 -> DecodedToken 1 (BigIntToken True 20)
1770  0x15 -> DecodedToken 1 (BigIntToken True 21)
1771  0x16 -> DecodedToken 1 (BigIntToken True 22)
1772  0x17 -> DecodedToken 1 (BigIntToken True 23)
1773
1774  0x18 -> let !w@(W8# w#) = eatTailWord8 bs
1775              sz = 2
1776          in DecodedToken sz (BigIntToken (isWordCanonical sz w#)   $! toInteger w)
1777  0x19 -> let !w@(W16# w#) = eatTailWord16 bs
1778              sz = 3
1779          in DecodedToken sz (BigIntToken (isWordCanonical sz w#)   $! toInteger w)
1780  0x1a -> let !w@(W32# w#) = eatTailWord32 bs
1781              sz = 5
1782          in DecodedToken sz (BigIntToken (isWordCanonical sz w#)   $! toInteger w)
1783  0x1b -> let !w@(W64# w#) = eatTailWord64 bs
1784              sz = 9
1785#if defined(ARCH_32bit)
1786          in DecodedToken sz (BigIntToken (isWord64Canonical sz w#) $! toInteger w)
1787#else
1788          in DecodedToken sz (BigIntToken (isWordCanonical sz w#)   $! toInteger w)
1789#endif
1790
1791  -- Negative integers (type 1)
1792  0x20 -> DecodedToken 1 (BigIntToken True (-1))
1793  0x21 -> DecodedToken 1 (BigIntToken True (-2))
1794  0x22 -> DecodedToken 1 (BigIntToken True (-3))
1795  0x23 -> DecodedToken 1 (BigIntToken True (-4))
1796  0x24 -> DecodedToken 1 (BigIntToken True (-5))
1797  0x25 -> DecodedToken 1 (BigIntToken True (-6))
1798  0x26 -> DecodedToken 1 (BigIntToken True (-7))
1799  0x27 -> DecodedToken 1 (BigIntToken True (-8))
1800  0x28 -> DecodedToken 1 (BigIntToken True (-9))
1801  0x29 -> DecodedToken 1 (BigIntToken True (-10))
1802  0x2a -> DecodedToken 1 (BigIntToken True (-11))
1803  0x2b -> DecodedToken 1 (BigIntToken True (-12))
1804  0x2c -> DecodedToken 1 (BigIntToken True (-13))
1805  0x2d -> DecodedToken 1 (BigIntToken True (-14))
1806  0x2e -> DecodedToken 1 (BigIntToken True (-15))
1807  0x2f -> DecodedToken 1 (BigIntToken True (-16))
1808  0x30 -> DecodedToken 1 (BigIntToken True (-17))
1809  0x31 -> DecodedToken 1 (BigIntToken True (-18))
1810  0x32 -> DecodedToken 1 (BigIntToken True (-19))
1811  0x33 -> DecodedToken 1 (BigIntToken True (-20))
1812  0x34 -> DecodedToken 1 (BigIntToken True (-21))
1813  0x35 -> DecodedToken 1 (BigIntToken True (-22))
1814  0x36 -> DecodedToken 1 (BigIntToken True (-23))
1815  0x37 -> DecodedToken 1 (BigIntToken True (-24))
1816  0x38 -> let !w@(W8# w#) = eatTailWord8 bs
1817              sz = 2
1818          in DecodedToken sz (BigIntToken (isWordCanonical sz w#)   $! (-1 - toInteger w))
1819  0x39 -> let !w@(W16# w#) = eatTailWord16 bs
1820              sz = 3
1821          in DecodedToken sz (BigIntToken (isWordCanonical sz w#)   $! (-1 - toInteger w))
1822  0x3a -> let !w@(W32# w#) = eatTailWord32 bs
1823              sz = 5
1824          in DecodedToken sz (BigIntToken (isWordCanonical sz w#)   $! (-1 - toInteger w))
1825  0x3b -> let !w@(W64# w#) = eatTailWord64 bs
1826              sz = 9
1827#if defined(ARCH_32bit)
1828          in DecodedToken sz (BigIntToken (isWord64Canonical sz w#) $! (-1 - toInteger w))
1829#else
1830          in DecodedToken sz (BigIntToken (isWordCanonical sz w#)   $! (-1 - toInteger w))
1831#endif
1832
1833  0xc2 -> readBigUInt bs
1834  0xc3 -> readBigNInt bs
1835
1836  _    -> DecodeFailure
1837
1838
1839{-# INLINE tryConsumeBytes #-}
1840tryConsumeBytes :: Word8 -> ByteString -> DecodedToken (LongToken ByteString)
1841tryConsumeBytes hdr !bs = case word8ToWord hdr of
1842
1843  -- Bytes (type 2)
1844  0x40 -> readBytesSmall 0 bs
1845  0x41 -> readBytesSmall 1 bs
1846  0x42 -> readBytesSmall 2 bs
1847  0x43 -> readBytesSmall 3 bs
1848  0x44 -> readBytesSmall 4 bs
1849  0x45 -> readBytesSmall 5 bs
1850  0x46 -> readBytesSmall 6 bs
1851  0x47 -> readBytesSmall 7 bs
1852  0x48 -> readBytesSmall 8 bs
1853  0x49 -> readBytesSmall 9 bs
1854  0x4a -> readBytesSmall 10 bs
1855  0x4b -> readBytesSmall 11 bs
1856  0x4c -> readBytesSmall 12 bs
1857  0x4d -> readBytesSmall 13 bs
1858  0x4e -> readBytesSmall 14 bs
1859  0x4f -> readBytesSmall 15 bs
1860  0x50 -> readBytesSmall 16 bs
1861  0x51 -> readBytesSmall 17 bs
1862  0x52 -> readBytesSmall 18 bs
1863  0x53 -> readBytesSmall 19 bs
1864  0x54 -> readBytesSmall 20 bs
1865  0x55 -> readBytesSmall 21 bs
1866  0x56 -> readBytesSmall 22 bs
1867  0x57 -> readBytesSmall 23 bs
1868  0x58 -> readBytes8  bs
1869  0x59 -> readBytes16 bs
1870  0x5a -> readBytes32 bs
1871  0x5b -> readBytes64 bs
1872  _    -> DecodeFailure
1873
1874
1875{-# INLINE tryConsumeString #-}
1876tryConsumeString :: Word8 -> ByteString -> DecodedToken (LongToken ByteString)
1877tryConsumeString hdr !bs = case word8ToWord hdr of
1878
1879  -- Strings (type 3)
1880  0x60 -> readBytesSmall 0 bs
1881  0x61 -> readBytesSmall 1 bs
1882  0x62 -> readBytesSmall 2 bs
1883  0x63 -> readBytesSmall 3 bs
1884  0x64 -> readBytesSmall 4 bs
1885  0x65 -> readBytesSmall 5 bs
1886  0x66 -> readBytesSmall 6 bs
1887  0x67 -> readBytesSmall 7 bs
1888  0x68 -> readBytesSmall 8 bs
1889  0x69 -> readBytesSmall 9 bs
1890  0x6a -> readBytesSmall 10 bs
1891  0x6b -> readBytesSmall 11 bs
1892  0x6c -> readBytesSmall 12 bs
1893  0x6d -> readBytesSmall 13 bs
1894  0x6e -> readBytesSmall 14 bs
1895  0x6f -> readBytesSmall 15 bs
1896  0x70 -> readBytesSmall 16 bs
1897  0x71 -> readBytesSmall 17 bs
1898  0x72 -> readBytesSmall 18 bs
1899  0x73 -> readBytesSmall 19 bs
1900  0x74 -> readBytesSmall 20 bs
1901  0x75 -> readBytesSmall 21 bs
1902  0x76 -> readBytesSmall 22 bs
1903  0x77 -> readBytesSmall 23 bs
1904  0x78 -> readBytes8  bs
1905  0x79 -> readBytes16 bs
1906  0x7a -> readBytes32 bs
1907  0x7b -> readBytes64 bs
1908  _    -> DecodeFailure
1909
1910
1911{-# INLINE tryConsumeListLen #-}
1912tryConsumeListLen :: Word8 -> ByteString -> DecodedToken Int
1913tryConsumeListLen hdr !bs = case word8ToWord hdr of
1914  -- List structures (type 4)
1915  0x80 -> DecodedToken 1 0
1916  0x81 -> DecodedToken 1 1
1917  0x82 -> DecodedToken 1 2
1918  0x83 -> DecodedToken 1 3
1919  0x84 -> DecodedToken 1 4
1920  0x85 -> DecodedToken 1 5
1921  0x86 -> DecodedToken 1 6
1922  0x87 -> DecodedToken 1 7
1923  0x88 -> DecodedToken 1 8
1924  0x89 -> DecodedToken 1 9
1925  0x8a -> DecodedToken 1 10
1926  0x8b -> DecodedToken 1 11
1927  0x8c -> DecodedToken 1 12
1928  0x8d -> DecodedToken 1 13
1929  0x8e -> DecodedToken 1 14
1930  0x8f -> DecodedToken 1 15
1931  0x90 -> DecodedToken 1 16
1932  0x91 -> DecodedToken 1 17
1933  0x92 -> DecodedToken 1 18
1934  0x93 -> DecodedToken 1 19
1935  0x94 -> DecodedToken 1 20
1936  0x95 -> DecodedToken 1 21
1937  0x96 -> DecodedToken 1 22
1938  0x97 -> DecodedToken 1 23
1939  0x98 -> DecodedToken 2 (word8ToInt  (eatTailWord8 bs))
1940  0x99 -> DecodedToken 3 (word16ToInt (eatTailWord16 bs))
1941#if defined(ARCH_64bit)
1942  0x9a -> DecodedToken 5 (word32ToInt (eatTailWord32 bs))
1943#else
1944  0x9a -> case word32ToInt (eatTailWord32 bs) of
1945            Just n  -> DecodedToken 5 n
1946            Nothing -> DecodeFailure
1947#endif
1948  0x9b -> case word64ToInt (eatTailWord64 bs) of
1949            Just n  -> DecodedToken 9 n
1950            Nothing -> DecodeFailure
1951  _    -> DecodeFailure
1952
1953
1954{-# INLINE tryConsumeMapLen #-}
1955tryConsumeMapLen :: Word8 -> ByteString -> DecodedToken Int
1956tryConsumeMapLen hdr !bs = case word8ToWord hdr of
1957  -- Map structures (type 5)
1958  0xa0 -> DecodedToken 1 0
1959  0xa1 -> DecodedToken 1 1
1960  0xa2 -> DecodedToken 1 2
1961  0xa3 -> DecodedToken 1 3
1962  0xa4 -> DecodedToken 1 4
1963  0xa5 -> DecodedToken 1 5
1964  0xa6 -> DecodedToken 1 6
1965  0xa7 -> DecodedToken 1 7
1966  0xa8 -> DecodedToken 1 8
1967  0xa9 -> DecodedToken 1 9
1968  0xaa -> DecodedToken 1 10
1969  0xab -> DecodedToken 1 11
1970  0xac -> DecodedToken 1 12
1971  0xad -> DecodedToken 1 13
1972  0xae -> DecodedToken 1 14
1973  0xaf -> DecodedToken 1 15
1974  0xb0 -> DecodedToken 1 16
1975  0xb1 -> DecodedToken 1 17
1976  0xb2 -> DecodedToken 1 18
1977  0xb3 -> DecodedToken 1 19
1978  0xb4 -> DecodedToken 1 20
1979  0xb5 -> DecodedToken 1 21
1980  0xb6 -> DecodedToken 1 22
1981  0xb7 -> DecodedToken 1 23
1982  0xb8 -> DecodedToken 2 $! (word8ToInt  (eatTailWord8 bs))
1983  0xb9 -> DecodedToken 3 $! (word16ToInt (eatTailWord16 bs))
1984#if defined(ARCH_64bit)
1985  0xba -> DecodedToken 5 $! (word32ToInt (eatTailWord32 bs))
1986#else
1987  0xba -> case word32ToInt (eatTailWord32 bs) of
1988            Just n  -> DecodedToken 5 n
1989            Nothing -> DecodeFailure
1990#endif
1991  0xbb -> case word64ToInt (eatTailWord64 bs) of
1992            Just n  -> DecodedToken 9 n
1993            Nothing -> DecodeFailure
1994  _    -> DecodeFailure
1995
1996
1997{-# INLINE tryConsumeListLenIndef #-}
1998tryConsumeListLenIndef :: Word8 -> DecodedToken ()
1999tryConsumeListLenIndef hdr = case word8ToWord hdr of
2000  0x9f -> DecodedToken 1 ()
2001  _    -> DecodeFailure
2002
2003
2004{-# INLINE tryConsumeMapLenIndef #-}
2005tryConsumeMapLenIndef :: Word8 -> DecodedToken ()
2006tryConsumeMapLenIndef hdr = case word8ToWord hdr of
2007  0xbf -> DecodedToken 1 ()
2008  _    -> DecodeFailure
2009
2010
2011{-# INLINE tryConsumeListLenOrIndef #-}
2012tryConsumeListLenOrIndef :: Word8 -> ByteString -> DecodedToken Int
2013tryConsumeListLenOrIndef hdr !bs = case word8ToWord hdr of
2014
2015  -- List structures (type 4)
2016  0x80 -> DecodedToken 1 0
2017  0x81 -> DecodedToken 1 1
2018  0x82 -> DecodedToken 1 2
2019  0x83 -> DecodedToken 1 3
2020  0x84 -> DecodedToken 1 4
2021  0x85 -> DecodedToken 1 5
2022  0x86 -> DecodedToken 1 6
2023  0x87 -> DecodedToken 1 7
2024  0x88 -> DecodedToken 1 8
2025  0x89 -> DecodedToken 1 9
2026  0x8a -> DecodedToken 1 10
2027  0x8b -> DecodedToken 1 11
2028  0x8c -> DecodedToken 1 12
2029  0x8d -> DecodedToken 1 13
2030  0x8e -> DecodedToken 1 14
2031  0x8f -> DecodedToken 1 15
2032  0x90 -> DecodedToken 1 16
2033  0x91 -> DecodedToken 1 17
2034  0x92 -> DecodedToken 1 18
2035  0x93 -> DecodedToken 1 19
2036  0x94 -> DecodedToken 1 20
2037  0x95 -> DecodedToken 1 21
2038  0x96 -> DecodedToken 1 22
2039  0x97 -> DecodedToken 1 23
2040  0x98 -> DecodedToken 2 $! (word8ToInt  (eatTailWord8 bs))
2041  0x99 -> DecodedToken 3 $! (word16ToInt (eatTailWord16 bs))
2042#if defined(ARCH_64bit)
2043  0x9a -> DecodedToken 5 $! (word32ToInt (eatTailWord32 bs))
2044#else
2045  0x9a -> case word32ToInt (eatTailWord32 bs) of
2046            Just n  -> DecodedToken 5 n
2047            Nothing -> DecodeFailure
2048#endif
2049  0x9b -> case word64ToInt (eatTailWord64 bs) of
2050            Just n  -> DecodedToken 9 n
2051            Nothing -> DecodeFailure
2052  0x9f -> DecodedToken 1 (-1) -- indefinite length
2053  _    -> DecodeFailure
2054
2055
2056{-# INLINE tryConsumeMapLenOrIndef #-}
2057tryConsumeMapLenOrIndef :: Word8 -> ByteString -> DecodedToken Int
2058tryConsumeMapLenOrIndef hdr !bs = case word8ToWord hdr of
2059
2060  -- Map structures (type 5)
2061  0xa0 -> DecodedToken 1 0
2062  0xa1 -> DecodedToken 1 1
2063  0xa2 -> DecodedToken 1 2
2064  0xa3 -> DecodedToken 1 3
2065  0xa4 -> DecodedToken 1 4
2066  0xa5 -> DecodedToken 1 5
2067  0xa6 -> DecodedToken 1 6
2068  0xa7 -> DecodedToken 1 7
2069  0xa8 -> DecodedToken 1 8
2070  0xa9 -> DecodedToken 1 9
2071  0xaa -> DecodedToken 1 10
2072  0xab -> DecodedToken 1 11
2073  0xac -> DecodedToken 1 12
2074  0xad -> DecodedToken 1 13
2075  0xae -> DecodedToken 1 14
2076  0xaf -> DecodedToken 1 15
2077  0xb0 -> DecodedToken 1 16
2078  0xb1 -> DecodedToken 1 17
2079  0xb2 -> DecodedToken 1 18
2080  0xb3 -> DecodedToken 1 19
2081  0xb4 -> DecodedToken 1 20
2082  0xb5 -> DecodedToken 1 21
2083  0xb6 -> DecodedToken 1 22
2084  0xb7 -> DecodedToken 1 23
2085  0xb8 -> DecodedToken 2 $! (word8ToInt  (eatTailWord8 bs))
2086  0xb9 -> DecodedToken 3 $! (word16ToInt (eatTailWord16 bs))
2087#if defined(ARCH_64bit)
2088  0xba -> DecodedToken 5 $! (word32ToInt (eatTailWord32 bs))
2089#else
2090  0xba -> case word32ToInt (eatTailWord32 bs) of
2091            Just n  -> DecodedToken 5 n
2092            Nothing -> DecodeFailure
2093#endif
2094  0xbb -> case word64ToInt (eatTailWord64 bs) of
2095            Just n  -> DecodedToken 9 n
2096            Nothing -> DecodeFailure
2097  0xbf -> DecodedToken 1 (-1) -- indefinite length
2098  _    -> DecodeFailure
2099
2100
2101{-# INLINE tryConsumeTag #-}
2102tryConsumeTag :: Word8 -> ByteString -> DecodedToken Word
2103tryConsumeTag hdr !bs = case word8ToWord hdr of
2104
2105  -- Tagged values (type 6)
2106  0xc0 -> DecodedToken 1 0
2107  0xc1 -> DecodedToken 1 1
2108  0xc2 -> DecodedToken 1 2
2109  0xc3 -> DecodedToken 1 3
2110  0xc4 -> DecodedToken 1 4
2111  0xc5 -> DecodedToken 1 5
2112  0xc6 -> DecodedToken 1 6
2113  0xc7 -> DecodedToken 1 7
2114  0xc8 -> DecodedToken 1 8
2115  0xc9 -> DecodedToken 1 9
2116  0xca -> DecodedToken 1 10
2117  0xcb -> DecodedToken 1 11
2118  0xcc -> DecodedToken 1 12
2119  0xcd -> DecodedToken 1 13
2120  0xce -> DecodedToken 1 14
2121  0xcf -> DecodedToken 1 15
2122  0xd0 -> DecodedToken 1 16
2123  0xd1 -> DecodedToken 1 17
2124  0xd2 -> DecodedToken 1 18
2125  0xd3 -> DecodedToken 1 19
2126  0xd4 -> DecodedToken 1 20
2127  0xd5 -> DecodedToken 1 21
2128  0xd6 -> DecodedToken 1 22
2129  0xd7 -> DecodedToken 1 23
2130  0xd8 -> DecodedToken 2 $! (word8ToWord  (eatTailWord8 bs))
2131  0xd9 -> DecodedToken 3 $! (word16ToWord (eatTailWord16 bs))
2132  0xda -> DecodedToken 5 $! (word32ToWord (eatTailWord32 bs))
2133#if defined(ARCH_64bit)
2134  0xdb -> DecodedToken 9 $! (word64ToWord (eatTailWord64 bs))
2135#else
2136  0xdb -> case word64ToWord (eatTailWord64 bs) of
2137            Just n  -> DecodedToken 9 n
2138            Nothing -> DecodeFailure
2139#endif
2140  _    -> DecodeFailure
2141
2142--
2143-- 64-on-32 bit code paths
2144--
2145
2146#if defined(ARCH_32bit)
2147tryConsumeWord64 :: Word8 -> ByteString -> DecodedToken Word64
2148tryConsumeWord64 hdr !bs = case word8ToWord hdr of
2149  -- Positive integers (type 0)
2150  0x00 -> DecodedToken 1 0
2151  0x01 -> DecodedToken 1 1
2152  0x02 -> DecodedToken 1 2
2153  0x03 -> DecodedToken 1 3
2154  0x04 -> DecodedToken 1 4
2155  0x05 -> DecodedToken 1 5
2156  0x06 -> DecodedToken 1 6
2157  0x07 -> DecodedToken 1 7
2158  0x08 -> DecodedToken 1 8
2159  0x09 -> DecodedToken 1 9
2160  0x0a -> DecodedToken 1 10
2161  0x0b -> DecodedToken 1 11
2162  0x0c -> DecodedToken 1 12
2163  0x0d -> DecodedToken 1 13
2164  0x0e -> DecodedToken 1 14
2165  0x0f -> DecodedToken 1 15
2166  0x10 -> DecodedToken 1 16
2167  0x11 -> DecodedToken 1 17
2168  0x12 -> DecodedToken 1 18
2169  0x13 -> DecodedToken 1 19
2170  0x14 -> DecodedToken 1 20
2171  0x15 -> DecodedToken 1 21
2172  0x16 -> DecodedToken 1 22
2173  0x17 -> DecodedToken 1 23
2174  0x18 -> DecodedToken 2 $! (word8ToWord64  (eatTailWord8  bs))
2175  0x19 -> DecodedToken 3 $! (word16ToWord64 (eatTailWord16 bs))
2176  0x1a -> DecodedToken 5 $! (word32ToWord64 (eatTailWord32 bs))
2177  0x1b -> DecodedToken 9 $!                 (eatTailWord64 bs)
2178  _    -> DecodeFailure
2179{-# INLINE tryConsumeWord64 #-}
2180
2181tryConsumeNegWord64 :: Word8 -> ByteString -> DecodedToken Word64
2182tryConsumeNegWord64 hdr !bs = case word8ToWord hdr of
2183  -- Positive integers (type 0)
2184  0x20 -> DecodedToken 1 0
2185  0x21 -> DecodedToken 1 1
2186  0x22 -> DecodedToken 1 2
2187  0x23 -> DecodedToken 1 3
2188  0x24 -> DecodedToken 1 4
2189  0x25 -> DecodedToken 1 5
2190  0x26 -> DecodedToken 1 6
2191  0x27 -> DecodedToken 1 7
2192  0x28 -> DecodedToken 1 8
2193  0x29 -> DecodedToken 1 9
2194  0x2a -> DecodedToken 1 10
2195  0x2b -> DecodedToken 1 11
2196  0x2c -> DecodedToken 1 12
2197  0x2d -> DecodedToken 1 13
2198  0x2e -> DecodedToken 1 14
2199  0x2f -> DecodedToken 1 15
2200  0x30 -> DecodedToken 1 16
2201  0x31 -> DecodedToken 1 17
2202  0x32 -> DecodedToken 1 18
2203  0x33 -> DecodedToken 1 19
2204  0x34 -> DecodedToken 1 20
2205  0x35 -> DecodedToken 1 21
2206  0x36 -> DecodedToken 1 22
2207  0x37 -> DecodedToken 1 23
2208  0x38 -> DecodedToken 2 $! (word8ToWord64  (eatTailWord8  bs))
2209  0x39 -> DecodedToken 3 $! (word16ToWord64 (eatTailWord16 bs))
2210  0x3a -> DecodedToken 5 $! (word32ToWord64 (eatTailWord32 bs))
2211  0x3b -> DecodedToken 9 $!                 (eatTailWord64 bs)
2212  _    -> DecodeFailure
2213{-# INLINE tryConsumeNegWord64 #-}
2214
2215tryConsumeInt64 :: Word8 -> ByteString -> DecodedToken Int64
2216tryConsumeInt64 hdr !bs = case word8ToWord hdr of
2217  -- Positive integers (type 0)
2218  0x00 -> DecodedToken 1 0
2219  0x01 -> DecodedToken 1 1
2220  0x02 -> DecodedToken 1 2
2221  0x03 -> DecodedToken 1 3
2222  0x04 -> DecodedToken 1 4
2223  0x05 -> DecodedToken 1 5
2224  0x06 -> DecodedToken 1 6
2225  0x07 -> DecodedToken 1 7
2226  0x08 -> DecodedToken 1 8
2227  0x09 -> DecodedToken 1 9
2228  0x0a -> DecodedToken 1 10
2229  0x0b -> DecodedToken 1 11
2230  0x0c -> DecodedToken 1 12
2231  0x0d -> DecodedToken 1 13
2232  0x0e -> DecodedToken 1 14
2233  0x0f -> DecodedToken 1 15
2234  0x10 -> DecodedToken 1 16
2235  0x11 -> DecodedToken 1 17
2236  0x12 -> DecodedToken 1 18
2237  0x13 -> DecodedToken 1 19
2238  0x14 -> DecodedToken 1 20
2239  0x15 -> DecodedToken 1 21
2240  0x16 -> DecodedToken 1 22
2241  0x17 -> DecodedToken 1 23
2242  0x18 -> DecodedToken 2 $! (word8ToInt64  (eatTailWord8  bs))
2243  0x19 -> DecodedToken 3 $! (word16ToInt64 (eatTailWord16 bs))
2244  0x1a -> DecodedToken 5 $! (word32ToInt64 (eatTailWord32 bs))
2245  0x1b -> case word64ToInt64 (eatTailWord64 bs) of
2246            Just n  -> DecodedToken 9 n
2247            Nothing -> DecodeFailure
2248
2249  -- Negative integers (type 1)
2250  0x20 -> DecodedToken 1 (-1)
2251  0x21 -> DecodedToken 1 (-2)
2252  0x22 -> DecodedToken 1 (-3)
2253  0x23 -> DecodedToken 1 (-4)
2254  0x24 -> DecodedToken 1 (-5)
2255  0x25 -> DecodedToken 1 (-6)
2256  0x26 -> DecodedToken 1 (-7)
2257  0x27 -> DecodedToken 1 (-8)
2258  0x28 -> DecodedToken 1 (-9)
2259  0x29 -> DecodedToken 1 (-10)
2260  0x2a -> DecodedToken 1 (-11)
2261  0x2b -> DecodedToken 1 (-12)
2262  0x2c -> DecodedToken 1 (-13)
2263  0x2d -> DecodedToken 1 (-14)
2264  0x2e -> DecodedToken 1 (-15)
2265  0x2f -> DecodedToken 1 (-16)
2266  0x30 -> DecodedToken 1 (-17)
2267  0x31 -> DecodedToken 1 (-18)
2268  0x32 -> DecodedToken 1 (-19)
2269  0x33 -> DecodedToken 1 (-20)
2270  0x34 -> DecodedToken 1 (-21)
2271  0x35 -> DecodedToken 1 (-22)
2272  0x36 -> DecodedToken 1 (-23)
2273  0x37 -> DecodedToken 1 (-24)
2274  0x38 -> DecodedToken 2 $! (-1 - word8ToInt64  (eatTailWord8  bs))
2275  0x39 -> DecodedToken 3 $! (-1 - word16ToInt64 (eatTailWord16 bs))
2276  0x3a -> DecodedToken 5 $! (-1 - word32ToInt64 (eatTailWord32 bs))
2277  0x3b -> case word64ToInt64 (eatTailWord64 bs) of
2278            Just n  -> DecodedToken 9 (-1 - n)
2279            Nothing -> DecodeFailure
2280  _    -> DecodeFailure
2281{-# INLINE tryConsumeInt64 #-}
2282
2283tryConsumeListLen64 :: Word8 -> ByteString -> DecodedToken Int64
2284tryConsumeListLen64 hdr !bs = case word8ToWord hdr of
2285  -- List structures (type 4)
2286  0x80 -> DecodedToken 1 0
2287  0x81 -> DecodedToken 1 1
2288  0x82 -> DecodedToken 1 2
2289  0x83 -> DecodedToken 1 3
2290  0x84 -> DecodedToken 1 4
2291  0x85 -> DecodedToken 1 5
2292  0x86 -> DecodedToken 1 6
2293  0x87 -> DecodedToken 1 7
2294  0x88 -> DecodedToken 1 8
2295  0x89 -> DecodedToken 1 9
2296  0x8a -> DecodedToken 1 10
2297  0x8b -> DecodedToken 1 11
2298  0x8c -> DecodedToken 1 12
2299  0x8d -> DecodedToken 1 13
2300  0x8e -> DecodedToken 1 14
2301  0x8f -> DecodedToken 1 15
2302  0x90 -> DecodedToken 1 16
2303  0x91 -> DecodedToken 1 17
2304  0x92 -> DecodedToken 1 18
2305  0x93 -> DecodedToken 1 19
2306  0x94 -> DecodedToken 1 20
2307  0x95 -> DecodedToken 1 21
2308  0x96 -> DecodedToken 1 22
2309  0x97 -> DecodedToken 1 23
2310  0x98 -> DecodedToken 2 $! (word8ToInt64  (eatTailWord8  bs))
2311  0x99 -> DecodedToken 3 $! (word16ToInt64 (eatTailWord16 bs))
2312  0x9a -> DecodedToken 5 $! (word32ToInt64 (eatTailWord32 bs))
2313  0x9b -> case word64ToInt64 (eatTailWord64 bs) of
2314            Just n  -> DecodedToken 9 n
2315            Nothing -> DecodeFailure
2316  _    -> DecodeFailure
2317{-# INLINE tryConsumeListLen64 #-}
2318
2319tryConsumeMapLen64 :: Word8 -> ByteString -> DecodedToken Int64
2320tryConsumeMapLen64 hdr !bs = case word8ToWord hdr of
2321  -- Map structures (type 5)
2322  0xa0 -> DecodedToken 1 0
2323  0xa1 -> DecodedToken 1 1
2324  0xa2 -> DecodedToken 1 2
2325  0xa3 -> DecodedToken 1 3
2326  0xa4 -> DecodedToken 1 4
2327  0xa5 -> DecodedToken 1 5
2328  0xa6 -> DecodedToken 1 6
2329  0xa7 -> DecodedToken 1 7
2330  0xa8 -> DecodedToken 1 8
2331  0xa9 -> DecodedToken 1 9
2332  0xaa -> DecodedToken 1 10
2333  0xab -> DecodedToken 1 11
2334  0xac -> DecodedToken 1 12
2335  0xad -> DecodedToken 1 13
2336  0xae -> DecodedToken 1 14
2337  0xaf -> DecodedToken 1 15
2338  0xb0 -> DecodedToken 1 16
2339  0xb1 -> DecodedToken 1 17
2340  0xb2 -> DecodedToken 1 18
2341  0xb3 -> DecodedToken 1 19
2342  0xb4 -> DecodedToken 1 20
2343  0xb5 -> DecodedToken 1 21
2344  0xb6 -> DecodedToken 1 22
2345  0xb7 -> DecodedToken 1 23
2346  0xb8 -> DecodedToken 2 $! (word8ToInt64  (eatTailWord8  bs))
2347  0xb9 -> DecodedToken 3 $! (word16ToInt64 (eatTailWord16 bs))
2348  0xba -> DecodedToken 5 $! (word32ToInt64 (eatTailWord32 bs))
2349  0xbb -> case word64ToInt64 (eatTailWord64 bs) of
2350            Just n  -> DecodedToken 9 n
2351            Nothing -> DecodeFailure
2352  _    -> DecodeFailure
2353{-# INLINE tryConsumeMapLen64 #-}
2354
2355tryConsumeTag64 :: Word8 -> ByteString -> DecodedToken Word64
2356tryConsumeTag64 hdr !bs = case word8ToWord hdr of
2357  -- Tagged values (type 6)
2358  0xc0 -> DecodedToken 1 0
2359  0xc1 -> DecodedToken 1 1
2360  0xc2 -> DecodedToken 1 2
2361  0xc3 -> DecodedToken 1 3
2362  0xc4 -> DecodedToken 1 4
2363  0xc5 -> DecodedToken 1 5
2364  0xc6 -> DecodedToken 1 6
2365  0xc7 -> DecodedToken 1 7
2366  0xc8 -> DecodedToken 1 8
2367  0xc9 -> DecodedToken 1 9
2368  0xca -> DecodedToken 1 10
2369  0xcb -> DecodedToken 1 11
2370  0xcc -> DecodedToken 1 12
2371  0xcd -> DecodedToken 1 13
2372  0xce -> DecodedToken 1 14
2373  0xcf -> DecodedToken 1 15
2374  0xd0 -> DecodedToken 1 16
2375  0xd1 -> DecodedToken 1 17
2376  0xd2 -> DecodedToken 1 18
2377  0xd3 -> DecodedToken 1 19
2378  0xd4 -> DecodedToken 1 20
2379  0xd5 -> DecodedToken 1 21
2380  0xd6 -> DecodedToken 1 22
2381  0xd7 -> DecodedToken 1 23
2382  0xd8 -> DecodedToken 2 $! (word8ToWord64  (eatTailWord8  bs))
2383  0xd9 -> DecodedToken 3 $! (word16ToWord64 (eatTailWord16 bs))
2384  0xda -> DecodedToken 5 $! (word32ToWord64 (eatTailWord32 bs))
2385  0xdb -> DecodedToken 9 $!                 (eatTailWord64 bs)
2386  _    -> DecodeFailure
2387{-# INLINE tryConsumeTag64 #-}
2388#endif
2389
2390{-# INLINE tryConsumeFloat #-}
2391tryConsumeFloat :: Word8 -> ByteString -> DecodedToken Float
2392tryConsumeFloat hdr !bs = case word8ToWord hdr of
2393  0xf9 -> DecodedToken 3 $! (wordToFloat16 (eatTailWord16 bs))
2394  0xfa -> DecodedToken 5 $! (wordToFloat32 (eatTailWord32 bs))
2395  _    -> DecodeFailure
2396
2397
2398{-# INLINE tryConsumeDouble #-}
2399tryConsumeDouble :: Word8 -> ByteString -> DecodedToken Double
2400tryConsumeDouble hdr !bs = case word8ToWord hdr of
2401  0xf9 -> DecodedToken 3 $! (float2Double $ wordToFloat16 (eatTailWord16 bs))
2402  0xfa -> DecodedToken 5 $! (float2Double $ wordToFloat32 (eatTailWord32 bs))
2403  0xfb -> DecodedToken 9 $!                (wordToFloat64 (eatTailWord64 bs))
2404  _    -> DecodeFailure
2405
2406
2407{-# INLINE tryConsumeBool #-}
2408tryConsumeBool :: Word8 -> DecodedToken Bool
2409tryConsumeBool hdr = case word8ToWord hdr of
2410  0xf4 -> DecodedToken 1 False
2411  0xf5 -> DecodedToken 1 True
2412  _    -> DecodeFailure
2413
2414
2415{-# INLINE tryConsumeSimple #-}
2416tryConsumeSimple :: Word8 -> ByteString -> DecodedToken Word
2417tryConsumeSimple hdr !bs = case word8ToWord hdr of
2418
2419  -- Simple and floats (type 7)
2420  0xe0 -> DecodedToken 1 0
2421  0xe1 -> DecodedToken 1 1
2422  0xe2 -> DecodedToken 1 2
2423  0xe3 -> DecodedToken 1 3
2424  0xe4 -> DecodedToken 1 4
2425  0xe5 -> DecodedToken 1 5
2426  0xe6 -> DecodedToken 1 6
2427  0xe7 -> DecodedToken 1 7
2428  0xe8 -> DecodedToken 1 8
2429  0xe9 -> DecodedToken 1 9
2430  0xea -> DecodedToken 1 10
2431  0xeb -> DecodedToken 1 11
2432  0xec -> DecodedToken 1 12
2433  0xed -> DecodedToken 1 13
2434  0xee -> DecodedToken 1 14
2435  0xef -> DecodedToken 1 15
2436  0xf0 -> DecodedToken 1 16
2437  0xf1 -> DecodedToken 1 17
2438  0xf2 -> DecodedToken 1 18
2439  0xf3 -> DecodedToken 1 19
2440  0xf4 -> DecodedToken 1 20
2441  0xf5 -> DecodedToken 1 21
2442  0xf6 -> DecodedToken 1 22
2443  0xf7 -> DecodedToken 1 23
2444  0xf8 -> DecodedToken 2 $! (word8ToWord (eatTailWord8 bs))
2445  _    -> DecodeFailure
2446
2447
2448{-# INLINE tryConsumeBytesIndef #-}
2449tryConsumeBytesIndef :: Word8 -> DecodedToken ()
2450tryConsumeBytesIndef hdr = case word8ToWord hdr of
2451  0x5f -> DecodedToken 1 ()
2452  _    -> DecodeFailure
2453
2454
2455{-# INLINE tryConsumeStringIndef #-}
2456tryConsumeStringIndef :: Word8 -> DecodedToken ()
2457tryConsumeStringIndef hdr = case word8ToWord hdr of
2458  0x7f -> DecodedToken 1 ()
2459  _    -> DecodeFailure
2460
2461
2462{-# INLINE tryConsumeNull #-}
2463tryConsumeNull :: Word8 -> DecodedToken ()
2464tryConsumeNull hdr = case word8ToWord hdr of
2465  0xf6 -> DecodedToken 1 ()
2466  _    -> DecodeFailure
2467
2468
2469{-# INLINE tryConsumeBreakOr #-}
2470tryConsumeBreakOr :: Word8 -> DecodedToken ()
2471tryConsumeBreakOr hdr = case word8ToWord hdr of
2472  0xff -> DecodedToken 1 ()
2473  _    -> DecodeFailure
2474
2475{-# INLINE readBytesSmall #-}
2476readBytesSmall :: Int -> ByteString -> DecodedToken (LongToken ByteString)
2477readBytesSmall n bs
2478  -- if n <= bound then ok return it all
2479  | n + hdrsz <= BS.length bs
2480  = DecodedToken (n+hdrsz) $ Fits True $
2481      BS.unsafeTake n (BS.unsafeDrop hdrsz bs)
2482
2483  -- if n > bound then slow path, multi-chunk
2484  | otherwise
2485  = DecodedToken hdrsz $ TooLong True n
2486  where
2487    hdrsz = 1
2488
2489{-# INLINE readBytes8 #-}
2490{-# INLINE readBytes16 #-}
2491{-# INLINE readBytes32 #-}
2492{-# INLINE readBytes64 #-}
2493readBytes8, readBytes16, readBytes32, readBytes64
2494  :: ByteString -> DecodedToken (LongToken ByteString)
2495
2496readBytes8 bs
2497  | n <= BS.length bs - hdrsz
2498  = DecodedToken (n+hdrsz) $ Fits lengthCanonical $
2499      BS.unsafeTake n (BS.unsafeDrop hdrsz bs)
2500
2501  -- if n > bound then slow path, multi-chunk
2502  | otherwise
2503  = DecodedToken hdrsz $ TooLong lengthCanonical n
2504  where
2505    hdrsz           = 2
2506    !n@(I# n#)      = word8ToInt (eatTailWord8 bs)
2507    lengthCanonical = isIntCanonical hdrsz n#
2508
2509readBytes16 bs
2510  | n <= BS.length bs - hdrsz
2511  = DecodedToken (n+hdrsz) $ Fits lengthCanonical $
2512      BS.unsafeTake n (BS.unsafeDrop hdrsz bs)
2513
2514  -- if n > bound then slow path, multi-chunk
2515  | otherwise
2516  = DecodedToken hdrsz $ TooLong lengthCanonical n
2517  where
2518    hdrsz           = 3
2519    !n@(I# n#)      = word16ToInt (eatTailWord16 bs)
2520    lengthCanonical = isIntCanonical hdrsz n#
2521
2522readBytes32 bs = case word32ToInt (eatTailWord32 bs) of
2523#if defined(ARCH_32bit)
2524    Just n@(I# n#)
2525#else
2526    n@(I# n#)
2527#endif
2528      | n <= BS.length bs - hdrsz
2529                  -> DecodedToken (n+hdrsz) $ Fits (isIntCanonical hdrsz n#) $
2530                       BS.unsafeTake n (BS.unsafeDrop hdrsz bs)
2531
2532      -- if n > bound then slow path, multi-chunk
2533      | otherwise -> DecodedToken hdrsz $ TooLong (isIntCanonical hdrsz n#) n
2534
2535#if defined(ARCH_32bit)
2536    Nothing       -> DecodeFailure
2537#endif
2538  where
2539    hdrsz = 5
2540
2541readBytes64 bs = case word64ToInt (eatTailWord64 bs) of
2542    Just n@(I# n#)
2543      | n <= BS.length bs - hdrsz
2544                  -> DecodedToken (n+hdrsz) $ Fits (isIntCanonical hdrsz n#) $
2545                            BS.unsafeTake n (BS.unsafeDrop hdrsz bs)
2546
2547      -- if n > bound then slow path, multi-chunk
2548      | otherwise -> DecodedToken hdrsz $ TooLong (isIntCanonical hdrsz n#) n
2549
2550    Nothing       -> DecodeFailure
2551  where
2552    hdrsz = 9
2553
2554------------------------------------------------------------------------------
2555-- Reading big integers
2556--
2557
2558-- Big ints consist of two CBOR tokens: a tag token (2 for positive, 3 for
2559-- negative) followed by a bytes token. Our usual invariant (for go_fast and
2560-- go_fast_end) only guarantees that we've got enough space to decode the
2561-- first token. So given that there's two tokens and the second is variable
2562-- length then there are several points where we can discover we're out of
2563-- input buffer space.
2564--
2565-- In those cases we need to break out of the fast path but we must arrange
2566-- things so that we can continue later once we've got more input buffer.
2567--
2568-- In particular, we might run out of space when:
2569--   1. trying to decode the header of the second token (bytes); or
2570--   2. trying to read the bytes body
2571--
2572--- The existing mechanisms we've got to drop out of the fast path are:
2573--   * SlowDecodeAction to re-read a whole token
2574--   * SlowConsumeTokenBytes to read the body of a bytes token
2575--
2576-- Of course when we resume we need to convert the bytes into an integer.
2577-- Rather than making new fast path return mechanisms we can reuse the
2578-- existing ones, so long as we're prepared to allocate new continuation
2579-- closures. This seems a reasonable price to pay to reduce complexity since
2580-- decoding a big int across an input buffer boundary ought to be rare, and
2581-- allocating a new continuation closure isn't that expensive.
2582--
2583-- Note that canonicity information is calculated lazily. This way we don't need
2584-- to concern ourselves with two distinct paths, while according to benchmarks
2585-- it doesn't affect performance in the non-canonical case.
2586
2587data BigIntToken a = BigIntToken     Bool {- canonical? -} Integer
2588                   | BigUIntNeedBody Bool {- canonical? -} Int
2589                   | BigNIntNeedBody Bool {- canonical? -} Int
2590                   | BigUIntNeedHeader
2591                   | BigNIntNeedHeader
2592
2593-- So when we have to break out because we can't read the whole bytes body
2594-- in one go then we need to use SlowConsumeTokenBytes but we can adjust the
2595-- continuation so that when we get the ByteString back we convert it to an
2596-- Integer before calling the original continuation.
2597
2598adjustContBigUIntNeedBody, adjustContBigNIntNeedBody
2599  :: (Integer -> ST s (DecodeAction s a))
2600  -> (ByteString -> ST s (DecodeAction s a))
2601
2602adjustContBigUIntNeedBody k = \bs -> k $! uintegerFromBytes bs
2603adjustContBigNIntNeedBody k = \bs -> k $! nintegerFromBytes bs
2604
2605adjustContCanonicalBigUIntNeedBody, adjustContCanonicalBigNIntNeedBody
2606  :: (Integer -> ST s (DecodeAction s a))
2607  -> (ByteString -> ST s (DecodeAction s a))
2608
2609adjustContCanonicalBigUIntNeedBody k = \bs ->
2610  if isBigIntRepCanonical bs
2611  then k $! uintegerFromBytes bs
2612  else pure $! D.Fail ("non-canonical integer")
2613
2614adjustContCanonicalBigNIntNeedBody k = \bs ->
2615  if isBigIntRepCanonical bs
2616  then k $! nintegerFromBytes bs
2617  else pure $! D.Fail ("non-canonical integer")
2618
2619-- And when we have to break out because we can't read the bytes token header
2620-- in one go then we need to use SlowDecodeAction but we have to make two
2621-- adjustments. When we resume we need to read a bytes token, not a big int.
2622-- That is we don't want to re-read the tag token. Indeed we cannot even if we
2623-- wanted to because the slow path code only guarantees to arrange for one
2624-- complete token header in the input buffer. So we must pretend that we did
2625-- in fact want to read a bytes token using ConsumeBytes, and then we can
2626-- adjust the continuation for that in the same way as above.
2627
2628adjustContBigUIntNeedHeader, adjustContBigNIntNeedHeader
2629  :: (Integer -> ST s (DecodeAction s a))
2630  -> DecodeAction s a
2631
2632adjustContBigUIntNeedHeader k = ConsumeBytes (\bs -> k $! uintegerFromBytes bs)
2633adjustContBigNIntNeedHeader k = ConsumeBytes (\bs -> k $! nintegerFromBytes bs)
2634
2635adjustContCanonicalBigUIntNeedHeader, adjustContCanonicalBigNIntNeedHeader
2636  :: (Integer -> ST s (DecodeAction s a))
2637  -> DecodeAction s a
2638
2639adjustContCanonicalBigUIntNeedHeader k = ConsumeBytesCanonical $ \bs ->
2640  if isBigIntRepCanonical bs
2641  then k $! uintegerFromBytes bs
2642  else pure $! D.Fail ("non-canonical integer")
2643
2644adjustContCanonicalBigNIntNeedHeader k = ConsumeBytesCanonical $ \bs ->
2645  if isBigIntRepCanonical bs
2646  then k $! nintegerFromBytes bs
2647  else pure $! D.Fail ("non-canonical integer")
2648
2649-- So finally when reading the input buffer we check if we have enough space
2650-- to read the header of the bytes token and then try to read the bytes body,
2651-- using the appropriate break-out codes as above.
2652
2653{-# INLINE readBigUInt #-}
2654readBigUInt :: ByteString -> DecodedToken (BigIntToken a)
2655readBigUInt bs
2656    | let bs' = BS.unsafeTail bs
2657    , not (BS.null bs')
2658    , let !hdr = BS.unsafeHead bs'
2659    , BS.length bs' >= tokenSize hdr
2660    = case tryConsumeBytes hdr bs' of
2661        DecodeFailure                           -> DecodeFailure
2662        DecodedToken sz (Fits canonical bstr)   -> DecodedToken (1+sz)
2663          (BigIntToken (canonical && isBigIntRepCanonical bstr)
2664                       (uintegerFromBytes bstr))
2665        DecodedToken sz (TooLong canonical len) ->
2666          DecodedToken (1+sz) (BigUIntNeedBody canonical len)
2667
2668    | otherwise
2669    = DecodedToken 1 BigUIntNeedHeader
2670
2671{-# INLINE readBigNInt #-}
2672readBigNInt :: ByteString -> DecodedToken (BigIntToken a)
2673readBigNInt bs
2674    | let bs' = BS.unsafeTail bs
2675    , not (BS.null bs')
2676    , let !hdr = BS.unsafeHead bs'
2677    , BS.length bs' >= tokenSize hdr
2678    = case tryConsumeBytes hdr bs' of
2679        DecodeFailure                           -> DecodeFailure
2680        DecodedToken sz (Fits canonical bstr)   -> DecodedToken (1+sz)
2681          (BigIntToken (canonical && isBigIntRepCanonical bstr)
2682                       (nintegerFromBytes bstr))
2683        DecodedToken sz (TooLong canonical len) ->
2684          DecodedToken (1+sz) (BigNIntNeedBody canonical len)
2685
2686    | otherwise
2687    = DecodedToken 1 BigNIntNeedHeader
2688
2689-- Binary representation of a big integer is canonical if it's at least 9 bytes
2690-- long (as for smaller values the canonical representation is the same one as
2691-- for Int) and the leading byte is not zero (meaning that it's the smallest
2692-- representation for the number in question).
2693isBigIntRepCanonical :: ByteString -> Bool
2694isBigIntRepCanonical bstr = BS.length bstr > 8 && BS.unsafeHead bstr /= 0x00
2695