1{-# LANGUAGE BangPatterns        #-}
2{-# LANGUAGE CPP                 #-}
3{-# LANGUAGE DeriveFunctor       #-}
4{-# LANGUAGE MagicHash           #-}
5{-# LANGUAGE RankNTypes          #-}
6{-# LANGUAGE ScopedTypeVariables #-}
7{-# LANGUAGE PatternSynonyms     #-}
8
9#include "cbor.h"
10
11#if defined(OPTIMIZE_GMP)
12#if __GLASGOW_HASKELL__ >= 900
13#define HAVE_GHC_BIGNUM 1
14{-# LANGUAGE UnboxedSums         #-}
15#endif
16#endif
17
18-- |
19-- Module      : Codec.CBOR.Write
20-- Copyright   : (c) Duncan Coutts 2015-2017
21-- License     : BSD3-style (see LICENSE.txt)
22--
23-- Maintainer  : duncan@community.haskell.org
24-- Stability   : experimental
25-- Portability : non-portable (GHC extensions)
26--
27-- Functions for writing out CBOR 'Encoding' values in a variety of forms.
28--
29module Codec.CBOR.Write
30  ( toBuilder          -- :: Encoding -> B.Builder
31  , toLazyByteString   -- :: Encoding -> L.ByteString
32  , toStrictByteString -- :: Encoding -> S.ByteString
33  ) where
34
35import           Data.Bits
36import           Data.Int
37
38#if ! MIN_VERSION_base(4,11,0)
39import           Data.Monoid
40#endif
41
42import           Data.Word
43import           Foreign.Ptr
44
45import qualified Data.ByteString                       as S
46import qualified Data.ByteString.Builder               as B
47import qualified Data.ByteString.Builder.Internal      as BI
48import           Data.ByteString.Builder.Prim          (condB, (>$<), (>*<))
49import qualified Data.ByteString.Builder.Prim          as P
50import qualified Data.ByteString.Builder.Prim.Internal as PI
51import qualified Data.ByteString.Lazy                  as L
52import qualified Data.Text                             as T
53import qualified Data.Text.Encoding                    as T
54
55import           Control.Exception.Base                (assert)
56import           GHC.Exts
57import           GHC.IO                                (IO(IO))
58#if defined(HAVE_GHC_BIGNUM)
59import qualified GHC.Num.Integer
60import qualified GHC.Num.BigNat                        as Gmp
61import qualified GHC.Num.BigNat
62import           GHC.Num.BigNat                        (BigNat)
63#else
64import qualified GHC.Integer.GMP.Internals             as Gmp
65import           GHC.Integer.GMP.Internals             (BigNat)
66#endif
67
68#if __GLASGOW_HASKELL__ < 710
69import           GHC.Word
70#endif
71
72import qualified Codec.CBOR.ByteArray.Sliced           as BAS
73import           Codec.CBOR.Encoding
74import           Codec.CBOR.Magic
75
76--------------------------------------------------------------------------------
77
78-- | Turn an 'Encoding' into a lazy 'L.ByteString' in CBOR binary
79-- format.
80--
81-- @since 0.2.0.0
82toLazyByteString :: Encoding     -- ^ The 'Encoding' of a CBOR value.
83                 -> L.ByteString -- ^ The encoded CBOR value.
84toLazyByteString = B.toLazyByteString . toBuilder
85
86-- | Turn an 'Encoding' into a strict 'S.ByteString' in CBOR binary
87-- format.
88--
89-- @since 0.2.0.0
90toStrictByteString :: Encoding     -- ^ The 'Encoding' of a CBOR value.
91                   -> S.ByteString -- ^ The encoded value.
92toStrictByteString = L.toStrict . B.toLazyByteString . toBuilder
93
94-- | Turn an 'Encoding' into a 'L.ByteString' 'B.Builder' in CBOR
95-- binary format.
96--
97-- @since 0.2.0.0
98toBuilder :: Encoding  -- ^ The 'Encoding' of a CBOR value.
99          -> B.Builder -- ^ The encoded value as a 'B.Builder'.
100toBuilder =
101    \(Encoding vs0) -> BI.builder (buildStep (vs0 TkEnd))
102
103buildStep :: Tokens
104          -> (BI.BufferRange -> IO (BI.BuildSignal a))
105          -> BI.BufferRange
106          -> IO (BI.BuildSignal a)
107buildStep vs1 k (BI.BufferRange op0 ope0) =
108    go vs1 op0
109  where
110    go vs !op
111      | op `plusPtr` bound <= ope0 = case vs of
112          TkWord     x vs' -> PI.runB wordMP     x op >>= go vs'
113          TkWord64   x vs' -> PI.runB word64MP   x op >>= go vs'
114
115          TkInt      x vs' -> PI.runB intMP      x op >>= go vs'
116          TkInt64    x vs' -> PI.runB int64MP    x op >>= go vs'
117
118          TkBytes        x vs' -> BI.runBuilderWith
119                                    (bytesMP  x) (buildStep vs' k)
120                                    (BI.BufferRange op ope0)
121          TkByteArray    x vs' -> BI.runBuilderWith
122                                    (byteArrayMP x) (buildStep vs' k)
123                                    (BI.BufferRange op ope0)
124
125          TkUtf8ByteArray x vs' -> BI.runBuilderWith
126                                     (utf8ByteArrayMP x) (buildStep vs' k)
127                                     (BI.BufferRange op ope0)
128          TkString        x vs' -> BI.runBuilderWith
129                                     (stringMP x) (buildStep vs' k)
130                                     (BI.BufferRange op ope0)
131
132          TkBytesBegin vs' -> PI.runB bytesBeginMP  () op >>= go vs'
133          TkStringBegin vs'-> PI.runB stringBeginMP () op >>= go vs'
134
135          TkListLen  x vs' -> PI.runB arrayLenMP     x op >>= go vs'
136          TkListBegin  vs' -> PI.runB arrayBeginMP  () op >>= go vs'
137
138          TkMapLen   x vs' -> PI.runB mapLenMP       x op >>= go vs'
139          TkMapBegin   vs' -> PI.runB mapBeginMP    () op >>= go vs'
140
141          TkTag      x vs' -> PI.runB tagMP          x op >>= go vs'
142          TkTag64    x vs' -> PI.runB tag64MP        x op >>= go vs'
143
144#if defined(OPTIMIZE_GMP)
145          -- This code is specialized for GMP implementation of Integer. By
146          -- looking directly at the constructors we can avoid some checks.
147          -- S# hold an Int, so we can just use intMP.
148          TkInteger (SmallInt i) vs' ->
149               PI.runB intMP (I# i) op >>= go vs'
150          -- PosBigInt is guaranteed to be > 0.
151          TkInteger integer@(PosBigInt bigNat) vs'
152            | integer <= fromIntegral (maxBound :: Word64) ->
153                PI.runB word64MP (fromIntegral integer) op >>= go vs'
154            | otherwise ->
155               let buffer = BI.BufferRange op ope0
156               in BI.runBuilderWith
157                    (bigNatMP bigNat) (buildStep vs' k) buffer
158          -- Jn# is guaranteed to be < 0.
159          TkInteger integer@(NegBigInt bigNat) vs'
160            | integer >= -1 - fromIntegral (maxBound :: Word64) ->
161                PI.runB negInt64MP (fromIntegral (-1 - integer)) op >>= go vs'
162            | otherwise ->
163                let buffer = BI.BufferRange op ope0
164                in BI.runBuilderWith
165                     (negBigNatMP bigNat) (buildStep vs' k) buffer
166#else
167          TkInteger  x vs'
168            | x >= 0
169            , x <= fromIntegral (maxBound :: Word64)
170                            -> PI.runB word64MP (fromIntegral x) op >>= go vs'
171            | x <  0
172            , x >= -1 - fromIntegral (maxBound :: Word64)
173                            -> PI.runB negInt64MP (fromIntegral (-1 - x)) op >>= go vs'
174            | otherwise     -> BI.runBuilderWith
175                                 (integerMP x) (buildStep vs' k)
176                                 (BI.BufferRange op ope0)
177#endif
178
179          TkBool False vs' -> PI.runB falseMP   () op >>= go vs'
180          TkBool True  vs' -> PI.runB trueMP    () op >>= go vs'
181          TkNull       vs' -> PI.runB nullMP    () op >>= go vs'
182          TkUndef      vs' -> PI.runB undefMP   () op >>= go vs'
183          TkSimple   w vs' -> PI.runB simpleMP   w op >>= go vs'
184          TkFloat16  f vs' -> PI.runB halfMP     f op >>= go vs'
185          TkFloat32  f vs' -> PI.runB floatMP    f op >>= go vs'
186          TkFloat64  f vs' -> PI.runB doubleMP   f op >>= go vs'
187          TkBreak      vs' -> PI.runB breakMP   () op >>= go vs'
188
189          TkEncoded  x vs' -> BI.runBuilderWith
190                                (B.byteString x) (buildStep vs' k)
191                                (BI.BufferRange op ope0)
192
193          TkEnd            -> k (BI.BufferRange op ope0)
194
195      | otherwise = return $ BI.bufferFull bound op (buildStep vs k)
196
197    -- The maximum size in bytes of the fixed-size encodings
198    bound :: Int
199    bound = 9
200
201header :: P.BoundedPrim Word8
202header = P.liftFixedToBounded P.word8
203
204constHeader :: Word8 -> P.BoundedPrim ()
205constHeader h = P.liftFixedToBounded (const h >$< P.word8)
206
207withHeader :: P.FixedPrim a -> P.BoundedPrim (Word8, a)
208withHeader p = P.liftFixedToBounded (P.word8 >*< p)
209
210withConstHeader :: Word8 -> P.FixedPrim a -> P.BoundedPrim a
211withConstHeader h p = P.liftFixedToBounded ((,) h >$< (P.word8 >*< p))
212
213
214{-
215From RFC 7049:
216
217   Major type 0:  an unsigned integer.  The 5-bit additional information
218      is either the integer itself (for additional information values 0
219      through 23) or the length of additional data.  Additional
220      information 24 means the value is represented in an additional
221      uint8_t, 25 means a uint16_t, 26 means a uint32_t, and 27 means a
222      uint64_t.  For example, the integer 10 is denoted as the one byte
223      0b000_01010 (major type 0, additional information 10).  The
224      integer 500 would be 0b000_11001 (major type 0, additional
225      information 25) followed by the two bytes 0x01f4, which is 500 in
226      decimal.
227
228-}
229
230{-# INLINE wordMP #-}
231wordMP :: P.BoundedPrim Word
232wordMP =
233    condB (<= 0x17)       (fromIntegral >$< header) $
234    condB (<= 0xff)       (fromIntegral >$< withConstHeader 24 P.word8) $
235    condB (<= 0xffff)     (fromIntegral >$< withConstHeader 25 P.word16BE) $
236#if defined(ARCH_64bit)
237    condB (<= 0xffffffff) (fromIntegral >$< withConstHeader 26 P.word32BE) $
238                          (fromIntegral >$< withConstHeader 27 P.word64BE)
239#else
240                          (fromIntegral >$< withConstHeader 26 P.word32BE)
241#endif
242
243{-# INLINE word64MP #-}
244word64MP :: P.BoundedPrim Word64
245word64MP =
246    condB (<= 0x17)       (fromIntegral >$< header) $
247    condB (<= 0xff)       (fromIntegral >$< withConstHeader 24 P.word8) $
248    condB (<= 0xffff)     (fromIntegral >$< withConstHeader 25 P.word16BE) $
249    condB (<= 0xffffffff) (fromIntegral >$< withConstHeader 26 P.word32BE) $
250                          (fromIntegral >$< withConstHeader 27 P.word64BE)
251
252{-
253From RFC 7049:
254
255   Major type 1:  a negative integer.  The encoding follows the rules
256      for unsigned integers (major type 0), except that the value is
257      then -1 minus the encoded unsigned integer.  For example, the
258      integer -500 would be 0b001_11001 (major type 1, additional
259      information 25) followed by the two bytes 0x01f3, which is 499 in
260      decimal.
261-}
262
263negInt64MP :: P.BoundedPrim Word64
264negInt64MP =
265    condB (<= 0x17)       (fromIntegral . (0x20 +) >$< header) $
266    condB (<= 0xff)       (fromIntegral >$< withConstHeader 0x38 P.word8) $
267    condB (<= 0xffff)     (fromIntegral >$< withConstHeader 0x39 P.word16BE) $
268    condB (<= 0xffffffff) (fromIntegral >$< withConstHeader 0x3a P.word32BE) $
269                          (fromIntegral >$< withConstHeader 0x3b P.word64BE)
270
271{-
272   Major types 0 and 1 are designed in such a way that they can be
273   encoded in C from a signed integer without actually doing an if-then-
274   else for positive/negative (Figure 2).  This uses the fact that
275   (-1-n), the transformation for major type 1, is the same as ~n
276   (bitwise complement) in C unsigned arithmetic; ~n can then be
277   expressed as (-1)^n for the negative case, while 0^n leaves n
278   unchanged for non-negative.  The sign of a number can be converted to
279   -1 for negative and 0 for non-negative (0 or positive) by arithmetic-
280   shifting the number by one bit less than the bit length of the number
281   (for example, by 63 for 64-bit numbers).
282
283   void encode_sint(int64_t n) {
284     uint64t ui = n >> 63;    // extend sign to whole length
285     mt = ui & 0x20;          // extract major type
286     ui ^= n;                 // complement negatives
287     if (ui < 24)
288       *p++ = mt + ui;
289     else if (ui < 256) {
290       *p++ = mt + 24;
291       *p++ = ui;
292     } else
293          ...
294
295            Figure 2: Pseudocode for Encoding a Signed Integer
296-}
297
298{-# INLINE intMP #-}
299intMP :: P.BoundedPrim Int
300intMP =
301    prep >$< (
302      condB ((<= 0x17)       . snd) (encIntSmall >$< header) $
303      condB ((<= 0xff)       . snd) (encInt8  >$< withHeader P.word8) $
304      condB ((<= 0xffff)     . snd) (encInt16 >$< withHeader P.word16BE) $
305#if defined(ARCH_64bit)
306      condB ((<= 0xffffffff) . snd) (encInt32 >$< withHeader P.word32BE)
307                                    (encInt64 >$< withHeader P.word64BE)
308#else
309                                    (encInt32 >$< withHeader P.word32BE)
310#endif
311    )
312  where
313    prep :: Int -> (Word8, Word)
314    prep n = (mt, ui)
315      where
316        sign :: Word     -- extend sign to whole length
317        sign = fromIntegral (n `unsafeShiftR` intBits)
318#if MIN_VERSION_base(4,7,0)
319        intBits = finiteBitSize (undefined :: Int) - 1
320#else
321        intBits = bitSize (undefined :: Int) - 1
322#endif
323
324        mt   :: Word8    -- select major type
325        mt   = fromIntegral (sign .&. 0x20)
326
327        ui   :: Word     -- complement negatives
328        ui   = fromIntegral n `xor` sign
329
330    encIntSmall :: (Word8, Word) -> Word8
331    encIntSmall (mt, ui) =  mt + fromIntegral ui
332    encInt8     (mt, ui) = (mt + 24, fromIntegral ui)
333    encInt16    (mt, ui) = (mt + 25, fromIntegral ui)
334    encInt32    (mt, ui) = (mt + 26, fromIntegral ui)
335#if defined(ARCH_64bit)
336    encInt64    (mt, ui) = (mt + 27, fromIntegral ui)
337#endif
338
339
340{-# INLINE int64MP #-}
341int64MP :: P.BoundedPrim Int64
342int64MP =
343    prep >$< (
344      condB ((<= 0x17)       . snd) (encIntSmall >$< header) $
345      condB ((<= 0xff)       . snd) (encInt8  >$< withHeader P.word8) $
346      condB ((<= 0xffff)     . snd) (encInt16 >$< withHeader P.word16BE) $
347      condB ((<= 0xffffffff) . snd) (encInt32 >$< withHeader P.word32BE)
348                                    (encInt64 >$< withHeader P.word64BE)
349    )
350  where
351    prep :: Int64 -> (Word8, Word64)
352    prep n = (mt, ui)
353      where
354        sign :: Word64   -- extend sign to whole length
355        sign = fromIntegral (n `unsafeShiftR` intBits)
356#if MIN_VERSION_base(4,7,0)
357        intBits = finiteBitSize (undefined :: Int64) - 1
358#else
359        intBits = bitSize (undefined :: Int64) - 1
360#endif
361
362        mt   :: Word8    -- select major type
363        mt   = fromIntegral (sign .&. 0x20)
364
365        ui   :: Word64   -- complement negatives
366        ui   = fromIntegral n `xor` sign
367
368    encIntSmall (mt, ui) =  mt + fromIntegral ui
369    encInt8     (mt, ui) = (mt + 24, fromIntegral ui)
370    encInt16    (mt, ui) = (mt + 25, fromIntegral ui)
371    encInt32    (mt, ui) = (mt + 26, fromIntegral ui)
372    encInt64    (mt, ui) = (mt + 27, fromIntegral ui)
373
374{-
375   Major type 2:  a byte string.  The string's length in bytes is
376      represented following the rules for positive integers (major type
377      0).  For example, a byte string whose length is 5 would have an
378      initial byte of 0b010_00101 (major type 2, additional information
379      5 for the length), followed by 5 bytes of binary content.  A byte
380      string whose length is 500 would have 3 initial bytes of
381      0b010_11001 (major type 2, additional information 25 to indicate a
382      two-byte length) followed by the two bytes 0x01f4 for a length of
383      500, followed by 500 bytes of binary content.
384-}
385
386bytesMP :: S.ByteString -> B.Builder
387bytesMP bs =
388    P.primBounded bytesLenMP (fromIntegral $ S.length bs) <> B.byteString bs
389
390bytesLenMP :: P.BoundedPrim Word
391bytesLenMP =
392    condB (<= 0x17)       (fromIntegral . (0x40 +) >$< header) $
393    condB (<= 0xff)       (fromIntegral >$< withConstHeader 0x58 P.word8) $
394    condB (<= 0xffff)     (fromIntegral >$< withConstHeader 0x59 P.word16BE) $
395    condB (<= 0xffffffff) (fromIntegral >$< withConstHeader 0x5a P.word32BE) $
396                          (fromIntegral >$< withConstHeader 0x5b P.word64BE)
397byteArrayMP :: BAS.SlicedByteArray -> B.Builder
398byteArrayMP ba =
399    P.primBounded bytesLenMP n <> BAS.toBuilder ba
400  where n = fromIntegral $ BAS.sizeofSlicedByteArray ba
401
402bytesBeginMP :: P.BoundedPrim ()
403bytesBeginMP = constHeader 0x5f
404
405{-
406   Major type 3:  a text string, specifically a string of Unicode
407      characters that is encoded as UTF-8 [RFC3629].  The format of this
408      type is identical to that of byte strings (major type 2), that is,
409      as with major type 2, the length gives the number of bytes.  This
410      type is provided for systems that need to interpret or display
411      human-readable text, and allows the differentiation between
412      unstructured bytes and text that has a specified repertoire and
413      encoding.  In contrast to formats such as JSON, the Unicode
414      characters in this type are never escaped.  Thus, a newline
415      character (U+000A) is always represented in a string as the byte
416      0x0a, and never as the bytes 0x5c6e (the characters "\" and "n")
417      or as 0x5c7530303061 (the characters "\", "u", "0", "0", "0", and
418      "a").
419-}
420
421stringMP :: T.Text -> B.Builder
422stringMP t =
423    P.primBounded stringLenMP (fromIntegral $ S.length bs) <> B.byteString bs
424  where
425    bs  = T.encodeUtf8 t
426
427stringLenMP :: P.BoundedPrim Word
428stringLenMP =
429    condB (<= 0x17)       (fromIntegral . (0x60 +) >$< header) $
430    condB (<= 0xff)       (fromIntegral >$< withConstHeader 0x78 P.word8) $
431    condB (<= 0xffff)     (fromIntegral >$< withConstHeader 0x79 P.word16BE) $
432    condB (<= 0xffffffff) (fromIntegral >$< withConstHeader 0x7a P.word32BE) $
433                          (fromIntegral >$< withConstHeader 0x7b P.word64BE)
434
435stringBeginMP :: P.BoundedPrim ()
436stringBeginMP = constHeader 0x7f
437
438utf8ByteArrayMP :: BAS.SlicedByteArray -> B.Builder
439utf8ByteArrayMP t =
440    P.primBounded stringLenMP n <> BAS.toBuilder t
441  where
442    n = fromIntegral $ BAS.sizeofSlicedByteArray t
443
444{-
445   Major type 4:  an array of data items.  Arrays are also called lists,
446      sequences, or tuples.  The array's length follows the rules for
447      byte strings (major type 2), except that the length denotes the
448      number of data items, not the length in bytes that the array takes
449      up.  Items in an array do not need to all be of the same type.
450      For example, an array that contains 10 items of any type would
451      have an initial byte of 0b100_01010 (major type of 4, additional
452      information of 10 for the length) followed by the 10 remaining
453      items.
454-}
455
456arrayLenMP :: P.BoundedPrim Word
457arrayLenMP =
458    condB (<= 0x17)       (fromIntegral . (0x80 +) >$< header) $
459    condB (<= 0xff)       (fromIntegral >$< withConstHeader 0x98 P.word8) $
460    condB (<= 0xffff)     (fromIntegral >$< withConstHeader 0x99 P.word16BE) $
461    condB (<= 0xffffffff) (fromIntegral >$< withConstHeader 0x9a P.word32BE) $
462                          (fromIntegral >$< withConstHeader 0x9b P.word64BE)
463
464arrayBeginMP :: P.BoundedPrim ()
465arrayBeginMP = constHeader 0x9f
466
467{-
468   Major type 5:  a map of pairs of data items.  Maps are also called
469      tables, dictionaries, hashes, or objects (in JSON).  A map is
470      comprised of pairs of data items, each pair consisting of a key
471      that is immediately followed by a value.  The map's length follows
472      the rules for byte strings (major type 2), except that the length
473      denotes the number of pairs, not the length in bytes that the map
474      takes up.  For example, a map that contains 9 pairs would have an
475      initial byte of 0b101_01001 (major type of 5, additional
476      information of 9 for the number of pairs) followed by the 18
477      remaining items.  The first item is the first key, the second item
478      is the first value, the third item is the second key, and so on.
479      A map that has duplicate keys may be well-formed, but it is not
480      valid, and thus it causes indeterminate decoding; see also
481      Section 3.7.
482-}
483
484mapLenMP :: P.BoundedPrim Word
485mapLenMP =
486    condB (<= 0x17)       (fromIntegral . (0xa0 +) >$< header) $
487    condB (<= 0xff)       (fromIntegral >$< withConstHeader 0xb8 P.word8) $
488    condB (<= 0xffff)     (fromIntegral >$< withConstHeader 0xb9 P.word16BE) $
489    condB (<= 0xffffffff) (fromIntegral >$< withConstHeader 0xba P.word32BE) $
490                          (fromIntegral >$< withConstHeader 0xbb P.word64BE)
491
492mapBeginMP :: P.BoundedPrim ()
493mapBeginMP = constHeader 0xbf
494
495{-
496   Major type 6:  optional semantic tagging of other major types.
497
498      In CBOR, a data item can optionally be preceded by a tag to give it
499      additional semantics while retaining its structure.  The tag is major
500      type 6, and represents an integer number as indicated by the tag's
501      integer value; the (sole) data item is carried as content data.
502
503      The initial bytes of the tag follow the rules for positive integers
504      (major type 0).
505-}
506
507tagMP :: P.BoundedPrim Word
508tagMP =
509    condB (<= 0x17)       (fromIntegral . (0xc0 +) >$< header) $
510    condB (<= 0xff)       (fromIntegral >$< withConstHeader 0xd8 P.word8) $
511    condB (<= 0xffff)     (fromIntegral >$< withConstHeader 0xd9 P.word16BE) $
512#if defined(ARCH_64bit)
513    condB (<= 0xffffffff) (fromIntegral >$< withConstHeader 0xda P.word32BE) $
514                          (fromIntegral >$< withConstHeader 0xdb P.word64BE)
515#else
516                          (fromIntegral >$< withConstHeader 0xda P.word32BE)
517#endif
518
519tag64MP :: P.BoundedPrim Word64
520tag64MP =
521    condB (<= 0x17)       (fromIntegral . (0xc0 +) >$< header) $
522    condB (<= 0xff)       (fromIntegral >$< withConstHeader 0xd8 P.word8) $
523    condB (<= 0xffff)     (fromIntegral >$< withConstHeader 0xd9 P.word16BE) $
524    condB (<= 0xffffffff) (fromIntegral >$< withConstHeader 0xda P.word32BE) $
525                          (fromIntegral >$< withConstHeader 0xdb P.word64BE)
526
527{-
528   Major type 7:  floating-point numbers and simple data types that need
529      no content, as well as the "break" stop code.
530
531      Major type 7 is for two types of data: floating-point numbers and
532      "simple values" that do not need any content.  Each value of the
533      5-bit additional information in the initial byte has its own separate
534      meaning, as defined in Table 1.  Like the major types for integers,
535      items of this major type do not carry content data; all the
536      information is in the initial bytes.
537
538    +-------------+--------------------------------------------------+
539    | 5-Bit Value | Semantics                                        |
540    +-------------+--------------------------------------------------+
541    | 0..23       | Simple value (value 0..23)                       |
542    |             |                                                  |
543    | 24          | Simple value (value 32..255 in following byte)   |
544    |             |                                                  |
545    | 25          | IEEE 754 Half-Precision Float (16 bits follow)   |
546    |             |                                                  |
547    | 26          | IEEE 754 Single-Precision Float (32 bits follow) |
548    |             |                                                  |
549    | 27          | IEEE 754 Double-Precision Float (64 bits follow) |
550    |             |                                                  |
551    | 28-30       | (Unassigned)                                     |
552    |             |                                                  |
553    | 31          | "break" stop code for indefinite-length items    |
554    +-------------+--------------------------------------------------+
555-}
556
557simpleMP :: P.BoundedPrim Word8
558simpleMP =
559    condB (<= 0x17) ((0xe0 +) >$< header) $
560                    (withConstHeader 0xf8 P.word8)
561
562falseMP :: P.BoundedPrim ()
563falseMP = constHeader 0xf4
564
565trueMP :: P.BoundedPrim ()
566trueMP = constHeader 0xf5
567
568nullMP :: P.BoundedPrim ()
569nullMP = constHeader 0xf6
570
571undefMP :: P.BoundedPrim ()
572undefMP = constHeader 0xf7
573
574-- Canonical encoding of a NaN as per RFC 7049, section 3.9.
575canonicalNaN :: PI.BoundedPrim a
576canonicalNaN = P.liftFixedToBounded $ const (0xf9, (0x7e, 0x00))
577                                   >$< P.word8 >*< P.word8 >*< P.word8
578
579halfMP :: P.BoundedPrim Float
580halfMP = condB isNaN canonicalNaN
581                     (floatToWord16 >$< withConstHeader 0xf9 P.word16BE)
582
583floatMP :: P.BoundedPrim Float
584floatMP = condB isNaN canonicalNaN
585                      (withConstHeader 0xfa P.floatBE)
586
587doubleMP :: P.BoundedPrim Double
588doubleMP = condB isNaN canonicalNaN
589                       (withConstHeader 0xfb P.doubleBE)
590
591breakMP :: P.BoundedPrim ()
592breakMP = constHeader 0xff
593
594#if defined(OPTIMIZE_GMP)
595-- ---------------------------------------- --
596-- Implementation optimized for integer-gmp --
597-- ---------------------------------------- --
598
599-- Below is where we try to abstract over the differences between the legacy
600-- integer-gmp interface and ghc-bignum, shipped in GHC >= 9.0.
601
602-- | Write the limbs of a 'BigNat' to the given address in big-endian byte
603-- ordering.
604exportBigNatToAddr :: BigNat -> Addr# -> IO Word
605
606#if defined(HAVE_GHC_BIGNUM)
607
608pattern SmallInt  n = GHC.Num.Integer.IS n
609pattern PosBigInt n = GHC.Num.Integer.IP n
610pattern NegBigInt n = GHC.Num.Integer.IN n
611
612bigNatSizeInBytes :: GHC.Num.BigNat.BigNat -> Word
613bigNatSizeInBytes bigNat =
614  Gmp.bigNatSizeInBase 256 (GHC.Num.BigNat.unBigNat bigNat)
615
616bigNatMP :: GHC.Num.BigNat.BigNat# -> B.Builder
617bigNatMP n = P.primBounded header 0xc2 <> bigNatToBuilder (GHC.Num.BigNat.BN# n)
618
619negBigNatMP :: GHC.Num.BigNat.BigNat# -> B.Builder
620negBigNatMP n =
621  -- If value `n` is stored in CBOR, it is interpreted as -1 - n. Since BigNat
622  -- already represents n (note: it's unsigned), we simply decrement it to get
623  -- the correct encoding.
624     P.primBounded header 0xc3
625  <> bigNatToBuilder (subtractOneBigNat (GHC.Num.BigNat.BN# n))
626  where
627    subtractOneBigNat (GHC.Num.BigNat.BN# nat) =
628      case GHC.Num.BigNat.bigNatSubWord# nat 1## of
629        (#       | r #) -> GHC.Num.BigNat.BN# r
630        (# (# #) | #)   -> error "subtractOneBigNat: impossible"
631
632exportBigNatToAddr (GHC.Num.BigNat.BN# b) addr = IO $ \s ->
633  -- The last parameter (`1#`) makes the export function use big endian encoding.
634  case GHC.Num.BigNat.bigNatToAddr# b addr 1# s of
635    (# s', w #) -> (# s', W# w #)
636#else
637
638pattern SmallInt  n = Gmp.S# n
639pattern PosBigInt n = Gmp.Jp# n
640pattern NegBigInt n = Gmp.Jn# n
641
642bigNatSizeInBytes :: BigNat -> Word
643bigNatSizeInBytes bigNat = W# (Gmp.sizeInBaseBigNat bigNat 256#)
644
645bigNatMP :: BigNat -> B.Builder
646bigNatMP n = P.primBounded header 0xc2 <> bigNatToBuilder n
647
648negBigNatMP :: BigNat -> B.Builder
649negBigNatMP n =
650  -- If value `n` is stored in CBOR, it is interpreted as -1 - n. Since BigNat
651  -- already represents n (note: it's unsigned), we simply decrement it to get
652  -- the correct encoding.
653     P.primBounded header 0xc3
654  <> bigNatToBuilder (subtractOneBigNat n)
655  where
656    subtractOneBigNat n = Gmp.minusBigNatWord n (int2Word# 1#)
657
658exportBigNatToAddr bigNat addr# =
659  -- The last parameter (`1#`) makes the export function use big endian encoding.
660  Gmp.exportBigNatToAddr bigNat addr# 1#
661#endif
662
663bigNatToBuilder :: BigNat -> B.Builder
664bigNatToBuilder = bigNatBuilder
665  where
666    bigNatBuilder :: BigNat -> B.Builder
667    bigNatBuilder bigNat =
668        let sizeW = bigNatSizeInBytes bigNat
669#if MIN_VERSION_bytestring(0,10,12)
670            bounded = PI.boundedPrim (fromIntegral sizeW) (dumpBigNat sizeW)
671#else
672            bounded = PI.boudedPrim (fromIntegral sizeW) (dumpBigNat sizeW)
673#endif
674        in P.primBounded bytesLenMP sizeW <> P.primBounded bounded bigNat
675
676    dumpBigNat :: Word -> BigNat -> Ptr a -> IO (Ptr a)
677    dumpBigNat (W# sizeW#) bigNat ptr@(Ptr addr#) = do
678        (W# written#) <- exportBigNatToAddr bigNat addr#
679        let !newPtr = ptr `plusPtr` (I# (word2Int# written#))
680            sanity = isTrue# (sizeW# `eqWord#` written#)
681        return $ assert sanity newPtr
682
683#else
684
685-- ---------------------- --
686-- Generic implementation --
687-- ---------------------- --
688integerMP :: Integer -> B.Builder
689integerMP n
690  | n >= 0    = P.primBounded header 0xc2 <> integerToBuilder n
691  | otherwise = P.primBounded header 0xc3 <> integerToBuilder (-1 - n)
692
693integerToBuilder :: Integer -> B.Builder
694integerToBuilder n = bytesMP (integerToBytes n)
695
696integerToBytes :: Integer -> S.ByteString
697integerToBytes n0
698  | n0 == 0   = S.pack [0]
699  | otherwise = S.pack (reverse (go n0))
700  where
701    go n | n == 0    = []
702         | otherwise = narrow n : go (n `shiftR` 8)
703
704    narrow :: Integer -> Word8
705    narrow = fromIntegral
706#endif
707