1 /*
2 * CBOR bindings for Duktape.
3 *
4 * https://tools.ietf.org/html/rfc7049
5 */
6
7 #include <math.h>
8 #include <string.h>
9 #include "duktape.h"
10 #include "duk_cbor.h"
11
12 /* #define DUK_CBOR_DPRINT */
13 /* #define DUK_CBOR_STRESS */
14
/* Internal assertion macro.  With the '#if 1' branch selected (the default)
 * assertions expand to an empty statement and the argument is never
 * evaluated.  Flip the condition to '#if 0' to get the checking variant,
 * which reports the failing file/line on stderr and then calls abort().
 */
#if 1
#define DUK_CBOR_ASSERT(x) do {} while (0)
#else
#include <stdio.h>
#include <stdlib.h>
#define DUK_CBOR_ASSERT(x) do { \
		if (!(x)) { \
			fprintf(stderr, "ASSERT FAILED on %s:%d\n", __FILE__, __LINE__); \
			fflush(stderr); \
			abort(); \
		} \
	} while (0)
#endif
28
/* Optimization hint macros.  The compiler specific variants (using
 * __builtin_expect and function attributes) are disabled by '#if 0', so by
 * default the likely/unlikely hints evaluate to the plain condition and
 * the inline/noinline markers expand to nothing.
 */
#if 0
#define DUK_CBOR_LIKELY(x) __builtin_expect (!!(x), 1)
#define DUK_CBOR_UNLIKELY(x) __builtin_expect (!!(x), 0)
#define DUK_CBOR_INLINE inline
#define DUK_CBOR_NOINLINE __attribute__((noinline))
#else
#define DUK_CBOR_LIKELY(x) (x)
#define DUK_CBOR_UNLIKELY(x) (x)
#define DUK_CBOR_INLINE
#define DUK_CBOR_NOINLINE
#endif
40
41 /* #define DUK_CBOR_GCC_BUILTINS */
42
43 /* Default behavior for encoding strings: use CBOR text string if string
44 * is UTF-8 compatible, otherwise use CBOR byte string. These defines
45 * can be used to force either type for all strings. Using text strings
46 * for non-UTF-8 data is technically invalid CBOR.
47 */
48 /* #define DUK_CBOR_TEXT_STRINGS */
49 /* #define DUK_CBOR_BYTE_STRINGS */
50
51 /* Misc. defines. */
52 /* #define DUK_CBOR_PREFER_SIZE */
53 /* #define DUK_CBOR_DOUBLE_AS_IS */
54 /* #define DUK_CBOR_DECODE_FASTPATH */
55
/* Encoder state: a resizable output buffer plus a write cursor. */
typedef struct {
	duk_context *ctx;      /* Duktape context used for all API calls */
	duk_uint8_t *ptr;      /* write cursor: next output byte goes here */
	duk_uint8_t *buf;      /* start of the output buffer */
	duk_uint8_t *buf_end;  /* one past the last byte of the output buffer (buf + len) */
	duk_size_t len;        /* current allocated size of the output buffer in bytes */
	duk_idx_t idx_buf;     /* value stack index of the buffer, passed to duk_resize_buffer() */
} duk_cbor_encode_context;
64
/* Decoder state.  NOTE(review): the decoder is not part of this section of
 * the file; the field roles below are inferred from their names and should
 * be confirmed against the decode functions.
 */
typedef struct {
	duk_context *ctx;        /* Duktape context */
	const duk_uint8_t *buf;  /* presumably the CBOR input data (read-only) */
	duk_size_t off;          /* presumably the current read offset into 'buf' */
	duk_size_t len;          /* presumably the total input length in bytes */
} duk_cbor_decode_context;
71
/* Union giving byte, 16-bit and 32-bit views of the storage of a double.
 * Used for endianness detection and for byte order conversions.  The
 * 64-bit integer view is disabled ('#if 0').
 */
typedef union {
	duk_uint8_t x[8];
	duk_uint16_t s[4];
	duk_uint32_t i[2];
#if 0
	duk_uint64_t i64[1];
#endif
	double d;
} duk_cbor_dblunion;
81
/* Union giving byte, 16-bit and 32-bit views of the storage of a float,
 * used for byte order conversions of single precision values.
 */
typedef union {
	duk_uint8_t x[4];
	duk_uint16_t s[2];
	duk_uint32_t i[1];
	float f;
} duk_cbor_fltunion;
88
89 static void duk__cbor_encode_value(duk_cbor_encode_context *enc_ctx);
90 static void duk__cbor_decode_value(duk_cbor_decode_context *dec_ctx);
91
92 /*
93 * Misc
94 */
95
96 /* XXX: These are sometimes portability concerns and would be nice to expose
97 * from Duktape itself as portability helpers.
98 */
99
/* Wrapper for the C99 signbit() macro: returns non-zero when the sign bit
 * of 'd' is set (this includes negative zero), zero otherwise.
 */
static int duk__cbor_signbit(double d) {
	const int sign_set = signbit(d);

	return sign_set;
}
103
/* Wrapper for the C99 fpclassify() macro: returns the FP_xxx class
 * (FP_ZERO, FP_NORMAL, FP_NAN, ...) of 'd'.
 */
static int duk__cbor_fpclassify(double d) {
	const int fp_class = fpclassify(d);

	return fp_class;
}
107
/* Wrapper for the C99 isnan() macro: non-zero if and only if 'd' is a NaN. */
static int duk__cbor_isnan(double d) {
	const int is_nan = isnan(d);

	return is_nan;
}
111
/* Wrapper for the C99 isinf() macro: non-zero if and only if 'd' is
 * positive or negative infinity.
 */
static int duk__cbor_isinf(double d) {
	const int is_inf = isinf(d);

	return is_inf;
}
115
duk__cbor_double_to_uint32(double d)116 static duk_uint32_t duk__cbor_double_to_uint32(double d) {
117 /* Out of range casts are undefined behavior, so caller must avoid. */
118 DUK_CBOR_ASSERT(d >= 0.0 && d <= 4294967295.0);
119 return (duk_uint32_t) d;
120 }
121
/* Endian detection.  Technically happens at runtime, but in practice
 * resolves at compile time to a constant and gets inlined.
 *
 * The constants below are the return values of duk__cbor_check_endian().
 * They are all non-zero; the detection function returns 0 when the byte
 * order is not recognized.
 */
#define DUK__CBOR_LITTLE_ENDIAN 1
#define DUK__CBOR_MIXED_ENDIAN 2
#define DUK__CBOR_BIG_ENDIAN 3
128
duk__cbor_check_endian(void)129 static int duk__cbor_check_endian(void) {
130 duk_cbor_dblunion u;
131
132 /* >>> struct.pack('>d', 1.23456789).encode('hex')
133 * '3ff3c0ca4283de1b'
134 */
135
136 u.d = 1.23456789;
137 if (u.x[0] == 0x1bU) {
138 return DUK__CBOR_LITTLE_ENDIAN;
139 } else if (u.x[0] == 0x3fU) {
140 return DUK__CBOR_BIG_ENDIAN;
141 } else if (u.x[0] == 0xcaU) {
142 return DUK__CBOR_MIXED_ENDIAN;
143 } else {
144 DUK_CBOR_ASSERT(0);
145 }
146 return 0;
147 }
148
duk__cbor_bswap16(duk_uint16_t x)149 static DUK_CBOR_INLINE duk_uint16_t duk__cbor_bswap16(duk_uint16_t x) {
150 #if defined(DUK_CBOR_GCC_BUILTINS)
151 return __builtin_bswap16(x);
152 #else
153 /* XXX: matches, DUK_BSWAP16(), use that if exposed. */
154 return (x >> 8) | (x << 8);
155 #endif
156 }
157
duk__cbor_bswap32(duk_uint32_t x)158 static DUK_CBOR_INLINE duk_uint32_t duk__cbor_bswap32(duk_uint32_t x) {
159 #if defined(DUK_CBOR_GCC_BUILTINS)
160 return __builtin_bswap32(x);
161 #else
162 /* XXX: matches, DUK_BSWAP32(), use that if exposed. */
163 return (x >> 24) | ((x >> 8) & 0xff00UL) | ((x << 8) & 0xff0000UL) | (x << 24);
164 #endif
165 }
166
167 #if 0
168 static duk_uint64_t duk__cbor_bswap64(duk_uint64_t x) {
169 /* XXX */
170 }
171 #endif
172
duk__cbor_write_uint16_big(duk_uint8_t * p,duk_uint16_t x)173 static DUK_CBOR_INLINE void duk__cbor_write_uint16_big(duk_uint8_t *p, duk_uint16_t x) {
174 #if 0
175 *p++ = (duk_uint8_t) ((x >> 8) & 0xffU);
176 *p++ = (duk_uint8_t) (x & 0xffU);
177 #endif
178 duk_uint16_t a;
179
180 switch (duk__cbor_check_endian()) {
181 case DUK__CBOR_LITTLE_ENDIAN:
182 case DUK__CBOR_MIXED_ENDIAN:
183 a = duk__cbor_bswap16(x);
184 (void) memcpy((void *) p, (const void *) &a, 2);
185 break;
186 case DUK__CBOR_BIG_ENDIAN:
187 a = x;
188 (void) memcpy((void *) p, (const void *) &a, 2);
189 break;
190 default:
191 DUK_CBOR_ASSERT(0);
192 }
193 }
194
duk__cbor_read_uint16_big(const duk_uint8_t * p)195 static DUK_CBOR_INLINE duk_uint16_t duk__cbor_read_uint16_big(const duk_uint8_t *p) {
196 duk_uint16_t a, x;
197
198 #if 0
199 x = (((duk_uint16_t) p[0]) << 8U) +
200 ((duk_uint16_t) p[1]);
201 #endif
202 switch (duk__cbor_check_endian()) {
203 case DUK__CBOR_LITTLE_ENDIAN:
204 case DUK__CBOR_MIXED_ENDIAN:
205 (void) memcpy((void *) &a, (const void *) p, 2);
206 x = duk__cbor_bswap16(a);
207 break;
208 case DUK__CBOR_BIG_ENDIAN:
209 (void) memcpy((void *) &a, (const void *) p, 2);
210 x = a;
211 break;
212 default:
213 DUK_CBOR_ASSERT(0);
214 x = 0;
215 }
216 return x;
217 }
218
duk__cbor_write_uint32_big(duk_uint8_t * p,duk_uint32_t x)219 static DUK_CBOR_INLINE void duk__cbor_write_uint32_big(duk_uint8_t *p, duk_uint32_t x) {
220 #if 0
221 *p++ = (duk_uint8_t) ((x >> 24) & 0xffU);
222 *p++ = (duk_uint8_t) ((x >> 16) & 0xffU);
223 *p++ = (duk_uint8_t) ((x >> 8) & 0xffU);
224 *p++ = (duk_uint8_t) (x & 0xffU);
225 #endif
226 duk_uint32_t a;
227
228 switch (duk__cbor_check_endian()) {
229 case DUK__CBOR_LITTLE_ENDIAN:
230 case DUK__CBOR_MIXED_ENDIAN:
231 a = duk__cbor_bswap32(x);
232 (void) memcpy((void *) p, (const void *) &a, 4);
233 break;
234 case DUK__CBOR_BIG_ENDIAN:
235 a = x;
236 (void) memcpy((void *) p, (const void *) &a, 4);
237 break;
238 default:
239 DUK_CBOR_ASSERT(0);
240 }
241 }
242
duk__cbor_read_uint32_big(const duk_uint8_t * p)243 static DUK_CBOR_INLINE duk_uint32_t duk__cbor_read_uint32_big(const duk_uint8_t *p) {
244 duk_uint32_t a, x;
245
246 #if 0
247 x = (((duk_uint32_t) p[0]) << 24U) +
248 (((duk_uint32_t) p[1]) << 16U) +
249 (((duk_uint32_t) p[2]) << 8U) +
250 ((duk_uint32_t) p[3]);
251 #endif
252 switch (duk__cbor_check_endian()) {
253 case DUK__CBOR_LITTLE_ENDIAN:
254 case DUK__CBOR_MIXED_ENDIAN:
255 (void) memcpy((void *) &a, (const void *) p, 4);
256 x = duk__cbor_bswap32(a);
257 break;
258 case DUK__CBOR_BIG_ENDIAN:
259 (void) memcpy((void *) &a, (const void *) p, 4);
260 x = a;
261 break;
262 default:
263 DUK_CBOR_ASSERT(0);
264 x = 0;
265 }
266 return x;
267 }
268
duk__cbor_write_double_big(duk_uint8_t * p,double x)269 static DUK_CBOR_INLINE void duk__cbor_write_double_big(duk_uint8_t *p, double x) {
270 duk_cbor_dblunion u;
271 duk_uint32_t a, b;
272
273 u.d = x;
274
275 switch (duk__cbor_check_endian()) {
276 case DUK__CBOR_LITTLE_ENDIAN:
277 #if 0
278 u.i64[0] = duk__cbor_bswap64(u.i64[0]);
279 (void) memcpy((void *) p, (const void *) u.x, 8);
280 #endif
281 a = u.i[0];
282 b = u.i[1];
283 u.i[0] = duk__cbor_bswap32(b);
284 u.i[1] = duk__cbor_bswap32(a);
285 (void) memcpy((void *) p, (const void *) u.x, 8);
286 break;
287 case DUK__CBOR_MIXED_ENDIAN:
288 a = u.i[0];
289 b = u.i[1];
290 u.i[0] = duk__cbor_bswap32(a);
291 u.i[1] = duk__cbor_bswap32(b);
292 (void) memcpy((void *) p, (const void *) u.x, 8);
293 break;
294 case DUK__CBOR_BIG_ENDIAN:
295 (void) memcpy((void *) p, (const void *) u.x, 8);
296 break;
297 default:
298 DUK_CBOR_ASSERT(0);
299 }
300 }
301
duk__cbor_write_float_big(duk_uint8_t * p,float x)302 static DUK_CBOR_INLINE void duk__cbor_write_float_big(duk_uint8_t *p, float x) {
303 duk_cbor_fltunion u;
304 duk_uint32_t a;
305
306 u.f = x;
307 switch (duk__cbor_check_endian()) {
308 case DUK__CBOR_LITTLE_ENDIAN:
309 case DUK__CBOR_MIXED_ENDIAN:
310 a = u.i[0];
311 u.i[0] = duk__cbor_bswap32(a);
312 (void) memcpy((void *) p, (const void *) u.x, 4);
313 break;
314 case DUK__CBOR_BIG_ENDIAN:
315 (void) memcpy((void *) p, (const void *) u.x, 4);
316 break;
317 default:
318 DUK_CBOR_ASSERT(0);
319 }
320 }
321
duk__cbor_dblunion_host_to_little(duk_cbor_dblunion * u)322 static DUK_CBOR_INLINE void duk__cbor_dblunion_host_to_little(duk_cbor_dblunion *u) {
323 duk_uint32_t a, b;
324
325 switch (duk__cbor_check_endian()) {
326 case DUK__CBOR_LITTLE_ENDIAN:
327 /* HGFEDCBA -> HGFEDCBA */
328 break;
329 case DUK__CBOR_MIXED_ENDIAN:
330 /* DCBAHGFE -> HGFEDCBA */
331 a = u->i[0];
332 b = u->i[1];
333 u->i[0] = b;
334 u->i[1] = a;
335 break;
336 case DUK__CBOR_BIG_ENDIAN:
337 /* ABCDEFGH -> HGFEDCBA */
338 #if 0
339 u->i64[0] = duk__cbor_bswap64(u->i64[0]);
340 #endif
341 a = u->i[0];
342 b = u->i[1];
343 u->i[0] = duk__cbor_bswap32(b);
344 u->i[1] = duk__cbor_bswap32(a);
345 break;
346 }
347 }
348
duk__cbor_dblunion_little_to_host(duk_cbor_dblunion * u)349 static DUK_CBOR_INLINE void duk__cbor_dblunion_little_to_host(duk_cbor_dblunion *u) {
350 duk__cbor_dblunion_host_to_little(u);
351 }
352
duk__cbor_dblunion_host_to_big(duk_cbor_dblunion * u)353 static DUK_CBOR_INLINE void duk__cbor_dblunion_host_to_big(duk_cbor_dblunion *u) {
354 duk_uint32_t a, b;
355
356 switch (duk__cbor_check_endian()) {
357 case DUK__CBOR_LITTLE_ENDIAN:
358 /* HGFEDCBA -> ABCDEFGH */
359 #if 0
360 u->i64[0] = duk__cbor_bswap64(u->i64[0]);
361 #else
362 a = u->i[0];
363 b = u->i[1];
364 u->i[0] = duk__cbor_bswap32(b);
365 u->i[1] = duk__cbor_bswap32(a);
366 #endif
367 break;
368 case DUK__CBOR_MIXED_ENDIAN:
369 /* DCBAHGFE -> ABCDEFGH */
370 a = u->i[0];
371 b = u->i[1];
372 u->i[0] = duk__cbor_bswap32(a);
373 u->i[1] = duk__cbor_bswap32(b);
374 break;
375 case DUK__CBOR_BIG_ENDIAN:
376 /* ABCDEFGH -> ABCDEFGH */
377 break;
378 }
379 }
380
duk__cbor_dblunion_big_to_host(duk_cbor_dblunion * u)381 static DUK_CBOR_INLINE void duk__cbor_dblunion_big_to_host(duk_cbor_dblunion *u) {
382 duk__cbor_dblunion_host_to_big(u);
383 }
384
duk__cbor_fltunion_host_to_big(duk_cbor_fltunion * u)385 static DUK_CBOR_INLINE void duk__cbor_fltunion_host_to_big(duk_cbor_fltunion *u) {
386 switch (duk__cbor_check_endian()) {
387 case DUK__CBOR_LITTLE_ENDIAN:
388 case DUK__CBOR_MIXED_ENDIAN:
389 /* DCBA -> ABCD */
390 u->i[0] = duk__cbor_bswap32(u->i[0]);
391 break;
392 case DUK__CBOR_BIG_ENDIAN:
393 /* ABCD -> ABCD */
394 break;
395 }
396 }
397
duk__cbor_fltunion_big_to_host(duk_cbor_fltunion * u)398 static DUK_CBOR_INLINE void duk__cbor_fltunion_big_to_host(duk_cbor_fltunion *u) {
399 duk__cbor_fltunion_host_to_big(u);
400 }
401
402 /*
403 * Encoding
404 */
405
/* Abort encoding by throwing a TypeError ("cbor encode error") through
 * the Duktape context attached to 'enc_ctx'.
 */
static void duk__cbor_encode_error(duk_cbor_encode_context *enc_ctx) {
	duk_context *ctx = enc_ctx->ctx;

	(void) duk_type_error(ctx, "cbor encode error");
}
409
410 /* Check whether a string is UTF-8 compatible or not. */
duk__cbor_is_utf8_compatible(const duk_uint8_t * buf,duk_size_t len)411 static int duk__cbor_is_utf8_compatible(const duk_uint8_t *buf, duk_size_t len) {
412 duk_size_t i = 0;
413 #if !defined(DUK_CBOR_PREFER_SIZE)
414 duk_size_t len_safe;
415 #endif
416
417 /* Many practical strings are ASCII only, so use a fast path check
418 * to check chunks of bytes at once with minimal branch cost.
419 */
420 #if !defined(DUK_CBOR_PREFER_SIZE)
421 len_safe = len & ~0x03UL;
422 for (; i < len_safe; i += 4) {
423 duk_uint8_t t = buf[i] | buf[i + 1] | buf[i + 2] | buf[i + 3];
424 if (DUK_CBOR_UNLIKELY((t & 0x80U) != 0U)) {
425 /* At least one byte was outside 0x00-0x7f, break
426 * out to slow path (and remain there).
427 *
428 * XXX: We could also deal with the problem character
429 * and resume fast path later.
430 */
431 break;
432 }
433 }
434 #endif
435
436 for (; i < len;) {
437 duk_uint8_t t;
438 duk_size_t left;
439 duk_size_t ncont;
440 duk_uint32_t cp;
441 duk_uint32_t mincp;
442
443 t = buf[i++];
444 if (DUK_CBOR_LIKELY((t & 0x80U) == 0U)) {
445 /* Fast path, ASCII. */
446 continue;
447 }
448
449 /* Non-ASCII start byte, slow path.
450 *
451 * 10xx xxxx -> continuation byte
452 * 110x xxxx + 1*CONT -> [0x80, 0x7ff]
453 * 1110 xxxx + 2*CONT -> [0x800, 0xffff], must reject [0xd800,0xdfff]
454 * 1111 0xxx + 3*CONT -> [0x10000, 0x10ffff]
455 */
456 left = len - i;
457 if (t <= 0xdfU) { /* 1101 1111 = 0xdf */
458 if (t <= 0xbfU) { /* 1011 1111 = 0xbf */
459 return 0;
460 }
461 ncont = 1;
462 mincp = 0x80UL;
463 cp = t & 0x1fU;
464 } else if (t <= 0xefU) { /* 1110 1111 = 0xef */
465 ncont = 2;
466 mincp = 0x800UL;
467 cp = t & 0x0fU;
468 } else if (t <= 0xf7U) { /* 1111 0111 = 0xf7 */
469 ncont = 3;
470 mincp = 0x10000UL;
471 cp = t & 0x07U;
472 } else {
473 return 0;
474 }
475 if (left < ncont) {
476 return 0;
477 }
478 while (ncont > 0U) {
479 t = buf[i++];
480 if ((t & 0xc0U) != 0x80U) { /* 10xx xxxx */
481 return 0;
482 }
483 cp = (cp << 6) + (t & 0x3fU);
484 ncont--;
485 }
486 if (cp < mincp || cp > 0x10ffffUL || (cp >= 0xd800UL && cp <= 0xdfffUL)) {
487 return 0;
488 }
489 }
490
491 return 1;
492 }
493
494 /* Check that a size_t is in uint32 range to avoid out-of-range casts. */
duk__cbor_encode_sizet_uint32_check(duk_cbor_encode_context * enc_ctx,duk_size_t len)495 static void duk__cbor_encode_sizet_uint32_check(duk_cbor_encode_context *enc_ctx, duk_size_t len) {
496 if (DUK_CBOR_UNLIKELY(sizeof(duk_size_t) > sizeof(duk_uint32_t) && len > (duk_size_t) DUK_UINT32_MAX)) {
497 duk__cbor_encode_error(enc_ctx);
498 }
499 }
500
duk__cbor_encode_ensure_slowpath(duk_cbor_encode_context * enc_ctx,duk_size_t len)501 static DUK_CBOR_NOINLINE void duk__cbor_encode_ensure_slowpath(duk_cbor_encode_context *enc_ctx, duk_size_t len) {
502 duk_size_t oldlen;
503 duk_size_t minlen;
504 duk_size_t newlen;
505 duk_uint8_t *p_new;
506 duk_size_t old_data_len;
507
508 DUK_CBOR_ASSERT(enc_ctx->ptr >= enc_ctx->buf);
509 DUK_CBOR_ASSERT(enc_ctx->buf_end >= enc_ctx->ptr);
510 DUK_CBOR_ASSERT(enc_ctx->buf_end >= enc_ctx->buf);
511
512 /* Overflow check.
513 *
514 * Limit example: 0xffffffffUL / 2U = 0x7fffffffUL, we reject >= 0x80000000UL.
515 */
516 oldlen = enc_ctx->len;
517 minlen = oldlen + len;
518 if (DUK_CBOR_UNLIKELY(oldlen > DUK_SIZE_MAX / 2U || minlen < oldlen)) {
519 duk__cbor_encode_error(enc_ctx);
520 }
521
522 #if defined(DUK_CBOR_STRESS)
523 newlen = oldlen + 1U;
524 #else
525 newlen = oldlen * 2U;
526 #endif
527 DUK_CBOR_ASSERT(newlen >= oldlen);
528
529 if (minlen > newlen) {
530 newlen = minlen;
531 }
532 DUK_CBOR_ASSERT(newlen >= oldlen);
533 DUK_CBOR_ASSERT(newlen >= minlen);
534 DUK_CBOR_ASSERT(newlen > 0U);
535
536 #if defined(DUK_CBOR_DPRINT)
537 fprintf(stderr, "cbor encode buffer resized to %ld\n", (long) newlen);
538 #endif
539
540 p_new = (duk_uint8_t *) duk_resize_buffer(enc_ctx->ctx, enc_ctx->idx_buf, newlen);
541 DUK_CBOR_ASSERT(p_new != NULL);
542 old_data_len = (duk_size_t) (enc_ctx->ptr - enc_ctx->buf);
543 enc_ctx->buf = p_new;
544 enc_ctx->buf_end = p_new + newlen;
545 enc_ctx->ptr = p_new + old_data_len;
546 enc_ctx->len = newlen;
547 }
548
duk__cbor_encode_ensure(duk_cbor_encode_context * enc_ctx,duk_size_t len)549 static DUK_CBOR_INLINE void duk__cbor_encode_ensure(duk_cbor_encode_context *enc_ctx, duk_size_t len) {
550 if (DUK_CBOR_LIKELY((duk_size_t) (enc_ctx->buf_end - enc_ctx->ptr) >= len)) {
551 return;
552 }
553 duk__cbor_encode_ensure_slowpath(enc_ctx, len);
554 }
555
duk__cbor_get_reserve(duk_cbor_encode_context * enc_ctx)556 static duk_size_t duk__cbor_get_reserve(duk_cbor_encode_context *enc_ctx) {
557 DUK_CBOR_ASSERT(enc_ctx->ptr >= enc_ctx->buf);
558 DUK_CBOR_ASSERT(enc_ctx->ptr <= enc_ctx->buf_end);
559 return (duk_size_t) (enc_ctx->buf_end - enc_ctx->ptr);
560 }
561
/* Emit a CBOR initial byte plus the shortest possible argument for 'u'.
 *
 * 'base' carries the major type in its top three bits (e.g. 0x00 unsigned
 * integer, 0x20 negative integer, 0x40 byte string); the additional
 * information field is added to it:
 *
 *   u <= 23      -> base + u                    (1 byte total)
 *   u <= 0xff    -> base + 0x18, 1 byte value   (2 bytes total)
 *   u <= 0xffff  -> base + 0x19, 2 byte value   (3 bytes total)
 *   otherwise    -> base + 0x1a, 4 byte value   (5 bytes total)
 *
 * The caller must have reserved at least 1 + 4 bytes of output space.
 */
static void duk__cbor_encode_uint32(duk_cbor_encode_context *enc_ctx, duk_uint32_t u, duk_uint8_t base) {
	duk_uint8_t *out;

	/* Caller must ensure space. */
	DUK_CBOR_ASSERT(duk__cbor_get_reserve(enc_ctx) >= 1 + 4);

	out = enc_ctx->ptr;
	if (DUK_CBOR_LIKELY(u <= 23U)) {
		out[0] = (duk_uint8_t) (base + (duk_uint8_t) u);
		out += 1;
	} else if (u <= 0xffUL) {
		out[0] = (duk_uint8_t) (base + 0x18U);
		out[1] = (duk_uint8_t) u;
		out += 2;
	} else if (u <= 0xffffUL) {
		out[0] = (duk_uint8_t) (base + 0x19U);
		duk__cbor_write_uint16_big(out + 1, (duk_uint16_t) u);
		out += 3;
	} else {
		out[0] = (duk_uint8_t) (base + 0x1aU);
		duk__cbor_write_uint32_big(out + 1, u);
		out += 5;
	}
	enc_ctx->ptr = out;
}
585
586 #if defined(DUK_CBOR_DOUBLE_AS_IS)
/* DUK_CBOR_DOUBLE_AS_IS variant: always encode 'd' as a full IEEE double
 * (initial byte 0xfb followed by 8 big endian bytes), with no attempt to
 * find a shorter integer or float form.
 *
 * The caller must have reserved at least 1 + 8 bytes of output space.
 */
static void duk__cbor_encode_double(duk_cbor_encode_context *enc_ctx, double d) {
	duk_uint8_t *out;

	/* Caller must ensure space. */
	DUK_CBOR_ASSERT(duk__cbor_get_reserve(enc_ctx) >= 1 + 8);

	out = enc_ctx->ptr;
	out[0] = 0xfbU;
	duk__cbor_write_double_big(out + 1, d);
	enc_ctx->ptr = out + 9;
}
599 #else /* DUK_CBOR_DOUBLE_AS_IS */
/* Encode a non-zero double using the shortest CBOR floating point form
 * that preserves its value: half-float (0xf9), single precision float
 * (0xfa), or double (0xfb).  NaN and +/- Infinity are always emitted as
 * half-floats; the NaN payload is not preserved.
 *
 * Only normal half-floats and floats are considered; values that would
 * need a denormal half-float or float are emitted in a wider format.
 *
 * The caller must have reserved at least 1 + 8 bytes of output space and
 * must not pass +/- 0.
 */
static void duk__cbor_encode_double_fp(duk_cbor_encode_context *enc_ctx, double d) {
	duk_cbor_dblunion u;
	duk_uint16_t top16;
	duk_int16_t expt;
	duk_uint8_t *out;

	DUK_CBOR_ASSERT(duk__cbor_fpclassify(d) != FP_ZERO);

	/* Caller must ensure space. */
	DUK_CBOR_ASSERT(duk__cbor_get_reserve(enc_ctx) >= 1 + 8);

	/* Bring the bytes into little endian order (no-op on little endian
	 * platforms) so that u.x[7] holds the sign and top exponent bits
	 * and u.x[0] the least significant mantissa bits.
	 */
	u.d = d;
	duk__cbor_dblunion_host_to_little(&u);

	/* Extract the unbiased exponent from the top 16 bits:
	 * seeeeeee eeeemmmm.
	 */
	top16 = (((duk_uint16_t) u.x[7]) << 8) | ((duk_uint16_t) u.x[6]);
	expt = (duk_int16_t) ((top16 >> 4) & 0x7ffU) - 1023;

	/* Half-float, normal exponents only (excluding denormals).
	 *
	 *            7        6        5        4        3        2        1        0  (LE index)
	 * double: seeeeeee eeeemmmm mmmmmmmm mmmmmmmm mmmmmmmm mmmmmmmm mmmmmmmm mmmmmmmm
	 * half:         seeeee mmmm mmmmmm00 00000000 00000000 00000000 00000000 00000000
	 *
	 * Usable when the exponent is in range and only the top 10
	 * mantissa bits (excluding the implicit leading 1) are in use.
	 */
	if (expt >= -14 && expt <= 15 &&
	    u.x[0] == 0 && u.x[1] == 0 && u.x[2] == 0 && u.x[3] == 0 &&
	    u.x[4] == 0 && (u.x[5] & 0x03U) == 0) {
		duk_uint32_t half;

		/* Assemble seeeeemm mmmmmmmm; the fields do not overlap. */
		half = (duk_uint32_t) (u.x[7] & 0x80U) << 8;      /* sign */
		half |= (duk_uint32_t) (expt + 15) << 10;         /* rebiased exponent */
		half |= ((duk_uint32_t) u.x[6] & 0x0fU) << 6;     /* mantissa bits 9..6 */
		half |= ((duk_uint32_t) u.x[5]) >> 2;             /* mantissa bits 5..0 */

		out = enc_ctx->ptr;
		out[0] = 0xf9U;
		duk__cbor_write_uint16_big(out + 1, (duk_uint16_t) half);
		enc_ctx->ptr = out + 3;
		return;
	}

	/* Single precision float, normal exponents only (excluding
	 * denormals).
	 *
	 * double: seeeeeee eeeemmmm mmmmmmmm mmmmmmmm mmmmmmmm mmmmmmmm mmmmmmmm mmmmmmmm
	 * float:     seeee eeeemmmm mmmmmmmm mmmmmmmm mmm00000 00000000 00000000 00000000
	 *
	 * Rather than masking mantissa bits, test with a
	 * double -> float -> double round trip.  The exponent has already
	 * been verified to be in float range, so the narrowing cast is
	 * not undefined behavior.
	 */
	if (expt >= -126 && expt <= 127) {
		duk_float_t narrowed = (duk_float_t) d;

		if ((duk_double_t) narrowed == d) {
			out = enc_ctx->ptr;
			out[0] = 0xfaU;
			duk__cbor_write_float_big(out + 1, narrowed);
			enc_ctx->ptr = out + 5;
			return;
		}
	}

	/* NaN and +/- Infinity share the maximum exponent and are encoded
	 * as half-floats.
	 */
	if (expt == 1024) {
		duk_uint8_t hi;

		DUK_CBOR_ASSERT(duk__cbor_isnan(d) || duk__cbor_isinf(d));

		if (duk__cbor_isnan(d)) {
			/* Shortest NaN encoding is a half-float; the exact
			 * NaN bits are lost.  An IEEE double quiet NaN is
			 * 7ff8 0000 0000 0000 on most architectures
			 * (https://en.wikipedia.org/wiki/NaN#Encoding); the
			 * equivalent half-float is 7e00.
			 */
			hi = 0x7eU;
		} else if (duk__cbor_signbit(d)) {
			/* -Infinity: fc00. */
			hi = 0xfcU;
		} else {
			/* +Infinity: 7c00. */
			hi = 0x7cU;
		}

		out = enc_ctx->ptr;
		out[0] = 0xf9U;
		out[1] = hi;
		out[2] = 0x00U;
		enc_ctx->ptr = out + 3;
		return;
	}

	/* Cannot use half-float or float, encode as full IEEE double. */
	out = enc_ctx->ptr;
	out[0] = 0xfbU;
	duk__cbor_write_double_big(out + 1, d);
	enc_ctx->ptr = out + 9;
}
722
/* Encode a number using the shortest CBOR representation available.
 *
 * Integers and floating point values of all types are conceptually
 * equivalent in CBOR, so the shortest form is always preferred; which
 * form that is isn't always obvious.  For example, NaN and Inf are
 * shortest as a half-float (assuming NaN bits are not preserved), and
 * 0x1'0000'0000 is shortest as a single precision float.  Shortest forms
 * in preference order (integer preferred over float at equal length):
 *
 *   uint        1 byte    [0,23] (not -0)
 *   sint        1 byte    [-24,-1]
 *   uint+1      2 bytes   [24,255]
 *   sint+1      2 bytes   [-256,-25]
 *   uint+2      3 bytes   [256,65535]
 *   sint+2      3 bytes   [-65536,-257]
 *   half-float  3 bytes   -0, NaN, +/- Infinity, range [-65504,65504]
 *   uint+4      5 bytes   [65536,4294967295]
 *   sint+4      5 bytes   [-4294967296,-65537]
 *   float       5 bytes   range [-(1 - 2^(-24)) * 2^128, (1 - 2^(-24)) * 2^128]
 *   uint+8      9 bytes   [4294967296,18446744073709551615]
 *   sint+8      9 bytes   [-18446744073709551616,-4294967297]
 *   double      9 bytes
 *
 * For whole numbers:
 *   - [-256,255]: the 1-byte or 2-byte uint/sint form is preferred.
 *   - [-65536,65535]: the 3-byte uint/sint form is preferred; half-floats
 *     are never preferred because they have the same length.
 *   - [-4294967296,4294967295]: the 5-byte uint/sint form is preferred;
 *     single precision floats have the same length, and half-floats
 *     don't reach beyond the 3-byte integer range.
 *   - So up to the signed/unsigned 32-bit range an integer form is
 *     always used.
 *   - Above 32 bits the situation is more complicated: half-floats are
 *     never useful, but a single precision float (5 bytes) can represent
 *     some integers which would need 9 bytes as a uint/sint.
 *
 * For values which are not whole numbers:
 *   - +Inf/-Inf use a half-float.
 *   - NaN uses a half-float, assuming the NaN payload is not worth
 *     preserving.  Duktape doesn't in general guarantee preservation of
 *     the NaN payload so this seems consistent.
 *   - Other values use the shortest form which doesn't lose precision.
 *     For normal half-floats and floats this is a check of exponent and
 *     mantissa bits.  Denormals are more complicated: a normal IEEE
 *     double can sometimes be represented as a denormal half-float or
 *     float.
 *
 * https://en.wikipedia.org/wiki/Half-precision_floating-point_format#IEEE_754_half-precision_binary_floating-point_format:_binary16
 *
 * 64-bit integer forms are not emitted at present, so whole numbers
 * outside the 32-bit range are handed to the floating point encoder.
 *
 * The caller must have reserved at least 1 + 8 bytes of output space.
 */
static void duk__cbor_encode_double(duk_cbor_encode_context *enc_ctx, double d) {
	duk_uint8_t *out;
	double arg;

	/* Caller must ensure space. */
	DUK_CBOR_ASSERT(duk__cbor_get_reserve(enc_ctx) >= 1 + 8);

	/* Most important path is integers.  floor() is an identity for
	 * +/- 0.0, NaN and +/- Infinity, so the comparison is also true
	 * for zeroes and infinities, but false for NaN (NaN == NaN
	 * compares false).
	 */
	if (DUK_CBOR_LIKELY(floor(d) == d)) {
		DUK_CBOR_ASSERT(!duk__cbor_isnan(d));

		if (!duk__cbor_signbit(d)) {
			if (d <= 4294967295.0) {
				/* Positive zero needs no special handling. */
				DUK_CBOR_ASSERT(d >= 0.0);
				duk__cbor_encode_uint32(enc_ctx, duk__cbor_double_to_uint32(d), 0x00U);
				return;
			}
		} else if (d >= -4294967296.0) {
			/* CBOR negative integers carry the argument -1 - n. */
			arg = -1.0 - d;
			if (arg >= 0.0) {
				duk__cbor_encode_uint32(enc_ctx, duk__cbor_double_to_uint32(arg), 0x20U);
				return;
			}

			/* Input was negative zero (arg == -1.0 < 0.0).
			 * Shortest -0 is a half-float, 8000.
			 */
			out = enc_ctx->ptr;
			out[0] = 0xf9U;
			out[1] = 0x80U;
			out[2] = 0x00U;
			enc_ctx->ptr = out + 3;
			return;
		}
	}

	/* Not a whole number in the 32-bit range: fractions, NaN,
	 * infinities and large magnitudes.  Zeroes never get here.
	 */
	DUK_CBOR_ASSERT(duk__cbor_fpclassify(d) != FP_ZERO);
	duk__cbor_encode_double_fp(enc_ctx, d);
}
831 #endif /* DUK_CBOR_DOUBLE_AS_IS */
832
/* Encode the string at the value stack top; the value is left on the
 * stack.
 *
 * CBOR differentiates between UTF-8 text strings and byte strings.  Text
 * strings MUST be valid UTF-8, so not all Duktape strings can be encoded
 * as valid CBOR text strings.  Possible behaviors:
 *
 *   1. Use a text string when the input is valid UTF-8, otherwise a byte
 *      string (maybe tagged to indicate it was an extended UTF-8 string).
 *   2. Always use text strings, but sanitize the input so that invalid
 *      UTF-8 is replaced with e.g. U+FFFD.  Combine surrogates whenever
 *      possible.
 *   3. Always use byte strings.  Simple and valid CBOR, but not ideal
 *      for interoperability.
 *   4. Always use text strings, even for invalid UTF-8 such as
 *      codepoints in the surrogate pair range.  Simple, but technically
 *      invalid CBOR for non-UTF-8 strings which may affect
 *      interoperability.
 *
 * The default is 1; DUK_CBOR_TEXT_STRINGS selects 4 and
 * DUK_CBOR_BYTE_STRINGS selects 3.
 *
 * The caller must have reserved at least 1 + 8 bytes of output space;
 * space for the string data itself is reserved here.
 */
static void duk__cbor_encode_string_top(duk_cbor_encode_context *enc_ctx) {
	duk_context *ctx = enc_ctx->ctx;
	const duk_uint8_t *data;
	duk_size_t nbytes;

	/* Caller must ensure space. */
	DUK_CBOR_ASSERT(duk__cbor_get_reserve(enc_ctx) >= 1 + 8);

	data = (const duk_uint8_t *) duk_require_lstring(ctx, -1, &nbytes);

	if (duk_is_symbol(ctx, -1)) {
		/* Symbols are encoded as an empty object (map) for now,
		 * matching the behavior of cbor-js.
		 *
		 * XXX: Maybe encode String() coercion with a tag?
		 * XXX: Option to keep enough information to recover
		 * Symbols when decoding (this is not always desirable).
		 */
		*enc_ctx->ptr++ = 0xa0U;
		return;
	}

	/* Length header; the major type depends on the string policy. */
	duk__cbor_encode_sizet_uint32_check(enc_ctx, nbytes);
#if defined(DUK_CBOR_TEXT_STRINGS)
	duk__cbor_encode_uint32(enc_ctx, (duk_uint32_t) nbytes, 0x60U);
#elif defined(DUK_CBOR_BYTE_STRINGS)
	duk__cbor_encode_uint32(enc_ctx, (duk_uint32_t) nbytes, 0x40U);
#else
	if (DUK_CBOR_LIKELY(duk__cbor_is_utf8_compatible(data, nbytes))) {
		duk__cbor_encode_uint32(enc_ctx, (duk_uint32_t) nbytes, 0x60U);
	} else {
		duk__cbor_encode_uint32(enc_ctx, (duk_uint32_t) nbytes, 0x40U);
	}
#endif

	/* String data follows verbatim. */
	duk__cbor_encode_ensure(enc_ctx, nbytes);
	(void) memcpy((void *) enc_ctx->ptr, (const void *) data, nbytes);
	enc_ctx->ptr += nbytes;
}
891
/* Encode the object at the value stack top; the object is left on the
 * stack.
 *
 *   - Arrays are encoded as definite length CBOR arrays.
 *   - Buffer objects (anything accepted by duk_is_buffer_data()) are
 *     encoded as CBOR byte strings.
 *   - Other objects are encoded as CBOR maps of their own enumerable
 *     properties: definite length for up to 23 properties, indefinite
 *     length otherwise.
 *
 * NOTE(review): the loops below rely on duk__cbor_encode_value() popping
 * the value it encodes; that function is defined later in the file.
 *
 * The caller must have reserved at least 1 + 8 bytes of output space.
 */
static void duk__cbor_encode_object(duk_cbor_encode_context *enc_ctx) {
	duk_uint8_t *buf;
	duk_size_t len;
	duk_uint8_t *p;
	duk_size_t i;
	duk_size_t off_ib;
	duk_uint32_t count;

	/* Caller must ensure space. */
	DUK_CBOR_ASSERT(duk__cbor_get_reserve(enc_ctx) >= 1 + 8);

	/* XXX: Support for specific built-ins like Date and RegExp. */
	if (duk_is_array(enc_ctx->ctx, -1)) {
		/* Shortest encoding for arrays >= 256 in length is actually
		 * the indefinite length one (3 or more bytes vs. 2 bytes).
		 * We still use the definite length version because it is
		 * more decoding friendly.
		 */
		len = duk_get_length(enc_ctx->ctx, -1);
		duk__cbor_encode_sizet_uint32_check(enc_ctx, len);
		duk__cbor_encode_uint32(enc_ctx, (duk_uint32_t) len, 0x80U);
		for (i = 0; i < len; i++) {
			duk_get_prop_index(enc_ctx->ctx, -1, (duk_uarridx_t) i);
			duk__cbor_encode_value(enc_ctx);
		}
	} else if (duk_is_buffer_data(enc_ctx->ctx, -1)) {
		/* XXX: Tag buffer data?
		 * XXX: Encode typed arrays as integer arrays rather
		 * than buffer data as is?
		 */
		buf = (duk_uint8_t *) duk_require_buffer_data(enc_ctx->ctx, -1, &len);
		duk__cbor_encode_sizet_uint32_check(enc_ctx, len);
		duk__cbor_encode_uint32(enc_ctx, (duk_uint32_t) len, 0x40U);
		duk__cbor_encode_ensure(enc_ctx, len);
		p = enc_ctx->ptr;
		if (len > 0U) {
			/* The data pointer of a zero-length buffer may be
			 * NULL, and memcpy() with a NULL argument is
			 * undefined behavior even when the length is zero.
			 */
			(void) memcpy((void *) p, (const void *) buf, len);
		}
		p += len;
		enc_ctx->ptr = p;
	} else {
		/* We don't know the number of properties in advance
		 * but would still like to encode at least small
		 * objects without indefinite length.  Emit an
		 * indefinite length byte initially, and if the final
		 * property count is small enough to also fit in one
		 * byte, backpatch it later.  Otherwise keep the
		 * indefinite length.  This works well up to 23
		 * properties which is practical and good enough.
		 *
		 * The initial byte is remembered as an offset rather than
		 * a pointer because the buffer may be reallocated while
		 * the properties are being encoded.
		 */
		off_ib = (duk_size_t) (enc_ctx->ptr - enc_ctx->buf);  /* XXX: get_offset? */
		count = 0U;
		p = enc_ctx->ptr;
		*p++ = 0xa0U + 0x1fU;  /* indefinite length */
		enc_ctx->ptr = p;
		duk_enum(enc_ctx->ctx, -1, DUK_ENUM_OWN_PROPERTIES_ONLY);
		while (duk_next(enc_ctx->ctx, -1, 1 /*get_value*/)) {
			duk_insert(enc_ctx->ctx, -2);  /* [ ... key value ] -> [ ... value key ] */
			duk__cbor_encode_value(enc_ctx);  /* key */
			duk__cbor_encode_value(enc_ctx);  /* value */
			count++;
			if (count == 0U) {
				/* Property counter wrapped. */
				duk__cbor_encode_error(enc_ctx);
			}
		}
		duk_pop(enc_ctx->ctx);  /* enumerator */
		if (count <= 0x17U) {
			/* Small map: backpatch to definite length. */
			DUK_CBOR_ASSERT(off_ib < enc_ctx->len);
			enc_ctx->buf[off_ib] = 0xa0U + (duk_uint8_t) count;
		} else {
			/* Keep indefinite length, terminate with a break. */
			duk__cbor_encode_ensure(enc_ctx, 1);
			p = enc_ctx->ptr;
			*p++ = 0xffU;  /* break */
			enc_ctx->ptr = p;
		}
	}
}
967
/* Encode the plain buffer at the value stack top as a CBOR byte string;
 * the buffer is left on the stack.
 *
 * The caller must have reserved at least 1 + 8 bytes of output space;
 * space for the buffer data itself is reserved here.
 */
static void duk__cbor_encode_buffer(duk_cbor_encode_context *enc_ctx) {
	duk_uint8_t *buf;
	duk_size_t len;
	duk_uint8_t *p;

	/* Caller must ensure space. */
	DUK_CBOR_ASSERT(duk__cbor_get_reserve(enc_ctx) >= 1 + 8);

	/* Tag buffer data? */
	buf = (duk_uint8_t *) duk_require_buffer(enc_ctx->ctx, -1, &len);
	duk__cbor_encode_sizet_uint32_check(enc_ctx, len);
	duk__cbor_encode_uint32(enc_ctx, (duk_uint32_t) len, 0x40U);
	duk__cbor_encode_ensure(enc_ctx, len);
	p = enc_ctx->ptr;
	if (len > 0U) {
		/* The data pointer of a zero-length buffer may be NULL, and
		 * memcpy() with a NULL argument is undefined behavior even
		 * when the length is zero.
		 */
		(void) memcpy((void *) p, (const void *) buf, len);
	}
	p += len;
	enc_ctx->ptr = p;
}
986
/* Encode the pointer value at the value stack top as a string.
 *
 * Pointers (void *) are challenging to encode.  They can't be relied on
 * to be even 64-bit integer compatible (there are pointer models larger
 * than that), nor can floats encode them.  They could be encoded as
 * strings (%p format) but that's not portable, or as direct memory
 * representations.  Recovering pointers is non-portable in any case, but
 * it would be nice to be able to detect and recover compatible pointers.
 *
 * For now, encode as a "(%p)" style string, matching JX.  There doesn't
 * seem to be an appropriate tag, so pointers don't currently survive a
 * CBOR encode/decode roundtrip intact.
 *
 * The value at the stack top is replaced by the formatted string, which
 * is left on the stack.
 */
static void duk__cbor_encode_pointer(duk_cbor_encode_context *enc_ctx) {
	duk_context *ctx = enc_ctx->ctx;
	const char *repr;

	/* [ ... ptr ] -> [ ... str ] */
	repr = duk_to_string(ctx, -1);
	DUK_CBOR_ASSERT(repr != NULL);

	/* [ ... str ] -> [ ... str "(str)" ] -> [ ... "(str)" ] */
	duk_push_sprintf(ctx, "(%s)", repr);
	duk_remove(ctx, -2);

	duk__cbor_encode_string_top(enc_ctx);
}
1009
/* Encode a lightfunc.  Currently written as a single byte 0xa0, i.e. an
 * empty CBOR map.  The caller must already have reserved output space.
 */
static void duk__cbor_encode_lightfunc(duk_cbor_encode_context *enc_ctx) {
	/* Caller must ensure space. */
	DUK_CBOR_ASSERT(duk__cbor_get_reserve(enc_ctx) >= 1 + 8);

	/* Empty object for now. */
	*enc_ctx->ptr++ = 0xa0U;
}
1021
/* Encode the value at the value stack top and pop it.
 *
 * Dispatches on the Duktape type of the value.  Some type information is
 * lost in an encode/decode cycle; registering custom tags with IANA could
 * improve that.  An unrecognized type is reported through
 * duk__cbor_encode_error().
 */
static void duk__cbor_encode_value(duk_cbor_encode_context *enc_ctx) {
	duk_context *ctx = enc_ctx->ctx;
	duk_uint8_t simple;

	/* Checked on every call so that deeply nested input is not capped by
	 * an effective depth limit.
	 */
	duk_require_stack(ctx, 4);

	/* One initial byte plus 8 follow-up bytes covers integers, floats and
	 * string/buffer length fields, so the individual paths below don't
	 * need their own space checks for those.
	 */
	duk__cbor_encode_ensure(enc_ctx, 1 + 8);

	switch (duk_get_type(ctx, -1)) {
	case DUK_TYPE_UNDEFINED:
		*enc_ctx->ptr++ = 0xf7U;
		break;
	case DUK_TYPE_NULL:
		*enc_ctx->ptr++ = 0xf6U;
		break;
	case DUK_TYPE_BOOLEAN:
		simple = duk_get_boolean(ctx, -1) ? 0xf5U : 0xf4U;
		*enc_ctx->ptr++ = simple;
		break;
	case DUK_TYPE_NUMBER:
		duk__cbor_encode_double(enc_ctx, duk_get_number(ctx, -1));
		break;
	case DUK_TYPE_STRING:
		duk__cbor_encode_string_top(enc_ctx);
		break;
	case DUK_TYPE_OBJECT:
		duk__cbor_encode_object(enc_ctx);
		break;
	case DUK_TYPE_BUFFER:
		duk__cbor_encode_buffer(enc_ctx);
		break;
	case DUK_TYPE_POINTER:
		duk__cbor_encode_pointer(enc_ctx);
		break;
	case DUK_TYPE_LIGHTFUNC:
		duk__cbor_encode_lightfunc(enc_ctx);
		break;
	case DUK_TYPE_NONE:
	default:
		/* Nothing is popped on this path. */
		duk__cbor_encode_error(enc_ctx);
		return;
	}

	duk_pop(ctx);
}
1096
1097 /*
1098 * Decoding
1099 */
1100
/* Reserve 4 value stack entries for a decode step. */
static void duk__cbor_req_stack(duk_cbor_decode_context *dec_ctx) {
	duk_context *ctx = dec_ctx->ctx;

	duk_require_stack(ctx, 4);
}
1104
/* Report a decode failure as a TypeError with a fixed message. */
static void duk__cbor_decode_error(duk_cbor_decode_context *dec_ctx) {
	duk_context *ctx = dec_ctx->ctx;

	(void) duk_type_error(ctx, "cbor decode error");
}
1108
duk__cbor_decode_readbyte(duk_cbor_decode_context * dec_ctx)1109 static duk_uint8_t duk__cbor_decode_readbyte(duk_cbor_decode_context *dec_ctx) {
1110 DUK_CBOR_ASSERT(dec_ctx->off <= dec_ctx->len);
1111 if (DUK_CBOR_UNLIKELY(dec_ctx->len - dec_ctx->off < 1U)) {
1112 duk__cbor_decode_error(dec_ctx);
1113 }
1114 return dec_ctx->buf[dec_ctx->off++];
1115 }
1116
duk__cbor_decode_read_u16(duk_cbor_decode_context * dec_ctx)1117 static duk_uint16_t duk__cbor_decode_read_u16(duk_cbor_decode_context *dec_ctx) {
1118 duk_uint16_t res;
1119
1120 if (DUK_CBOR_UNLIKELY(dec_ctx->len - dec_ctx->off < 2U)) {
1121 duk__cbor_decode_error(dec_ctx);
1122 }
1123 res = duk__cbor_read_uint16_big(dec_ctx->buf + dec_ctx->off);
1124 dec_ctx->off += 2;
1125 return res;
1126 }
1127
duk__cbor_decode_read_u32(duk_cbor_decode_context * dec_ctx)1128 static duk_uint32_t duk__cbor_decode_read_u32(duk_cbor_decode_context *dec_ctx) {
1129 duk_uint32_t res;
1130
1131 if (DUK_CBOR_UNLIKELY(dec_ctx->len - dec_ctx->off < 4U)) {
1132 duk__cbor_decode_error(dec_ctx);
1133 }
1134 res = duk__cbor_read_uint32_big(dec_ctx->buf + dec_ctx->off);
1135 dec_ctx->off += 4;
1136 return res;
1137 }
1138
duk__cbor_decode_peekbyte(duk_cbor_decode_context * dec_ctx)1139 static duk_uint8_t duk__cbor_decode_peekbyte(duk_cbor_decode_context *dec_ctx) {
1140 if (DUK_CBOR_UNLIKELY(dec_ctx->off >= dec_ctx->len)) {
1141 duk__cbor_decode_error(dec_ctx);
1142 }
1143 return dec_ctx->buf[dec_ctx->off];
1144 }
1145
/* Move the read offset back by 'len' bytes.  There is no runtime check:
 * the caller must guarantee that at least 'len' bytes have been consumed.
 */
static void duk__cbor_decode_rewind(duk_cbor_decode_context *dec_ctx, duk_size_t len) {
	DUK_CBOR_ASSERT(dec_ctx->off >= len);  /* Caller must ensure. */
	dec_ctx->off = dec_ctx->off - len;
}
1150
#if 0
/* Currently disabled helper: signal a decode error unless at least 'len'
 * more input bytes are available.  The comparison is done by subtraction
 * so that a huge 'len' cannot wrap 'off + len' around and pass the check.
 */
static void duk__cbor_decode_ensure(duk_cbor_decode_context *dec_ctx, duk_size_t len) {
	DUK_CBOR_ASSERT(dec_ctx->off <= dec_ctx->len);
	if (len > dec_ctx->len - dec_ctx->off) {
		duk__cbor_decode_error(dec_ctx);
	}
}
#endif
1158
duk__cbor_decode_consume(duk_cbor_decode_context * dec_ctx,duk_size_t len)1159 static const duk_uint8_t *duk__cbor_decode_consume(duk_cbor_decode_context *dec_ctx, duk_size_t len) {
1160 DUK_CBOR_ASSERT(dec_ctx->off <= dec_ctx->len);
1161 if (DUK_CBOR_LIKELY(dec_ctx->len - dec_ctx->off >= len)) {
1162 const duk_uint8_t *res = dec_ctx->buf + dec_ctx->off;
1163 dec_ctx->off += len;
1164 return res;
1165 }
1166
1167 duk__cbor_decode_error(dec_ctx); /* Not enough input. */
1168 return NULL;
1169 }
1170
/* Check whether the next input byte is the CBOR break marker (0xff).
 * Returns 1 and consumes the byte if so; otherwise returns 0 and leaves
 * the input untouched.  End of input is reported by the peek helper.
 */
static int duk__cbor_decode_checkbreak(duk_cbor_decode_context *dec_ctx) {
	if (duk__cbor_decode_peekbyte(dec_ctx) != 0xffU) {
		return 0;
	}

	/* The peek succeeded, so stepping over one byte is in bounds. */
	DUK_CBOR_ASSERT(dec_ctx->off < dec_ctx->len);
	dec_ctx->off += 1;
	return 1;
}
1182
/* Decode the integer argument selected by the additional information
 * bits (low 5 bits) of initial byte 'ib' and push it as a number.
 *
 * With 'negative' zero the value N itself is pushed; with 'negative'
 * non-zero the pushed value is -1 - N.  Additional information values
 * 0..0x17 are the value itself; 0x18/0x19/0x1a/0x1b select 1/2/4/8
 * follow-up bytes.  Any other value is a decode error.
 */
static void duk__cbor_decode_push_aival_int(duk_cbor_decode_context *dec_ctx, duk_uint8_t ib, duk_bool_t negative) {
	duk_context *ctx = dec_ctx->ctx;
	duk_uint8_t ai = ib & 0x1fU;
	duk_uint32_t val;
	duk_uint32_t hi, lo;
	duk_double_t d_hi, d_lo, d;

	if (ai <= 0x17U) {
		val = ai;
	} else if (ai == 0x18U) {  /* 1 byte */
		val = (duk_uint32_t) duk__cbor_decode_readbyte(dec_ctx);
	} else if (ai == 0x19U) {  /* 2 byte */
		val = (duk_uint32_t) duk__cbor_decode_read_u16(dec_ctx);
	} else if (ai == 0x1aU) {  /* 4 byte */
		val = (duk_uint32_t) duk__cbor_decode_read_u32(dec_ctx);
	} else if (ai == 0x1bU) {  /* 8 byte */
		/* Build the result from two 32-bit halves converted to
		 * doubles, so no 64-bit integer type is needed.  For
		 * negative values the required +1 is added to the low half
		 * before the halves are summed: applying it after the
		 * conversion of the full value could lose the adjustment.
		 * Uses -1.0 - d <=> -(d + 1.0).
		 *
		 * No fastint check for this path at present.
		 */
		hi = (duk_uint32_t) duk__cbor_decode_read_u32(dec_ctx);
		lo = (duk_uint32_t) duk__cbor_decode_read_u32(dec_ctx);

		d_lo = (duk_double_t) lo;  /* XXX: cast helpers */
		d_hi = (duk_double_t) hi * 4294967296.0;
		if (negative) {
			d_lo += 1.0;
		}
		d = d_hi + d_lo;
		if (negative) {
			d = -d;
		}

		/* XXX: a push and check for fastint API would be nice */
		duk_push_number(ctx, d);
		return;
	} else {
		duk__cbor_decode_error(dec_ctx);
		return;
	}

	/* Value fits in 32 bits. */
	if (!negative) {
		duk_push_uint(ctx, (duk_uint_t) val);
	} else if ((duk_uint_t) val <= (duk_uint_t) -(DUK_INT_MIN + 1)) {
		/* -1 - val is representable as a duk_int_t. */
		/* XXX: a push and check for fastint API would be nice */
		duk_push_int(ctx, -1 - ((duk_int_t) val));
	} else {
		duk_push_number(ctx, -1.0 - (duk_double_t) val);
	}
}
1295
/* Skip over the integer argument that follows initial byte 'ib' without
 * decoding it.  The number of follow-up bytes depends on the additional
 * information bits (low 5 bits of 'ib'): none for 0..23, and 1/2/4/8 for
 * 24..27.  Values 28..31 are signalled as a decode error.
 */
static void duk__cbor_decode_skip_aival_int(duk_cbor_decode_context *dec_ctx, duk_uint8_t ib) {
	/* Follow-up byte count per additional information value; -1 marks
	 * values that are rejected.  Kept static so the table is not rebuilt
	 * on the stack for every call.
	 */
	static const duk_int8_t skips[32] = {
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 4, 8, -1, -1, -1, -1
	};
	duk_uint8_t ai;
	duk_int8_t skip;

	ai = ib & 0x1fU;
	skip = skips[ai];
	/* Use the file's own branch hint macro, as everywhere else here. */
	if (DUK_CBOR_UNLIKELY(skip < 0)) {
		duk__cbor_decode_error(dec_ctx);
	}
	(void) duk__cbor_decode_consume(dec_ctx, (duk_size_t) skip);
}
1312
duk__cbor_decode_aival_uint32(duk_cbor_decode_context * dec_ctx,duk_uint8_t ib)1313 static duk_uint32_t duk__cbor_decode_aival_uint32(duk_cbor_decode_context *dec_ctx, duk_uint8_t ib) {
1314 duk_uint8_t ai;
1315 duk_uint32_t t;
1316
1317 ai = ib & 0x1fU;
1318 if (ai <= 0x17U) {
1319 return (duk_uint32_t) ai;
1320 }
1321
1322 switch (ai) {
1323 case 0x18U: /* 1 byte */
1324 t = (duk_uint32_t) duk__cbor_decode_readbyte(dec_ctx);
1325 return t;
1326 case 0x19U: /* 2 byte */
1327 t = (duk_uint32_t) duk__cbor_decode_read_u16(dec_ctx);
1328 return t;
1329 case 0x1aU: /* 4 byte */
1330 t = (duk_uint32_t) duk__cbor_decode_read_u32(dec_ctx);
1331 return t;
1332 case 0x1bU: /* 8 byte */
1333 t = (duk_uint32_t) duk__cbor_decode_read_u32(dec_ctx);
1334 if (t != 0U) {
1335 break;
1336 }
1337 t = (duk_uint32_t) duk__cbor_decode_read_u32(dec_ctx);
1338 return t;
1339 }
1340
1341 duk__cbor_decode_error(dec_ctx);
1342 return 0U;
1343 }
1344
/* Decode one definite-length string chunk and push its bytes as a fixed
 * plain buffer.
 *
 * Reads the initial byte itself and signals a decode error unless its
 * top three bits equal 'expected_base' (callers in this file pass 0x40
 * for byte strings and 0x60 for text strings).
 */
static void duk__cbor_decode_buffer(duk_cbor_decode_context *dec_ctx, duk_uint8_t expected_base) {
	duk_uint32_t len;
	duk_uint8_t *buf;
	const duk_uint8_t *inp;
	duk_uint8_t ib;

	ib = duk__cbor_decode_readbyte(dec_ctx);
	if ((ib & 0xe0U) != expected_base) {
		duk__cbor_decode_error(dec_ctx);
	}
	/* Indefinite format is rejected by the following on purpose. */
	len = duk__cbor_decode_aival_uint32(dec_ctx, ib);
	inp = duk__cbor_decode_consume(dec_ctx, len);
	/* XXX: duk_push_fixed_buffer_with_data() would be a nice API addition. */
	buf = (duk_uint8_t *) duk_push_fixed_buffer(dec_ctx->ctx, (duk_size_t) len);

	/* A zero-size buffer may come back with a NULL data pointer, and
	 * passing NULL to memcpy() is undefined even for a zero length.
	 */
	if (len > 0U) {
		(void) memcpy((void *) buf, (const void *) inp, (size_t) len);
	}
}
1362
/* Replace the topmost 'count' buffers on the value stack with a single
 * fixed buffer holding their concatenation:
 *
 *   [ ... buf1 ... bufN ] -> [ ... joined ]
 *
 * 'count' must be >= 1 and all 'count' topmost values must be buffers.
 * A decode error is signalled if the combined size wraps duk_size_t.
 */
static void duk__cbor_decode_join_buffers(duk_cbor_decode_context *dec_ctx, duk_idx_t count) {
	duk_context *ctx = dec_ctx->ctx;
	duk_size_t total_size = 0;
	duk_idx_t top = duk_get_top(ctx);
	duk_idx_t base = top - count;  /* count is >= 1 */
	duk_idx_t idx;
	duk_uint8_t *p;
	duk_uint8_t *buf_data;
	duk_size_t buf_size;

	DUK_CBOR_ASSERT(count >= 1);
	DUK_CBOR_ASSERT(top >= count);

	/* Pass 1: compute total size. */
	for (idx = base; idx < top; idx++) {
		(void) duk_require_buffer(ctx, idx, &buf_size);
		total_size += buf_size;
		if (DUK_CBOR_UNLIKELY(total_size < buf_size)) {  /* Wrap check. */
			duk__cbor_decode_error(dec_ctx);
		}
	}

	/* The result buffer is pushed on top of the inputs, so make sure
	 * there is room for one more value.
	 */
	duk_require_stack(ctx, 1);
	p = (duk_uint8_t *) duk_push_fixed_buffer(ctx, total_size);

	/* Pass 2: copy into place.  The passes are separate loops rather
	 * than one loop keyed on 'p != NULL', so that a NULL data pointer
	 * for a zero-size result cannot keep the function looping.  Empty
	 * chunks are skipped entirely, so 'p' is never touched when it may
	 * be NULL.
	 */
	for (idx = base; idx < top; idx++) {
		buf_data = (duk_uint8_t *) duk_require_buffer(ctx, idx, &buf_size);
		if (buf_size > 0U) {
			(void) memcpy((void *) p, (const void *) buf_data, buf_size);
			p += buf_size;
		}
	}

	/* [ ... buf1 ... bufN joined ] -> [ ... joined buf2 ... bufN ] -> [ ... joined ] */
	duk_replace(ctx, base);
	duk_pop_n(ctx, count - 1);
}
1406
/* Decode the chunks of an indefinite-length string up to and including
 * the break marker, and leave a single buffer with the concatenated
 * bytes on the value stack top.
 *
 * Every chunk must be a definite-length string whose initial byte matches
 * 'expected_base' (checked by duk__cbor_decode_buffer()).  With no chunks
 * at all an empty fixed buffer is pushed.
 */
static void duk__cbor_decode_and_join_strbuf(duk_cbor_decode_context *dec_ctx, duk_uint8_t expected_base) {
	duk_idx_t count = 0;

	for (;;) {
		if (duk__cbor_decode_checkbreak(dec_ctx)) {
			break;
		}
		/* Check before incrementing: a signed increment past the
		 * maximum is undefined, so a wrap test done afterwards
		 * cannot be relied on.
		 */
		if (DUK_CBOR_UNLIKELY(count == DUK_IDX_MAX)) {
			duk__cbor_decode_error(dec_ctx);
		}
		duk_require_stack(dec_ctx->ctx, 1);
		duk__cbor_decode_buffer(dec_ctx, expected_base);
		count++;
	}

	if (count == 0) {
		(void) duk_push_fixed_buffer(dec_ctx->ctx, 0);
	} else if (count > 1) {
		duk__cbor_decode_join_buffers(dec_ctx, count);
	}
	/* With exactly one chunk the buffer on the stack top is the result. */
}
1426
duk__cbor_decode_half_float(duk_cbor_decode_context * dec_ctx)1427 static duk_double_t duk__cbor_decode_half_float(duk_cbor_decode_context *dec_ctx) {
1428 duk_cbor_dblunion u;
1429 const duk_uint8_t *inp;
1430 duk_int_t exp;
1431 duk_uint_t u16;
1432 duk_uint_t tmp;
1433 duk_double_t res;
1434
1435 inp = duk__cbor_decode_consume(dec_ctx, 2);
1436 u16 = ((duk_uint_t) inp[0] << 8) + (duk_uint_t) inp[1];
1437 exp = (duk_int_t) ((u16 >> 10) & 0x1fU) - 15;
1438
1439 /* Reconstruct IEEE double into little endian order first, then convert
1440 * to host order.
1441 */
1442
1443 memset((void *) &u, 0, sizeof(u));
1444
1445 if (exp == -15) {
1446 /* Zero or denormal; but note that half float
1447 * denormals become double normals.
1448 */
1449 if ((u16 & 0x03ffU) == 0) {
1450 u.x[7] = inp[0] & 0x80U;
1451 } else {
1452 /* Create denormal by first creating a double that
1453 * contains the denormal bits and a leading implicit
1454 * 1-bit. Then subtract away the implicit 1-bit.
1455 *
1456 * 0.mmmmmmmmmm * 2^-14
1457 * 1.mmmmmmmmmm 0.... * 2^-14
1458 * -1.0000000000 0.... * 2^-14
1459 *
1460 * Double exponent: -14 + 1023 = 0x3f1
1461 */
1462 u.x[7] = 0x3fU;
1463 u.x[6] = 0x10U + (duk_uint8_t) ((u16 >> 6) & 0x0fU);
1464 u.x[5] = (duk_uint8_t) ((u16 << 2) & 0xffU); /* Mask is really 0xfcU */
1465
1466 duk__cbor_dblunion_little_to_host(&u);
1467 res = u.d - 0.00006103515625; /* 2^(-14) */
1468 if (u16 & 0x8000U) {
1469 res = -res;
1470 }
1471 return res;
1472 }
1473 } else if (exp == 16) {
1474 /* +/- Inf or NaN. */
1475 if ((u16 & 0x03ffU) == 0) {
1476 u.x[7] = (inp[0] & 0x80U) + 0x7fU;
1477 u.x[6] = 0xf0U;
1478 } else {
1479 /* Create a 'quiet NaN' with highest
1480 * bit set (there are some platforms
1481 * where the NaN payload convention is
1482 * the opposite). Keep sign.
1483 */
1484 u.x[7] = (inp[0] & 0x80U) + 0x7fU;
1485 u.x[6] = 0xf8U;
1486 }
1487 } else {
1488 /* Normal. */
1489 tmp = (inp[0] & 0x80U) ? 0x80000000UL : 0UL;
1490 tmp += (duk_uint_t) (exp + 1023) << 20;
1491 tmp += (duk_uint_t) (inp[0] & 0x03U) << 18;
1492 tmp += (duk_uint_t) (inp[1] & 0xffU) << 10;
1493 u.x[7] = (tmp >> 24) & 0xffU;
1494 u.x[6] = (tmp >> 16) & 0xffU;
1495 u.x[5] = (tmp >> 8) & 0xffU;
1496 u.x[4] = (tmp >> 0) & 0xffU;
1497 }
1498
1499 duk__cbor_dblunion_little_to_host(&u);
1500 return u.d;
1501 }
1502
/* Decode a CBOR text string whose initial byte 'ib' (with additional
 * information 'ai') has already been read, and push it as a string.
 *
 * Text string data that is not valid UTF-8 is technically invalid CBOR.
 * Options for dealing with it include:
 *
 *   1. Reject the input, i.e. throw TypeError.
 *
 *   2. Accept it but sanitize into UTF-8 with U+FFFD replacements,
 *      possibly also turning non-BMP codepoints into surrogates for
 *      better ECMAScript compatibility.
 *
 *   3. Accept it as a Duktape string (these are not always valid UTF-8)
 *      but reject anything that would produce a Symbol representation.
 *
 * The code below implements option 3.
 */
static void duk__cbor_decode_string(duk_cbor_decode_context *dec_ctx, duk_uint8_t ib, duk_uint8_t ai) {
	duk_context *ctx = dec_ctx->ctx;
	const duk_uint8_t *data;
	duk_size_t size;
	duk_uint32_t len;

	if (ai != 0x1fU) {
		/* Definite length: string data is taken directly from input. */
		len = duk__cbor_decode_aival_uint32(dec_ctx, ib);
		data = duk__cbor_decode_consume(dec_ctx, len);
		(void) duk_push_lstring(ctx, (const char *) data, (duk_size_t) len);
	} else {
		/* Indefinite length: join the chunks into a temporary buffer,
		 * create the string from it and drop the buffer.
		 */
		duk__cbor_decode_and_join_strbuf(dec_ctx, 0x60U);
		data = (const duk_uint8_t *) duk_require_buffer(ctx, -1, &size);
		(void) duk_push_lstring(ctx, (const char *) data, size);
		duk_remove(ctx, -2);
	}

	/* Refuse to create Symbols when decoding. */
	if (duk_is_symbol(ctx, -1)) {
		duk__cbor_decode_error(dec_ctx);
	}

	/* XXX: Here a Duktape API call to convert input -> utf-8 with
	 * replacements would be nice.
	 */
}
1546
duk__cbor_decode_array(duk_cbor_decode_context * dec_ctx,duk_uint8_t ib,duk_uint8_t ai)1547 static duk_bool_t duk__cbor_decode_array(duk_cbor_decode_context *dec_ctx, duk_uint8_t ib, duk_uint8_t ai) {
1548 duk_uint32_t idx, len;
1549
1550 duk__cbor_req_stack(dec_ctx);
1551
1552 /* Support arrays up to 0xfffffffeU in length. 0xffffffff is
1553 * used as an indefinite length marker.
1554 */
1555 if (ai == 0x1fU) {
1556 len = 0xffffffffUL;
1557 } else {
1558 len = duk__cbor_decode_aival_uint32(dec_ctx, ib);
1559 if (len == 0xffffffffUL) {
1560 return 0;
1561 }
1562 }
1563
1564 /* XXX: use bare array? */
1565 duk_push_array(dec_ctx->ctx);
1566 for (idx = 0U; ;) {
1567 if (len == 0xffffffffUL && duk__cbor_decode_checkbreak(dec_ctx)) {
1568 break;
1569 }
1570 if (idx == len) {
1571 if (ai == 0x1fU) {
1572 return 0;
1573 }
1574 break;
1575 }
1576 duk__cbor_decode_value(dec_ctx);
1577 duk_put_prop_index(dec_ctx->ctx, -2, (duk_uarridx_t) idx);
1578 idx++;
1579 if (idx == 0U) {
1580 return 0; /* wrapped */
1581 }
1582 }
1583
1584 return 1;
1585 }
1586
duk__cbor_decode_map(duk_cbor_decode_context * dec_ctx,duk_uint8_t ib,duk_uint8_t ai)1587 static duk_bool_t duk__cbor_decode_map(duk_cbor_decode_context *dec_ctx, duk_uint8_t ib, duk_uint8_t ai) {
1588 duk_uint32_t count;
1589
1590 duk__cbor_req_stack(dec_ctx);
1591
1592 if (ai == 0x1fU) {
1593 count = 0xffffffffUL;
1594 } else {
1595 count = duk__cbor_decode_aival_uint32(dec_ctx, ib);
1596 if (count == 0xffffffffUL) {
1597 return 0;
1598 }
1599 }
1600
1601 /* XXX: use bare object? */
1602 duk_push_object(dec_ctx->ctx);
1603 for (;;) {
1604 if (count == 0xffffffffUL) {
1605 if (duk__cbor_decode_checkbreak(dec_ctx)) {
1606 break;
1607 }
1608 } else {
1609 if (count == 0UL) {
1610 break;
1611 }
1612 count--;
1613 }
1614
1615 /* Non-string keys are coerced to strings,
1616 * possibly leading to overwriting previous
1617 * keys. Last key of a certain coerced name
1618 * wins. If key is an object, it will coerce
1619 * to '[object Object]' which is consistent
1620 * but potentially misleading. One alternative
1621 * would be to skip non-string keys.
1622 */
1623 duk__cbor_decode_value(dec_ctx);
1624 duk__cbor_decode_value(dec_ctx);
1625 duk_put_prop(dec_ctx->ctx, -3);
1626 }
1627
1628 return 1;
1629 }
1630
duk__cbor_decode_float(duk_cbor_decode_context * dec_ctx)1631 static duk_double_t duk__cbor_decode_float(duk_cbor_decode_context *dec_ctx) {
1632 duk_cbor_fltunion u;
1633 const duk_uint8_t *inp;
1634 inp = duk__cbor_decode_consume(dec_ctx, 4);
1635 (void) memcpy((void *) u.x, (const void *) inp, 4);
1636 duk__cbor_fltunion_big_to_host(&u);
1637 return (duk_double_t) u.f;
1638 }
1639
duk__cbor_decode_double(duk_cbor_decode_context * dec_ctx)1640 static duk_double_t duk__cbor_decode_double(duk_cbor_decode_context *dec_ctx) {
1641 duk_cbor_dblunion u;
1642 const duk_uint8_t *inp;
1643 inp = duk__cbor_decode_consume(dec_ctx, 8);
1644 (void) memcpy((void *) u.x, (const void *) inp, 8);
1645 duk__cbor_dblunion_big_to_host(&u);
1646 return u.d;
1647 }
1648
1649 #if defined(DUK_CBOR_DECODE_FASTPATH)
1650 #define DUK__CBOR_AI (ib & 0x1fU)
1651
/* Decode one CBOR data item and push the result on the value stack
 * (fast path variant: one switch over all 256 initial byte values).
 *
 * Tags are dropped: after a tag the next initial byte is read and
 * dispatched again, as many times as needed.  Reserved or unsupported
 * initial bytes are reported through duk__cbor_decode_error().
 *
 * Any path that may recurse back here must perform a Duktape value stack
 * growth check; simple paths like primitive values skip it.
 */
static void duk__cbor_decode_value(duk_cbor_decode_context *dec_ctx) {
	duk_context *ctx = dec_ctx->ctx;
	duk_uint8_t ib;

	for (;;) {
#if defined(DUK_CBOR_DPRINT)
		fprintf(stderr, "cbor decode off=%ld len=%ld\n", (long) dec_ctx->off, (long) dec_ctx->len);
#endif

		ib = duk__cbor_decode_readbyte(dec_ctx);

		/* Full initial byte switch, footprint cost over baseline is ~+1kB. */
		/* XXX: Force full switch with no range check. */

		switch (ib) {
		/* Unsigned integer, value in initial byte. */
		case 0x00U: case 0x01U: case 0x02U: case 0x03U: case 0x04U: case 0x05U: case 0x06U: case 0x07U:
		case 0x08U: case 0x09U: case 0x0aU: case 0x0bU: case 0x0cU: case 0x0dU: case 0x0eU: case 0x0fU:
		case 0x10U: case 0x11U: case 0x12U: case 0x13U: case 0x14U: case 0x15U: case 0x16U: case 0x17U:
			duk_push_uint(ctx, ib);
			break;

		/* Unsigned integer, 1/2/4/8 follow-up bytes. */
		case 0x18U: case 0x19U: case 0x1aU: case 0x1bU:
			duk__cbor_decode_push_aival_int(dec_ctx, ib, 0 /*negative*/);
			break;

		/* Negative integer, value in initial byte. */
		case 0x20U: case 0x21U: case 0x22U: case 0x23U: case 0x24U: case 0x25U: case 0x26U: case 0x27U:
		case 0x28U: case 0x29U: case 0x2aU: case 0x2bU: case 0x2cU: case 0x2dU: case 0x2eU: case 0x2fU:
		case 0x30U: case 0x31U: case 0x32U: case 0x33U: case 0x34U: case 0x35U: case 0x36U: case 0x37U:
			duk_push_int(ctx, -((duk_int_t) ((ib - 0x20U) + 1U)));
			break;

		/* Negative integer, 1/2/4/8 follow-up bytes. */
		case 0x38U: case 0x39U: case 0x3aU: case 0x3bU:
			duk__cbor_decode_push_aival_int(dec_ctx, ib, 1 /*negative*/);
			break;

		/* Byte string, definite length.  The buffer decoder reads
		 * the initial byte itself, so step back over it first.
		 */
		/* XXX: Avoid rewind, decode length inline. */
		case 0x40U: case 0x41U: case 0x42U: case 0x43U: case 0x44U: case 0x45U: case 0x46U: case 0x47U:
		case 0x48U: case 0x49U: case 0x4aU: case 0x4bU: case 0x4cU: case 0x4dU: case 0x4eU: case 0x4fU:
		case 0x50U: case 0x51U: case 0x52U: case 0x53U: case 0x54U: case 0x55U: case 0x56U: case 0x57U:
		case 0x58U: case 0x59U: case 0x5aU: case 0x5bU:
			duk__cbor_decode_rewind(dec_ctx, 1U);
			duk__cbor_decode_buffer(dec_ctx, 0x40U);
			break;

		/* Byte string, indefinite length. */
		case 0x5fU:
			duk__cbor_decode_and_join_strbuf(dec_ctx, 0x40U);
			break;

		/* Text string, definite and indefinite length. */
		/* XXX: Avoid double decode of length. */
		case 0x60U: case 0x61U: case 0x62U: case 0x63U: case 0x64U: case 0x65U: case 0x66U: case 0x67U:
		case 0x68U: case 0x69U: case 0x6aU: case 0x6bU: case 0x6cU: case 0x6dU: case 0x6eU: case 0x6fU:
		case 0x70U: case 0x71U: case 0x72U: case 0x73U: case 0x74U: case 0x75U: case 0x76U: case 0x77U:
		case 0x78U: case 0x79U: case 0x7aU: case 0x7bU:
		case 0x7fU:
			duk__cbor_decode_string(dec_ctx, ib, DUK__CBOR_AI);
			break;

		/* Array, definite and indefinite length. */
		case 0x80U: case 0x81U: case 0x82U: case 0x83U: case 0x84U: case 0x85U: case 0x86U: case 0x87U:
		case 0x88U: case 0x89U: case 0x8aU: case 0x8bU: case 0x8cU: case 0x8dU: case 0x8eU: case 0x8fU:
		case 0x90U: case 0x91U: case 0x92U: case 0x93U: case 0x94U: case 0x95U: case 0x96U: case 0x97U:
		case 0x98U: case 0x99U: case 0x9aU: case 0x9bU:
		case 0x9fU:
			if (DUK_CBOR_UNLIKELY(duk__cbor_decode_array(dec_ctx, ib, DUK__CBOR_AI) == 0)) {
				duk__cbor_decode_error(dec_ctx);
				return;
			}
			break;

		/* Map, definite and indefinite length. */
		case 0xa0U: case 0xa1U: case 0xa2U: case 0xa3U: case 0xa4U: case 0xa5U: case 0xa6U: case 0xa7U:
		case 0xa8U: case 0xa9U: case 0xaaU: case 0xabU: case 0xacU: case 0xadU: case 0xaeU: case 0xafU:
		case 0xb0U: case 0xb1U: case 0xb2U: case 0xb3U: case 0xb4U: case 0xb5U: case 0xb6U: case 0xb7U:
		case 0xb8U: case 0xb9U: case 0xbaU: case 0xbbU:
		case 0xbfU:
			if (DUK_CBOR_UNLIKELY(duk__cbor_decode_map(dec_ctx, ib, DUK__CBOR_AI) == 0)) {
				duk__cbor_decode_error(dec_ctx);
				return;
			}
			break;

		/* Tag 0-23: drop and read the next initial byte. */
		case 0xc0U: case 0xc1U: case 0xc2U: case 0xc3U: case 0xc4U: case 0xc5U: case 0xc6U: case 0xc7U:
		case 0xc8U: case 0xc9U: case 0xcaU: case 0xcbU: case 0xccU: case 0xcdU: case 0xceU: case 0xcfU:
		case 0xd0U: case 0xd1U: case 0xd2U: case 0xd3U: case 0xd4U: case 0xd5U: case 0xd6U: case 0xd7U:
			continue;

		/* Tag with 1/2/4/8 follow-up bytes: skip them, then as above. */
		case 0xd8U: case 0xd9U: case 0xdaU: case 0xdbU:
			duk__cbor_decode_skip_aival_int(dec_ctx, ib);
			continue;

		case 0xf4U:
			duk_push_false(ctx);
			break;
		case 0xf5U:
			duk_push_true(ctx);
			break;
		case 0xf6U:
			duk_push_null(ctx);
			break;
		case 0xf7U:
			duk_push_undefined(ctx);
			break;

		case 0xf9U:  /* half-float (2 bytes) */
			duk_push_number(ctx, duk__cbor_decode_half_float(dec_ctx));
			break;
		case 0xfaU:  /* float (4 bytes) */
			duk_push_number(ctx, duk__cbor_decode_float(dec_ctx));
			break;
		case 0xfbU:  /* double (8 bytes) */
			duk_push_number(ctx, duk__cbor_decode_double(dec_ctx));
			break;

		/* Everything else is rejected.  0xf8 is the 1-byte simple
		 * value form (32-255): nothing defined yet, so reject.
		 */
		case 0x1cU: case 0x1dU: case 0x1eU: case 0x1fU:
		case 0x3cU: case 0x3dU: case 0x3eU: case 0x3fU:
		case 0x5cU: case 0x5dU: case 0x5eU:
		case 0x7cU: case 0x7dU: case 0x7eU:
		case 0x9cU: case 0x9dU: case 0x9eU:
		case 0xbcU: case 0xbdU: case 0xbeU:
		case 0xdcU: case 0xddU: case 0xdeU: case 0xdfU:
		case 0xe0U: case 0xe1U: case 0xe2U: case 0xe3U: case 0xe4U: case 0xe5U: case 0xe6U: case 0xe7U:
		case 0xe8U: case 0xe9U: case 0xeaU: case 0xebU: case 0xecU: case 0xedU: case 0xeeU: case 0xefU:
		case 0xf0U: case 0xf1U: case 0xf2U: case 0xf3U:
		case 0xf8U:
		case 0xfcU: case 0xfdU: case 0xfeU: case 0xffU:
			duk__cbor_decode_error(dec_ctx);
			return;
		}  /* end switch */

		return;
	}
}
1858 #else /* DUK_CBOR_DECODE_FASTPATH */
/* Decode one CBOR data item and push the result on the value stack
 * (baseline variant: dispatch on major type, then on additional
 * information where needed).
 *
 * Tags are ignored: their argument is skipped and the next initial byte
 * is read and dispatched again.  Format problems are reported through
 * duk__cbor_decode_error().
 *
 * Any path that may recurse back here must perform a Duktape value stack
 * growth check; simple paths like primitive values skip it.
 */
static void duk__cbor_decode_value(duk_cbor_decode_context *dec_ctx) {
	duk_uint8_t ib, mt, ai;

 reread_initial_byte:
#if defined(DUK_CBOR_DPRINT)
	fprintf(stderr, "cbor decode off=%ld len=%ld\n", (long) dec_ctx->off, (long) dec_ctx->len);
#endif

	ib = duk__cbor_decode_readbyte(dec_ctx);
	mt = ib >> 5U;    /* major type, 0..7 */
	ai = ib & 0x1fU;  /* additional information, 0..0x1f */

	/* Additional information in [24,27] = [0x18,0x1b] has relatively
	 * uniform handling for all major types: read 1/2/4/8 additional
	 * bytes.  For major type 7 the 1-byte value is a 'simple type', and
	 * 2/4/8-byte values are floats.  For other major types the 1/2/4/8
	 * byte values are integers.  The lengths are uniform, but the typing
	 * is not.
	 */

	switch (mt) {
	case 0U: {  /* unsigned integer */
		duk__cbor_decode_push_aival_int(dec_ctx, ib, 0 /*negative*/);
		break;
	}
	case 1U: {  /* negative integer */
		duk__cbor_decode_push_aival_int(dec_ctx, ib, 1 /*negative*/);
		break;
	}
	case 2U: {  /* byte string */
		if (ai == 0x1fU) {
			duk__cbor_decode_and_join_strbuf(dec_ctx, 0x40U);
		} else {
			/* The buffer decoder reads the initial byte itself. */
			duk__cbor_decode_rewind(dec_ctx, 1U);
			duk__cbor_decode_buffer(dec_ctx, 0x40U);
		}
		break;
	}
	case 3U: {  /* text string */
		duk__cbor_decode_string(dec_ctx, ib, ai);
		break;
	}
	case 4U: {  /* array of data items */
		if (DUK_CBOR_UNLIKELY(duk__cbor_decode_array(dec_ctx, ib, ai) == 0)) {
			goto format_error;
		}
		break;
	}
	case 5U: {  /* map of pairs of data items */
		if (DUK_CBOR_UNLIKELY(duk__cbor_decode_map(dec_ctx, ib, ai) == 0)) {
			goto format_error;
		}
		break;
	}
	case 6U: {  /* semantic tagging */
		/* Tags are ignored now, re-read initial byte.  A tagged
		 * value may itself be tagged (an unlimited number of times)
		 * so keep on peeling away tags.
		 */
		duk__cbor_decode_skip_aival_int(dec_ctx, ib);
		goto reread_initial_byte;
	}
	case 7U: {  /* floating point numbers, simple data types, break; other */
		switch (ai) {
		case 0x14U: {
			duk_push_false(dec_ctx->ctx);
			break;
		}
		case 0x15U: {
			duk_push_true(dec_ctx->ctx);
			break;
		}
		case 0x16U: {
			duk_push_null(dec_ctx->ctx);
			break;
		}
		case 0x17U: {
			duk_push_undefined(dec_ctx->ctx);
			break;
		}
		case 0x18U: {  /* more simple values (1 byte) */
			/* Simple value encoded in additional byte (none
			 * are defined so far).  RFC 7049 states that the
			 * follow-up byte must be 32-255 to minimize
			 * confusion.  So, a non-shortest encoding like
			 * f815 (= true, shortest encoding f5) must be
			 * rejected.  cbor.me tester rejects f815, but
			 * e.g. Python CBOR binding decodes it as true.
			 */
			goto format_error;
		}
		case 0x19U: {  /* half-float (2 bytes) */
			duk_double_t d;
			d = duk__cbor_decode_half_float(dec_ctx);
			duk_push_number(dec_ctx->ctx, d);
			break;
		}
		case 0x1aU: {  /* float (4 bytes) */
			duk_double_t d;
			d = duk__cbor_decode_double == 0 ? 0.0 : duk__cbor_decode_float(dec_ctx);
			duk_push_number(dec_ctx->ctx, d);
			break;
		}
		case 0x1bU: {  /* double (8 bytes) */
			duk_double_t d;
			d = duk__cbor_decode_double(dec_ctx);
			duk_push_number(dec_ctx->ctx, d);
			break;
		}
		/* 'ai' is masked to 5 bits, so the break byte 0xff shows up
		 * here as 0x1f; a 'case 0xffU' label could never match.
		 */
		case 0x1fU:  /* unexpected break (initial byte 0xff) */
		default: {
			goto format_error;
		}
		}  /* end switch */
		break;
	}
	default: {
		goto format_error;  /* will never actually occur */
	}
	}  /* end switch */

	return;

 format_error:
	duk__cbor_decode_error(dec_ctx);
}
1990 #endif /* DUK_CBOR_DECODE_FASTPATH */
1991
1992 /*
1993 * Public APIs
1994 */
1995
duk__cbor_encode_binding(duk_context * ctx)1996 static duk_ret_t duk__cbor_encode_binding(duk_context *ctx) {
1997 /* Produce an ArrayBuffer by first decoding into a plain buffer which
1998 * mimics a Uint8Array and gettings its .buffer property.
1999 */
2000 duk_cbor_encode(ctx, -1, 0);
2001 duk_get_prop_string(ctx, -1, "buffer");
2002 return 1;
2003 }
2004
duk__cbor_decode_binding(duk_context * ctx)2005 static duk_ret_t duk__cbor_decode_binding(duk_context *ctx) {
2006 /* Lenient: accept any buffer like. */
2007 duk_cbor_decode(ctx, -1, 0);
2008 return 1;
2009 }
2010
/* Register a global "CBOR" object with "encode" and "decode" functions.
 *
 * ctx:   Duktape context to install the bindings into.
 * flags: currently ignored.
 *
 * The value stack is balanced on return.
 */
void duk_cbor_init(duk_context *ctx, duk_uint_t flags) {
	/* The same define-property flags are used for all three properties. */
	const duk_uint_t prop_flags = DUK_DEFPROP_ATTR_WC | DUK_DEFPROP_HAVE_VALUE;

	(void) flags;

	/* [ global "CBOR" cbor ] */
	duk_push_global_object(ctx);
	duk_push_string(ctx, "CBOR");
	duk_push_object(ctx);

	/* cbor.encode: [ global "CBOR" cbor "encode" func ] -> [ global "CBOR" cbor ] */
	duk_push_string(ctx, "encode");
	duk_push_c_function(ctx, duk__cbor_encode_binding, 1 /*nargs*/);
	duk_def_prop(ctx, -3, prop_flags);

	/* cbor.decode: [ global "CBOR" cbor "decode" func ] -> [ global "CBOR" cbor ] */
	duk_push_string(ctx, "decode");
	duk_push_c_function(ctx, duk__cbor_decode_binding, 1 /*nargs*/);
	duk_def_prop(ctx, -3, prop_flags);

	/* global.CBOR: [ global "CBOR" cbor ] -> [ global ] */
	duk_def_prop(ctx, -3, prop_flags);

	/* Drop the global object. */
	duk_pop(ctx);
}
2025
/* Encode the value at 'idx' as CBOR, replacing it in place with a dynamic
 * buffer holding the encoded bytes.
 *
 * ctx:          Duktape context.
 * idx:          value stack index of the value to encode; normalized with
 *               duk_require_normalize_index().
 * encode_flags: currently ignored.
 */
void duk_cbor_encode(duk_context *ctx, duk_idx_t idx, duk_uint_t encode_flags) {
	duk_cbor_encode_context enc_ctx;
	duk_uint8_t *out;
	duk_size_t used;

	(void) encode_flags;

	idx = duk_require_normalize_index(ctx, idx);

	/* The output buffer is pushed next, so it lands at the current top. */
	enc_ctx.ctx = ctx;
	enc_ctx.idx_buf = duk_get_top(ctx);
	enc_ctx.len = 64;  /* initial size, trimmed to the used length below */

	out = (duk_uint8_t *) duk_push_dynamic_buffer(ctx, enc_ctx.len);
	enc_ctx.buf = out;
	enc_ctx.ptr = out;
	enc_ctx.buf_end = out + enc_ctx.len;

	/* Encode a copy of the input.  NOTE(review): the encoder presumably
	 * consumes the copy, leaving the output buffer on the stack top for
	 * the duk_replace() below -- confirm against duk__cbor_encode_value().
	 */
	duk_dup(ctx, idx);
	duk__cbor_encode_value(&enc_ctx);

	/* Shrink the buffer to the bytes between its start and the write pointer. */
	used = (duk_size_t) (enc_ctx.ptr - enc_ctx.buf);
	duk_resize_buffer(enc_ctx.ctx, enc_ctx.idx_buf, used);

	duk_replace(ctx, idx);
}
2048
/* Decode the CBOR data held in the buffer at 'idx', replacing the buffer
 * in place with the decoded value.
 *
 * ctx:          Duktape context.
 * idx:          value stack index of the input; normalized with
 *               duk_require_normalize_index() and read with
 *               duk_require_buffer_data().
 * decode_flags: currently ignored.
 *
 * A type error ("trailing garbage") is raised if the decoded item does
 * not end exactly at the end of the input.
 */
void duk_cbor_decode(duk_context *ctx, duk_idx_t idx, duk_uint_t decode_flags) {
	duk_cbor_decode_context dec_ctx;
	duk_size_t input_len;

	(void) decode_flags;

	/* Reference helpers that are only needed in some configurations
	 * (e.g. with asserts enabled) to silence unused-function warnings.
	 */
	(void) duk__cbor_get_reserve;
	(void) duk__cbor_isinf;
	(void) duk__cbor_fpclassify;

	idx = duk_require_normalize_index(ctx, idx);

	/* Start decoding at offset zero of the input buffer. */
	dec_ctx.ctx = ctx;
	dec_ctx.off = 0;
	dec_ctx.buf = (const duk_uint8_t *) duk_require_buffer_data(ctx, idx, &input_len);
	dec_ctx.len = input_len;

	duk__cbor_req_stack(&dec_ctx);
	duk__cbor_decode_value(&dec_ctx);

	/* Exactly one data item must cover the entire input. */
	if (dec_ctx.len != dec_ctx.off) {
		(void) duk_type_error(ctx, "trailing garbage");
	}

	duk_replace(ctx, idx);
}
2076