1 /* Copyright 2013-present Facebook, Inc.
2 * Licensed under the Apache License, Version 2.0 */
3
4 #include "watchman.h"
5 #include "thirdparty/jansson/jansson_private.h"
6
/*
 * This defines a binary serialization of the JSON data objects in this
 * library.  It is designed for use with watchman and is not intended to serve
 * as a general binary JSON interchange format.  In particular, all integers
 * are signed integers and are stored in host byte order to minimize
 * transformation overhead.
 */
14
/*
 * Return the width in bytes (1, 2, 4 or 8) of the smallest signed integer
 * type that can hold the value of x without loss.
 *
 * x is parenthesized inside every cast so that the cast applies to the whole
 * argument expression (e.g. INT_SIZE(a + b)) rather than only to its first
 * operand.  x is evaluated more than once, so it must be free of side
 * effects.
 */
#define INT_SIZE(x)                   \
  (((x) == ((int8_t)(x)))    ? 1 :    \
   ((x) == ((int16_t)(x)))   ? 2 :    \
   ((x) == ((int32_t)(x)))   ? 4 : 8)
19
/*
 * Type tags.  The decoder dispatches on the first byte of each encoded
 * value, which is one of the tags below.
 */

/* Containers */
#define BSER_ARRAY 0x00
#define BSER_OBJECT 0x01
#define BSER_TEMPLATE 0x0b

/* Strings */
#define BSER_BYTESTRING 0x02
#define BSER_UTF8STRING 0x0d

/* Signed integers of 1, 2, 4 and 8 bytes */
#define BSER_INT8 0x03
#define BSER_INT16 0x04
#define BSER_INT32 0x05
#define BSER_INT64 0x06

/* Other scalars */
#define BSER_REAL 0x07
#define BSER_TRUE 0x08
#define BSER_FALSE 0x09
#define BSER_NULL 0x0a

/* Placeholder written inside a template for a property an object lacks */
#define BSER_SKIP 0x0c

/*
 * One-byte objects holding the tags above.  The dump callback takes a
 * pointer and a length, so the tags it is handed need an address.
 */
static const char bser_array_hdr = BSER_ARRAY;
static const char bser_object_hdr = BSER_OBJECT;
static const char bser_template_hdr = BSER_TEMPLATE;
static const char bser_bytestring_hdr = BSER_BYTESTRING;
static const char bser_utf8string_hdr = BSER_UTF8STRING;
static const char bser_true = BSER_TRUE;
static const char bser_false = BSER_FALSE;
static const char bser_null = BSER_NULL;
static const char bser_skip = BSER_SKIP;
44
is_bser_version_supported(const bser_ctx_t * ctx)45 static bool is_bser_version_supported(const bser_ctx_t *ctx) {
46 return ctx->bser_version == 1 || ctx->bser_version == 2;
47 }
48
bser_real(const bser_ctx_t * ctx,double val,void * data)49 static int bser_real(const bser_ctx_t *ctx, double val, void *data)
50 {
51 char sz = BSER_REAL;
52 if (!is_bser_version_supported(ctx)) {
53 return -1;
54 }
55
56 if (ctx->dump(&sz, sizeof(sz), data)) {
57 return -1;
58 }
59 return ctx->dump((char*)&val, sizeof(val), data);
60 }
61
bunser_generic_string(const char * buf,json_int_t avail,json_int_t * needed,const char ** start,json_int_t * len)62 bool bunser_generic_string(
63 const char* buf,
64 json_int_t avail,
65 json_int_t* needed,
66 const char** start,
67 json_int_t* len) {
68 json_int_t ineed;
69
70 if (!bunser_int(buf + 1, avail - 1, &ineed, len)) {
71 *needed = ineed;
72 return false;
73 }
74
75 buf += ineed + 1;
76 avail -= ineed + 1;
77 *needed = ineed + 1 + *len;
78
79 if (*len > avail) {
80 return false;
81 }
82
83 *start = buf;
84 return true;
85 }
86
87 // Attempt to unserialize an integer value.
88 // Returns bool if successful, and populates *val with the value.
89 // Otherwise populates *needed with the size required to successfully
90 // decode the integer value
bunser_int(const char * buf,json_int_t avail,json_int_t * needed,json_int_t * val)91 bool bunser_int(const char *buf, json_int_t avail,
92 json_int_t *needed, json_int_t *val)
93 {
94 int8_t i8;
95 int16_t i16;
96 int32_t i32;
97 int64_t i64;
98
99 switch (buf[0]) {
100 case BSER_INT8:
101 *needed = 2;
102 break;
103 case BSER_INT16:
104 *needed = 3;
105 break;
106 case BSER_INT32:
107 *needed = 5;
108 break;
109 case BSER_INT64:
110 *needed = 9;
111 break;
112 default:
113 *needed = -1;
114 return false;
115 }
116 if (avail < *needed) {
117 return false;
118 }
119
120 switch (buf[0]) {
121 case BSER_INT8:
122 memcpy(&i8, buf + 1, sizeof(i8));
123 *val = i8;
124 return true;
125 case BSER_INT16:
126 memcpy(&i16, buf + 1, sizeof(i16));
127 *val = i16;
128 return true;
129 case BSER_INT32:
130 memcpy(&i32, buf + 1, sizeof(i32));
131 *val = i32;
132 return true;
133 case BSER_INT64:
134 memcpy(&i64, buf + 1, sizeof(i64));
135 *val = i64;
136 return true;
137 default:
138 return false;
139 }
140 }
141
bser_int(const bser_ctx_t * ctx,json_int_t val,void * data)142 static int bser_int(const bser_ctx_t *ctx, json_int_t val, void *data)
143 {
144 int8_t i8;
145 int16_t i16;
146 int32_t i32;
147 int64_t i64;
148 char sz;
149 int size = INT_SIZE(val);
150 char *iptr;
151
152 if (!is_bser_version_supported(ctx)) {
153 return -1;
154 }
155
156 switch (size) {
157 case 1:
158 sz = BSER_INT8;
159 i8 = (int8_t)val;
160 iptr = (char*)&i8;
161 break;
162 case 2:
163 sz = BSER_INT16;
164 i16 = (int16_t)val;
165 iptr = (char*)&i16;
166 break;
167 case 4:
168 sz = BSER_INT32;
169 i32 = (int32_t)val;
170 iptr = (char*)&i32;
171 break;
172 case 8:
173 sz = BSER_INT64;
174 i64 = (int64_t)val;
175 iptr = (char*)&i64;
176 break;
177 default:
178 return -1;
179 }
180
181 if (ctx->dump(&sz, sizeof(sz), data)) {
182 return -1;
183 }
184
185 return ctx->dump(iptr, size, data);
186 }
187
bser_generic_string(const bser_ctx_t * ctx,w_string_piece str,void * data,const char hdr)188 static int bser_generic_string(
189 const bser_ctx_t* ctx,
190 w_string_piece str,
191 void* data,
192 const char hdr) {
193 if (!is_bser_version_supported(ctx)) {
194 return -1;
195 }
196
197 if (ctx->dump(&hdr, sizeof(hdr), data)) {
198 return -1;
199 }
200
201 if (bser_int(ctx, str.size(), data)) {
202 return -1;
203 }
204
205 if (ctx->dump(str.data(), str.size(), data)) {
206 return -1;
207 }
208
209 return 0;
210 }
211
212 static int
bser_bytestring(const bser_ctx_t * ctx,w_string_piece str,void * data)213 bser_bytestring(const bser_ctx_t* ctx, w_string_piece str, void* data) {
214 return bser_generic_string(ctx, str, data, bser_bytestring_hdr);
215 }
216
217 static int
bser_utf8string(const bser_ctx_t * ctx,w_string_piece str,void * data)218 bser_utf8string(const bser_ctx_t* ctx, w_string_piece str, void* data) {
219 if ((ctx->bser_capabilities & BSER_CAP_DISABLE_UNICODE) ||
220 ctx->bser_version == 1) {
221 return bser_bytestring(ctx, str, data);
222 }
223 return bser_generic_string(ctx, str, data, bser_utf8string_hdr);
224 }
225
226 static int
bser_mixedstring(const bser_ctx_t * ctx,w_string_piece str,void * data)227 bser_mixedstring(const bser_ctx_t* ctx, w_string_piece str, void* data) {
228 if (ctx->bser_version != 1 &&
229 !(BSER_CAP_DISABLE_UNICODE_FOR_ERRORS & ctx->bser_capabilities) &&
230 !(BSER_CAP_DISABLE_UNICODE & ctx->bser_capabilities)) {
231 auto utf8_clean = str.asUTF8Clean();
232 return bser_utf8string(ctx, utf8_clean, data);
233 } else {
234 return bser_bytestring(ctx, str, data);
235 }
236 }
237
238 static int bser_array(const bser_ctx_t *ctx, const json_t *array, void *data);
239
bser_template(const bser_ctx_t * ctx,const json_t * array,const json_t * templ,void * data)240 static int bser_template(const bser_ctx_t *ctx, const json_t *array,
241 const json_t *templ, void *data)
242 {
243 size_t n = json_array_size(array);
244 size_t i, pn;
245
246 if (!is_bser_version_supported(ctx)) {
247 return -1;
248 }
249
250 if (ctx->dump(&bser_template_hdr, sizeof(bser_template_hdr), data)) {
251 return -1;
252 }
253
254 // The template goes next
255 if (bser_array(ctx, templ, data)) {
256 return -1;
257 }
258
259 // Now the array of arrays of object values.
260 // How many objects
261 if (bser_int(ctx, n, data)) {
262 return -1;
263 }
264
265 pn = json_array_size(templ);
266
267 // For each object
268 for (i = 0; i < n; i++) {
269 auto obj = json_array_get(array, i);
270 size_t pi;
271
272 // For each factored key
273 for (pi = 0; pi < pn; pi++) {
274 const char *key = json_string_value(json_array_get(templ, pi));
275
276 // Look up the object property
277 auto val = json_object_get(obj, key);
278 if (!val) {
279 // property not set on this one; emit a skip
280 if (ctx->dump(&bser_skip, sizeof(bser_skip), data)) {
281 return -1;
282 }
283 continue;
284 }
285
286 // Emit value
287 if (w_bser_dump(ctx, val, data)) {
288 return -1;
289 }
290 }
291 }
292
293 return 0;
294 }
295
bser_array(const bser_ctx_t * ctx,const json_t * array,void * data)296 static int bser_array(const bser_ctx_t *ctx, const json_t *array, void *data)
297 {
298 size_t n = json_array_size(array);
299 size_t i;
300
301 if (!is_bser_version_supported(ctx)) {
302 return -1;
303 }
304
305 auto templ = json_array_get_template(array);
306 if (templ) {
307 return bser_template(ctx, array, templ, data);
308 }
309
310 if (ctx->dump(&bser_array_hdr, sizeof(bser_array_hdr), data)) {
311 return -1;
312 }
313
314 if (bser_int(ctx, n, data)) {
315 return -1;
316 }
317
318 for (i = 0; i < n; i++) {
319 auto val = json_array_get(array, i);
320
321 if (w_bser_dump(ctx, val, data)) {
322 return -1;
323 }
324 }
325
326 return 0;
327 }
328
bser_object(const bser_ctx_t * ctx,const json_ref & obj,void * data)329 static int bser_object(const bser_ctx_t* ctx, const json_ref& obj, void* data) {
330 size_t n;
331
332 if (!is_bser_version_supported(ctx)) {
333 return -1;
334 }
335
336 if (ctx->dump(&bser_object_hdr, sizeof(bser_object_hdr), data)) {
337 return -1;
338 }
339
340 n = json_object_size(obj);
341 if (bser_int(ctx, n, data)) {
342 return -1;
343 }
344
345 auto object = json_to_object(obj);
346 for (auto& it : object->map) {
347 auto &key = it.first;
348 auto &val = it.second;
349
350 if (bser_bytestring(ctx, key.c_str(), data)) {
351 return -1;
352 }
353 if (w_bser_dump(ctx, val, data)) {
354 return -1;
355 }
356 }
357
358 return 0;
359 }
360
w_bser_dump(const bser_ctx_t * ctx,const json_ref & json,void * data)361 int w_bser_dump(const bser_ctx_t* ctx, const json_ref& json, void* data) {
362 int type = json_typeof(json);
363
364 if (!is_bser_version_supported(ctx)) {
365 return -1;
366 }
367
368 switch (type) {
369 case JSON_NULL:
370 return ctx->dump(&bser_null, sizeof(bser_null), data);
371 case JSON_TRUE:
372 return ctx->dump(&bser_true, sizeof(bser_true), data);
373 case JSON_FALSE:
374 return ctx->dump(&bser_false, sizeof(bser_false), data);
375 case JSON_REAL:
376 return bser_real(ctx, json_real_value(json), data);
377 case JSON_INTEGER:
378 return bser_int(ctx, json_integer_value(json), data);
379 case JSON_STRING: {
380 auto& wstr = json_to_w_string(json);
381 switch (wstr.type()) {
382 case W_STRING_BYTE:
383 return bser_bytestring(ctx, wstr, data);
384 case W_STRING_UNICODE:
385 return bser_utf8string(ctx, wstr, data);
386 case W_STRING_MIXED:
387 return bser_mixedstring(ctx, wstr, data);
388 default:
389 w_assert(false, "unknown string type 0x%02x", wstr.type());
390 return -1;
391 }
392 }
393 case JSON_ARRAY:
394 return bser_array(ctx, json, data);
395 case JSON_OBJECT:
396 return bser_object(ctx, json, data);
397 default:
398 return -1;
399 }
400 }
401
measure(const char *,size_t size,void * ptr)402 static int measure(const char*, size_t size, void* ptr) {
403 auto tot = (json_int_t*)ptr;
404 *tot += size;
405 return 0;
406 }
407
w_bser_write_pdu(const uint32_t bser_version,const uint32_t bser_capabilities,json_dump_callback_t dump,const json_ref & json,void * data)408 int w_bser_write_pdu(
409 const uint32_t bser_version,
410 const uint32_t bser_capabilities,
411 json_dump_callback_t dump,
412 const json_ref& json,
413 void* data) {
414 json_int_t m_size = 0;
415 bser_ctx_t ctx{bser_version, bser_capabilities, measure};
416
417 if (!is_bser_version_supported(&ctx)) {
418 return -1;
419 }
420
421 if (w_bser_dump(&ctx, json, &m_size)) {
422 return -1;
423 }
424
425 // To actually write the contents
426 ctx.dump = dump;
427
428 if (bser_version == 2) {
429 if (dump(BSER_V2_MAGIC, 2, data)) {
430 return -1;
431 }
432 } else {
433 if (dump(BSER_MAGIC, 2, data)) {
434 return -1;
435 }
436 }
437
438 if (bser_version == 2) {
439 if (dump(
440 (const char*)&bser_capabilities, sizeof(bser_capabilities), data)) {
441 return -1;
442 }
443 }
444
445 if (bser_int(&ctx, m_size, data)) {
446 return -1;
447 }
448
449 if (w_bser_dump(&ctx, json, data)) {
450 return -1;
451 }
452
453 return 0;
454 }
455
bunser_array(const char * buf,const char * end,json_int_t * used,json_error_t * jerr)456 static json_ref bunser_array(
457 const char* buf,
458 const char* end,
459 json_int_t* used,
460 json_error_t* jerr) {
461 json_int_t needed;
462 json_int_t total = 0;
463 json_int_t i, nelems;
464
465 buf++;
466 total++;
467
468 if (!bunser_int(buf, end - buf, &needed, &nelems)) {
469 if (needed == -1) {
470 snprintf(jerr->text, sizeof(jerr->text),
471 "invalid integer encoding 0x%02x for array length. buf=%p\n",
472 (int)buf[0], buf);
473 return nullptr;
474 }
475 *used = needed + total;
476 snprintf(jerr->text, sizeof(jerr->text),
477 "invalid array length encoding 0x%02x (needed %d but have %d)",
478 (int)buf[0], (int)needed, (int)(end - buf));
479 return nullptr;
480 }
481
482 total += needed;
483 buf += needed;
484
485 auto arrval = json_array();
486 for (i = 0; i < nelems; i++) {
487 needed = 0;
488 auto item = bunser(buf, end, &needed, jerr);
489
490 total += needed;
491 buf += needed;
492
493 if (!item) {
494 *used = total;
495 return nullptr;
496 }
497
498 if (json_array_append_new(arrval, std::move(item))) {
499 *used = total;
500 snprintf(jerr->text, sizeof(jerr->text),
501 "failed to append array item");
502 return nullptr;
503 }
504 }
505
506 *used = total;
507 return arrval;
508 }
509
bunser_template(const char * buf,const char * end,json_int_t * used,json_error_t * jerr)510 static json_ref bunser_template(
511 const char* buf,
512 const char* end,
513 json_int_t* used,
514 json_error_t* jerr) {
515 json_int_t needed = 0;
516 json_int_t total = 0;
517 json_int_t i, nelems;
518 json_int_t ip, np;
519
520 buf++;
521 total++;
522
523 if (*buf != BSER_ARRAY) {
524 snprintf(jerr->text, sizeof(jerr->text),
525 "Expected array encoding, but found 0x%02x", *buf);
526 *used = total;
527 return nullptr;
528 }
529
530 // Load in the property names template
531 auto templ = bunser_array(buf, end, &needed, jerr);
532 if (!templ) {
533 *used = needed + total;
534 return nullptr;
535 }
536 total += needed;
537 buf += needed;
538
539 // And the number of objects
540 needed = 0;
541 if (!bunser_int(buf, end - buf, &needed, &nelems)) {
542 *used = needed + total;
543 snprintf(jerr->text, sizeof(jerr->text),
544 "invalid object number encoding (needed %d but have %d)",
545 (int)needed, (int)(end - buf));
546 return nullptr;
547 }
548 total += needed;
549 buf += needed;
550
551 np = json_array_size(templ);
552
553 // Now load up the array with object values
554 auto arrval = json_array_of_size((size_t)nelems);
555 for (i = 0; i < nelems; i++) {
556 auto item = json_object_of_size((size_t)np);
557 for (ip = 0; ip < np; ip++) {
558 if (*buf == BSER_SKIP) {
559 buf++;
560 total++;
561 continue;
562 }
563
564 needed = 0;
565 auto val = bunser(buf, end, &needed, jerr);
566 if (!val) {
567 *used = needed + total;
568 return nullptr;
569 }
570 buf += needed;
571 total += needed;
572
573 json_object_set_new_nocheck(
574 item,
575 json_string_value(json_array_get(templ, (size_t)ip)),
576 std::move(val));
577 }
578
579 json_array_append_new(arrval, std::move(item));
580 }
581
582 *used = total;
583 return arrval;
584 }
585
bunser_object(const char * buf,const char * end,json_int_t * used,json_error_t * jerr)586 static json_ref bunser_object(
587 const char* buf,
588 const char* end,
589 json_int_t* used,
590 json_error_t* jerr) {
591 json_int_t needed;
592 json_int_t total = 0;
593 json_int_t i, nelems;
594 char keybuf[128];
595
596 total = 1;
597 buf++;
598
599 if (!bunser_int(buf, end - buf, &needed, &nelems)) {
600 *used = needed + total;
601 snprintf(jerr->text, sizeof(jerr->text),
602 "invalid object property count encoding");
603 return nullptr;
604 }
605
606 total += needed;
607 buf += needed;
608
609 auto objval = json_object();
610 for (i = 0; i < nelems; i++) {
611 const char *start;
612 json_int_t slen;
613
614 // Read key
615 if (!bunser_generic_string(buf, end - buf, &needed, &start, &slen)) {
616 *used = total + needed;
617 snprintf(jerr->text, sizeof(jerr->text),
618 "invalid bytestring for object key");
619 return nullptr;
620 }
621 total += needed;
622 buf += needed;
623
624 // Saves us allocating a string when the library is going to
625 // do that anyway
626 if ((uint16_t)slen > sizeof(keybuf) - 1) {
627 snprintf(jerr->text, sizeof(jerr->text),
628 "object key is too long");
629 return nullptr;
630 }
631 memcpy(keybuf, start, (size_t)slen);
632 keybuf[slen] = '\0';
633
634 // Read value
635 auto item = bunser(buf, end, &needed, jerr);
636 total += needed;
637 buf += needed;
638
639 if (!item) {
640 *used = total;
641 return nullptr;
642 }
643
644 if (json_object_set_new_nocheck(objval, keybuf, std::move(item))) {
645 *used = total;
646 snprintf(jerr->text, sizeof(jerr->text),
647 "failed to add object property");
648 return nullptr;
649 }
650 }
651
652 *used = total;
653 return objval;
654 }
655
bunser(const char * buf,const char * end,json_int_t * needed,json_error_t * jerr)656 json_ref bunser(
657 const char* buf,
658 const char* end,
659 json_int_t* needed,
660 json_error_t* jerr) {
661 json_int_t ival;
662
663 switch (buf[0]) {
664 case BSER_INT8:
665 case BSER_INT16:
666 case BSER_INT32:
667 case BSER_INT64:
668 if (!bunser_int(buf, end - buf, needed, &ival)) {
669 snprintf(jerr->text, sizeof(jerr->text),
670 "invalid integer encoding");
671 return nullptr;
672 }
673 return json_integer(ival);
674
675 case BSER_BYTESTRING:
676 case BSER_UTF8STRING: {
677 const char *start;
678 json_int_t len;
679
680 if (!bunser_generic_string(buf, end - buf, needed, &start, &len)) {
681 snprintf(jerr->text, sizeof(jerr->text),
682 "invalid bytestring encoding");
683 return nullptr;
684 }
685
686 return typed_string_to_json(
687 start,
688 len,
689 buf[0] == BSER_BYTESTRING ? W_STRING_BYTE : W_STRING_UNICODE);
690 }
691
692 case BSER_REAL:
693 {
694 double dval;
695 *needed = sizeof(double) + 1;
696 memcpy(&dval, buf + 1, sizeof(dval));
697 return json_real(dval);
698 }
699
700 case BSER_TRUE:
701 *needed = 1;
702 return json_true();
703 case BSER_FALSE:
704 *needed = 1;
705 return json_false();
706 case BSER_NULL:
707 *needed = 1;
708 return json_null();
709 case BSER_ARRAY:
710 return bunser_array(buf, end, needed, jerr);
711 case BSER_TEMPLATE:
712 return bunser_template(buf, end, needed, jerr);
713 case BSER_OBJECT:
714 return bunser_object(buf, end, needed, jerr);
715 default:
716 snprintf(jerr->text, sizeof(jerr->text),
717 "invalid bser encoding type %02x", (int)buf[0]);
718 return nullptr;
719 }
720
721 #ifndef _WIN32 // It knows this is unreachable
722 return nullptr;
723 #endif
724 }
725
726 /* vim:ts=2:sw=2:et:
727 */
728