1 /* Copyright 2013-present Facebook, Inc.
2  * Licensed under the Apache License, Version 2.0 */
3 
4 #include "watchman.h"
5 #include "thirdparty/jansson/jansson_private.h"
6 
7 /*
8  * This defines a binary serialization of the JSON data objects in this
9  * library.  It is designed for use with watchman and is not intended to serve
10  * as a general binary JSON interchange format.  In particular, all integers
11  * are signed integers and are stored in host byte order to minimize
12  * transformation overhead.
13  */
14 
/* Return the smallest size int (in bytes: 1, 2, 4 or 8) that can store the
 * value.  A width is accepted when casting the value to that width and
 * comparing it with the original shows nothing was lost.
 * Every use of the argument is parenthesized so that compound expressions
 * such as INT_SIZE(a + b) are cast as a whole rather than having the cast
 * bind to the first operand only.
 * The argument is evaluated more than once; do not pass an expression with
 * side effects. */
#define INT_SIZE(x) (((x) == ((int8_t)(x)))  ? 1 :    \
                     ((x) == ((int16_t)(x))) ? 2 :    \
                     ((x) == ((int32_t)(x))) ? 4 : 8)
19 
/* Type tags.  Every encoded value begins with one of these bytes and the
 * decoders in this file inspect it to decide how to read what follows. */
#define BSER_ARRAY     0x00
#define BSER_OBJECT    0x01
#define BSER_BYTESTRING 0x02
/* The four integer tags differ only in the width of the payload */
#define BSER_INT8      0x03
#define BSER_INT16     0x04
#define BSER_INT32     0x05
#define BSER_INT64     0x06
#define BSER_REAL      0x07
#define BSER_TRUE      0x08
#define BSER_FALSE     0x09
#define BSER_NULL      0x0a
/* An array of objects written as a shared key list plus per-object values */
#define BSER_TEMPLATE  0x0b
/* Written inside a template in place of a property an object does not have */
#define BSER_SKIP      0x0c
#define BSER_UTF8STRING 0x0d
34 
/* Single-byte objects holding the tags above.  The dump callback takes a
 * pointer and a size, so tags that are emitted on their own need an
 * addressable copy; the two string headers are handed to
 * bser_generic_string() by value. */
static const char bser_true = BSER_TRUE;
static const char bser_false = BSER_FALSE;
static const char bser_null = BSER_NULL;
static const char bser_bytestring_hdr = BSER_BYTESTRING;
static const char bser_array_hdr = BSER_ARRAY;
static const char bser_object_hdr = BSER_OBJECT;
static const char bser_template_hdr = BSER_TEMPLATE;
static const char bser_utf8string_hdr = BSER_UTF8STRING;
static const char bser_skip = BSER_SKIP;
44 
is_bser_version_supported(const bser_ctx_t * ctx)45 static bool is_bser_version_supported(const bser_ctx_t *ctx) {
46   return ctx->bser_version == 1 || ctx->bser_version == 2;
47 }
48 
bser_real(const bser_ctx_t * ctx,double val,void * data)49 static int bser_real(const bser_ctx_t *ctx, double val, void *data)
50 {
51   char sz = BSER_REAL;
52   if (!is_bser_version_supported(ctx)) {
53     return -1;
54   }
55 
56   if (ctx->dump(&sz, sizeof(sz), data)) {
57     return -1;
58   }
59   return ctx->dump((char*)&val, sizeof(val), data);
60 }
61 
bunser_generic_string(const char * buf,json_int_t avail,json_int_t * needed,const char ** start,json_int_t * len)62 bool bunser_generic_string(
63     const char* buf,
64     json_int_t avail,
65     json_int_t* needed,
66     const char** start,
67     json_int_t* len) {
68   json_int_t ineed;
69 
70   if (!bunser_int(buf + 1, avail - 1, &ineed, len)) {
71     *needed = ineed;
72     return false;
73   }
74 
75   buf += ineed + 1;
76   avail -= ineed + 1;
77   *needed = ineed + 1 + *len;
78 
79   if (*len > avail) {
80     return false;
81   }
82 
83   *start = buf;
84   return true;
85 }
86 
87 // Attempt to unserialize an integer value.
88 // Returns bool if successful, and populates *val with the value.
89 // Otherwise populates *needed with the size required to successfully
90 // decode the integer value
bunser_int(const char * buf,json_int_t avail,json_int_t * needed,json_int_t * val)91 bool bunser_int(const char *buf, json_int_t avail,
92     json_int_t *needed, json_int_t *val)
93 {
94   int8_t i8;
95   int16_t i16;
96   int32_t i32;
97   int64_t i64;
98 
99   switch (buf[0]) {
100     case BSER_INT8:
101       *needed = 2;
102       break;
103     case BSER_INT16:
104       *needed = 3;
105       break;
106     case BSER_INT32:
107       *needed = 5;
108       break;
109     case BSER_INT64:
110       *needed = 9;
111       break;
112     default:
113       *needed = -1;
114       return false;
115   }
116   if (avail < *needed) {
117     return false;
118   }
119 
120   switch (buf[0]) {
121     case BSER_INT8:
122       memcpy(&i8, buf + 1, sizeof(i8));
123       *val = i8;
124       return true;
125     case BSER_INT16:
126       memcpy(&i16, buf + 1, sizeof(i16));
127       *val = i16;
128       return true;
129     case BSER_INT32:
130       memcpy(&i32, buf + 1, sizeof(i32));
131       *val = i32;
132       return true;
133     case BSER_INT64:
134       memcpy(&i64, buf + 1, sizeof(i64));
135       *val = i64;
136       return true;
137     default:
138       return false;
139   }
140 }
141 
bser_int(const bser_ctx_t * ctx,json_int_t val,void * data)142 static int bser_int(const bser_ctx_t *ctx, json_int_t val, void *data)
143 {
144   int8_t i8;
145   int16_t i16;
146   int32_t i32;
147   int64_t i64;
148   char sz;
149   int size = INT_SIZE(val);
150   char *iptr;
151 
152   if (!is_bser_version_supported(ctx)) {
153     return -1;
154   }
155 
156   switch (size) {
157     case 1:
158       sz = BSER_INT8;
159       i8 = (int8_t)val;
160       iptr = (char*)&i8;
161       break;
162     case 2:
163       sz = BSER_INT16;
164       i16 = (int16_t)val;
165       iptr = (char*)&i16;
166       break;
167     case 4:
168       sz = BSER_INT32;
169       i32 = (int32_t)val;
170       iptr = (char*)&i32;
171       break;
172     case 8:
173       sz = BSER_INT64;
174       i64 = (int64_t)val;
175       iptr = (char*)&i64;
176       break;
177     default:
178       return -1;
179   }
180 
181   if (ctx->dump(&sz, sizeof(sz), data)) {
182     return -1;
183   }
184 
185   return ctx->dump(iptr, size, data);
186 }
187 
bser_generic_string(const bser_ctx_t * ctx,w_string_piece str,void * data,const char hdr)188 static int bser_generic_string(
189     const bser_ctx_t* ctx,
190     w_string_piece str,
191     void* data,
192     const char hdr) {
193   if (!is_bser_version_supported(ctx)) {
194     return -1;
195   }
196 
197   if (ctx->dump(&hdr, sizeof(hdr), data)) {
198     return -1;
199   }
200 
201   if (bser_int(ctx, str.size(), data)) {
202     return -1;
203   }
204 
205   if (ctx->dump(str.data(), str.size(), data)) {
206     return -1;
207   }
208 
209   return 0;
210 }
211 
212 static int
bser_bytestring(const bser_ctx_t * ctx,w_string_piece str,void * data)213 bser_bytestring(const bser_ctx_t* ctx, w_string_piece str, void* data) {
214   return bser_generic_string(ctx, str, data, bser_bytestring_hdr);
215 }
216 
217 static int
bser_utf8string(const bser_ctx_t * ctx,w_string_piece str,void * data)218 bser_utf8string(const bser_ctx_t* ctx, w_string_piece str, void* data) {
219   if ((ctx->bser_capabilities & BSER_CAP_DISABLE_UNICODE) ||
220       ctx->bser_version == 1) {
221     return bser_bytestring(ctx, str, data);
222   }
223   return bser_generic_string(ctx, str, data, bser_utf8string_hdr);
224 }
225 
226 static int
bser_mixedstring(const bser_ctx_t * ctx,w_string_piece str,void * data)227 bser_mixedstring(const bser_ctx_t* ctx, w_string_piece str, void* data) {
228   if (ctx->bser_version != 1 &&
229       !(BSER_CAP_DISABLE_UNICODE_FOR_ERRORS & ctx->bser_capabilities) &&
230       !(BSER_CAP_DISABLE_UNICODE & ctx->bser_capabilities)) {
231     auto utf8_clean = str.asUTF8Clean();
232     return bser_utf8string(ctx, utf8_clean, data);
233   } else {
234     return bser_bytestring(ctx, str, data);
235   }
236 }
237 
238 static int bser_array(const bser_ctx_t *ctx, const json_t *array, void *data);
239 
bser_template(const bser_ctx_t * ctx,const json_t * array,const json_t * templ,void * data)240 static int bser_template(const bser_ctx_t *ctx, const json_t *array,
241     const json_t *templ, void *data)
242 {
243   size_t n = json_array_size(array);
244   size_t i, pn;
245 
246   if (!is_bser_version_supported(ctx)) {
247     return -1;
248   }
249 
250   if (ctx->dump(&bser_template_hdr, sizeof(bser_template_hdr), data)) {
251     return -1;
252   }
253 
254   // The template goes next
255   if (bser_array(ctx, templ, data)) {
256     return -1;
257   }
258 
259   // Now the array of arrays of object values.
260   // How many objects
261   if (bser_int(ctx, n, data)) {
262     return -1;
263   }
264 
265   pn = json_array_size(templ);
266 
267   // For each object
268   for (i = 0; i < n; i++) {
269     auto obj = json_array_get(array, i);
270     size_t pi;
271 
272     // For each factored key
273     for (pi = 0; pi < pn; pi++) {
274       const char *key = json_string_value(json_array_get(templ, pi));
275 
276       // Look up the object property
277       auto val = json_object_get(obj, key);
278       if (!val) {
279         // property not set on this one; emit a skip
280         if (ctx->dump(&bser_skip, sizeof(bser_skip), data)) {
281           return -1;
282         }
283         continue;
284       }
285 
286       // Emit value
287       if (w_bser_dump(ctx, val, data)) {
288         return -1;
289       }
290     }
291   }
292 
293   return 0;
294 }
295 
bser_array(const bser_ctx_t * ctx,const json_t * array,void * data)296 static int bser_array(const bser_ctx_t *ctx, const json_t *array, void *data)
297 {
298   size_t n = json_array_size(array);
299   size_t i;
300 
301   if (!is_bser_version_supported(ctx)) {
302     return -1;
303   }
304 
305   auto templ = json_array_get_template(array);
306   if (templ) {
307     return bser_template(ctx, array, templ, data);
308   }
309 
310   if (ctx->dump(&bser_array_hdr, sizeof(bser_array_hdr), data)) {
311     return -1;
312   }
313 
314   if (bser_int(ctx, n, data)) {
315     return -1;
316   }
317 
318   for (i = 0; i < n; i++) {
319     auto val = json_array_get(array, i);
320 
321     if (w_bser_dump(ctx, val, data)) {
322       return -1;
323     }
324   }
325 
326   return 0;
327 }
328 
bser_object(const bser_ctx_t * ctx,const json_ref & obj,void * data)329 static int bser_object(const bser_ctx_t* ctx, const json_ref& obj, void* data) {
330   size_t n;
331 
332   if (!is_bser_version_supported(ctx)) {
333     return -1;
334   }
335 
336   if (ctx->dump(&bser_object_hdr, sizeof(bser_object_hdr), data)) {
337     return -1;
338   }
339 
340   n = json_object_size(obj);
341   if (bser_int(ctx, n, data)) {
342     return -1;
343   }
344 
345   auto object = json_to_object(obj);
346   for (auto& it : object->map) {
347     auto &key = it.first;
348     auto &val = it.second;
349 
350     if (bser_bytestring(ctx, key.c_str(), data)) {
351       return -1;
352     }
353     if (w_bser_dump(ctx, val, data)) {
354       return -1;
355     }
356   }
357 
358   return 0;
359 }
360 
w_bser_dump(const bser_ctx_t * ctx,const json_ref & json,void * data)361 int w_bser_dump(const bser_ctx_t* ctx, const json_ref& json, void* data) {
362   int type = json_typeof(json);
363 
364   if (!is_bser_version_supported(ctx)) {
365     return -1;
366   }
367 
368   switch (type) {
369     case JSON_NULL:
370       return ctx->dump(&bser_null, sizeof(bser_null), data);
371     case JSON_TRUE:
372       return ctx->dump(&bser_true, sizeof(bser_true), data);
373     case JSON_FALSE:
374       return ctx->dump(&bser_false, sizeof(bser_false), data);
375     case JSON_REAL:
376       return bser_real(ctx, json_real_value(json), data);
377     case JSON_INTEGER:
378       return bser_int(ctx, json_integer_value(json), data);
379     case JSON_STRING: {
380       auto& wstr = json_to_w_string(json);
381       switch (wstr.type()) {
382         case W_STRING_BYTE:
383           return bser_bytestring(ctx, wstr, data);
384         case W_STRING_UNICODE:
385           return bser_utf8string(ctx, wstr, data);
386         case W_STRING_MIXED:
387           return bser_mixedstring(ctx, wstr, data);
388         default:
389           w_assert(false, "unknown string type 0x%02x", wstr.type());
390           return -1;
391       }
392     }
393     case JSON_ARRAY:
394       return bser_array(ctx, json, data);
395     case JSON_OBJECT:
396       return bser_object(ctx, json, data);
397     default:
398       return -1;
399   }
400 }
401 
measure(const char *,size_t size,void * ptr)402 static int measure(const char*, size_t size, void* ptr) {
403   auto tot = (json_int_t*)ptr;
404   *tot += size;
405   return 0;
406 }
407 
w_bser_write_pdu(const uint32_t bser_version,const uint32_t bser_capabilities,json_dump_callback_t dump,const json_ref & json,void * data)408 int w_bser_write_pdu(
409     const uint32_t bser_version,
410     const uint32_t bser_capabilities,
411     json_dump_callback_t dump,
412     const json_ref& json,
413     void* data) {
414   json_int_t m_size = 0;
415   bser_ctx_t ctx{bser_version, bser_capabilities, measure};
416 
417   if (!is_bser_version_supported(&ctx)) {
418     return -1;
419   }
420 
421   if (w_bser_dump(&ctx, json, &m_size)) {
422     return -1;
423   }
424 
425   // To actually write the contents
426   ctx.dump = dump;
427 
428   if (bser_version == 2) {
429     if (dump(BSER_V2_MAGIC, 2, data)) {
430       return -1;
431     }
432   } else {
433     if (dump(BSER_MAGIC, 2, data)) {
434       return -1;
435     }
436   }
437 
438   if (bser_version == 2) {
439     if (dump(
440             (const char*)&bser_capabilities, sizeof(bser_capabilities), data)) {
441       return -1;
442     }
443   }
444 
445   if (bser_int(&ctx, m_size, data)) {
446     return -1;
447   }
448 
449   if (w_bser_dump(&ctx, json, data)) {
450     return -1;
451   }
452 
453   return 0;
454 }
455 
bunser_array(const char * buf,const char * end,json_int_t * used,json_error_t * jerr)456 static json_ref bunser_array(
457     const char* buf,
458     const char* end,
459     json_int_t* used,
460     json_error_t* jerr) {
461   json_int_t needed;
462   json_int_t total = 0;
463   json_int_t i, nelems;
464 
465   buf++;
466   total++;
467 
468   if (!bunser_int(buf, end - buf, &needed, &nelems)) {
469     if (needed == -1) {
470       snprintf(jerr->text, sizeof(jerr->text),
471           "invalid integer encoding 0x%02x for array length. buf=%p\n",
472           (int)buf[0], buf);
473       return nullptr;
474     }
475     *used = needed + total;
476     snprintf(jerr->text, sizeof(jerr->text),
477         "invalid array length encoding 0x%02x (needed %d but have %d)",
478         (int)buf[0], (int)needed, (int)(end - buf));
479     return nullptr;
480   }
481 
482   total += needed;
483   buf += needed;
484 
485   auto arrval = json_array();
486   for (i = 0; i < nelems; i++) {
487     needed = 0;
488     auto item = bunser(buf, end, &needed, jerr);
489 
490     total += needed;
491     buf += needed;
492 
493     if (!item) {
494       *used = total;
495       return nullptr;
496     }
497 
498     if (json_array_append_new(arrval, std::move(item))) {
499       *used = total;
500       snprintf(jerr->text, sizeof(jerr->text),
501         "failed to append array item");
502       return nullptr;
503     }
504   }
505 
506   *used = total;
507   return arrval;
508 }
509 
bunser_template(const char * buf,const char * end,json_int_t * used,json_error_t * jerr)510 static json_ref bunser_template(
511     const char* buf,
512     const char* end,
513     json_int_t* used,
514     json_error_t* jerr) {
515   json_int_t needed = 0;
516   json_int_t total = 0;
517   json_int_t i, nelems;
518   json_int_t ip, np;
519 
520   buf++;
521   total++;
522 
523   if (*buf != BSER_ARRAY) {
524     snprintf(jerr->text, sizeof(jerr->text),
525         "Expected array encoding, but found 0x%02x", *buf);
526     *used = total;
527     return nullptr;
528   }
529 
530   // Load in the property names template
531   auto templ = bunser_array(buf, end, &needed, jerr);
532   if (!templ) {
533     *used = needed + total;
534     return nullptr;
535   }
536   total += needed;
537   buf += needed;
538 
539   // And the number of objects
540   needed = 0;
541   if (!bunser_int(buf, end - buf, &needed, &nelems)) {
542     *used = needed + total;
543     snprintf(jerr->text, sizeof(jerr->text),
544         "invalid object number encoding (needed %d but have %d)",
545         (int)needed, (int)(end - buf));
546     return nullptr;
547   }
548   total += needed;
549   buf += needed;
550 
551   np = json_array_size(templ);
552 
553   // Now load up the array with object values
554   auto arrval = json_array_of_size((size_t)nelems);
555   for (i = 0; i < nelems; i++) {
556     auto item = json_object_of_size((size_t)np);
557     for (ip = 0; ip < np; ip++) {
558       if (*buf == BSER_SKIP) {
559         buf++;
560         total++;
561         continue;
562       }
563 
564       needed = 0;
565       auto val = bunser(buf, end, &needed, jerr);
566       if (!val) {
567         *used = needed + total;
568         return nullptr;
569       }
570       buf += needed;
571       total += needed;
572 
573       json_object_set_new_nocheck(
574           item,
575           json_string_value(json_array_get(templ, (size_t)ip)),
576           std::move(val));
577     }
578 
579     json_array_append_new(arrval, std::move(item));
580   }
581 
582   *used = total;
583   return arrval;
584 }
585 
bunser_object(const char * buf,const char * end,json_int_t * used,json_error_t * jerr)586 static json_ref bunser_object(
587     const char* buf,
588     const char* end,
589     json_int_t* used,
590     json_error_t* jerr) {
591   json_int_t needed;
592   json_int_t total = 0;
593   json_int_t i, nelems;
594   char keybuf[128];
595 
596   total = 1;
597   buf++;
598 
599   if (!bunser_int(buf, end - buf, &needed, &nelems)) {
600     *used = needed + total;
601     snprintf(jerr->text, sizeof(jerr->text),
602         "invalid object property count encoding");
603     return nullptr;
604   }
605 
606   total += needed;
607   buf += needed;
608 
609   auto objval = json_object();
610   for (i = 0; i < nelems; i++) {
611     const char *start;
612     json_int_t slen;
613 
614     // Read key
615     if (!bunser_generic_string(buf, end - buf, &needed, &start, &slen)) {
616       *used = total + needed;
617       snprintf(jerr->text, sizeof(jerr->text),
618           "invalid bytestring for object key");
619       return nullptr;
620     }
621     total += needed;
622     buf += needed;
623 
624     // Saves us allocating a string when the library is going to
625     // do that anyway
626     if ((uint16_t)slen > sizeof(keybuf) - 1) {
627       snprintf(jerr->text, sizeof(jerr->text),
628           "object key is too long");
629       return nullptr;
630     }
631     memcpy(keybuf, start, (size_t)slen);
632     keybuf[slen] = '\0';
633 
634     // Read value
635     auto item = bunser(buf, end, &needed, jerr);
636     total += needed;
637     buf += needed;
638 
639     if (!item) {
640       *used = total;
641       return nullptr;
642     }
643 
644     if (json_object_set_new_nocheck(objval, keybuf, std::move(item))) {
645       *used = total;
646       snprintf(jerr->text, sizeof(jerr->text),
647           "failed to add object property");
648       return nullptr;
649     }
650   }
651 
652   *used = total;
653   return objval;
654 }
655 
bunser(const char * buf,const char * end,json_int_t * needed,json_error_t * jerr)656 json_ref bunser(
657     const char* buf,
658     const char* end,
659     json_int_t* needed,
660     json_error_t* jerr) {
661   json_int_t ival;
662 
663   switch (buf[0]) {
664     case BSER_INT8:
665     case BSER_INT16:
666     case BSER_INT32:
667     case BSER_INT64:
668       if (!bunser_int(buf, end - buf, needed, &ival)) {
669         snprintf(jerr->text, sizeof(jerr->text),
670             "invalid integer encoding");
671         return nullptr;
672       }
673       return json_integer(ival);
674 
675     case BSER_BYTESTRING:
676     case BSER_UTF8STRING: {
677       const char *start;
678       json_int_t len;
679 
680       if (!bunser_generic_string(buf, end - buf, needed, &start, &len)) {
681         snprintf(jerr->text, sizeof(jerr->text),
682             "invalid bytestring encoding");
683         return nullptr;
684       }
685 
686       return typed_string_to_json(
687           start,
688           len,
689           buf[0] == BSER_BYTESTRING ? W_STRING_BYTE : W_STRING_UNICODE);
690     }
691 
692     case BSER_REAL:
693     {
694       double dval;
695       *needed = sizeof(double) + 1;
696       memcpy(&dval, buf + 1, sizeof(dval));
697       return json_real(dval);
698     }
699 
700     case BSER_TRUE:
701       *needed = 1;
702       return json_true();
703     case BSER_FALSE:
704       *needed = 1;
705       return json_false();
706     case BSER_NULL:
707       *needed = 1;
708       return json_null();
709     case BSER_ARRAY:
710       return bunser_array(buf, end, needed, jerr);
711     case BSER_TEMPLATE:
712       return bunser_template(buf, end, needed, jerr);
713     case BSER_OBJECT:
714       return bunser_object(buf, end, needed, jerr);
715     default:
716       snprintf(jerr->text, sizeof(jerr->text),
717             "invalid bser encoding type %02x", (int)buf[0]);
718       return nullptr;
719   }
720 
721 #ifndef _WIN32 // It knows this is unreachable
722   return nullptr;
723 #endif
724 }
725 
726 /* vim:ts=2:sw=2:et:
727  */
728