1 /*-
2 * Copyright (c) 2014-2018 MongoDB, Inc.
3 * Copyright (c) 2008-2014 WiredTiger, Inc.
4 * All rights reserved.
5 *
6 * See the file LICENSE for redistribution information.
7 */
8
9 #include "wt_internal.h"
10
11 static int __json_unpack_put(
12 WT_SESSION_IMPL *, void *, u_char *, size_t, WT_CONFIG_ITEM *, size_t *);
13 static inline int __json_struct_size(WT_SESSION_IMPL *, const void *, size_t,
14 const char *, WT_CONFIG_ITEM *, bool, size_t *);
15 static inline int __json_struct_unpackv(WT_SESSION_IMPL *, const void *, size_t,
16 const char *, WT_CONFIG_ITEM *, u_char *, size_t, bool, va_list);
17 static int json_string_arg(WT_SESSION_IMPL *, const char **, WT_ITEM *);
18 static int json_int_arg(WT_SESSION_IMPL *, const char **, int64_t *);
19 static int json_uint_arg(WT_SESSION_IMPL *, const char **, uint64_t *);
20 static int __json_pack_struct(WT_SESSION_IMPL *, void *, size_t, const char *,
21 const char *);
22 static int __json_pack_size(WT_SESSION_IMPL *, const char *, WT_CONFIG_ITEM *,
23 bool, const char *, size_t *);
24
/*
 * WT_PACK_JSON_GET --
 *	Fill in a pack value from the next token of a JSON string.
 *
 *	Dispatches on the format character in (pv).type: strings and byte
 *	arrays are read from a JSON string token, integer formats from a JSON
 *	integer token, and padding ('x') consumes no input. jstr is advanced
 *	past the token that was read. String data is stored as the raw token
 *	text with its escapes intact, and the type is changed to record that
 *	('s' becomes 'j', 'S' becomes 'J', 'u' becomes 'K'). A failed read
 *	is handed to WT_RET.
 */
#define	WT_PACK_JSON_GET(session, pv, jstr) do {			\
	switch ((pv).type) {						\
	case 'x':							\
		/* Padding consumes no JSON input. */			\
		break;							\
	case 's':							\
		WT_RET(json_string_arg(session, &(jstr), &(pv).u.item));\
		(pv).type = 'j';					\
		break;							\
	case 'S':							\
		WT_RET(json_string_arg(session, &(jstr), &(pv).u.item));\
		(pv).type = 'J';					\
		break;							\
	case 'u':							\
		WT_RET(json_string_arg(session, &(jstr), &(pv).u.item));\
		(pv).type = 'K';					\
		break;							\
	case 'b': case 'h': case 'i': case 'l': case 'q':		\
		/* Signed integer formats. */				\
		WT_RET(json_int_arg(session, &(jstr), &(pv).u.i));	\
		break;							\
	case 'B': case 'H': case 'I': case 'L': case 'Q':		\
	case 'r': case 'R': case 't':					\
		/* Unsigned integer formats. */				\
		WT_RET(json_uint_arg(session, &(jstr), &(pv).u.u));	\
		break;							\
	/* User format strings have already been validated. */		\
	WT_ILLEGAL_VALUE(session, (pv).type);				\
	}								\
} while (0)
59
60 /*
61 * __json_unpack_put --
62 * Calculate the size of a packed byte string as formatted for JSON.
63 */
64 static int
__json_unpack_put(WT_SESSION_IMPL * session,void * voidpv,u_char * buf,size_t bufsz,WT_CONFIG_ITEM * name,size_t * retsizep)65 __json_unpack_put(WT_SESSION_IMPL *session, void *voidpv,
66 u_char *buf, size_t bufsz, WT_CONFIG_ITEM *name, size_t *retsizep)
67 {
68 WT_PACK_VALUE *pv;
69 size_t s, n;
70 const u_char *p, *end;
71
72 pv = (WT_PACK_VALUE *)voidpv;
73
74 WT_RET(__wt_snprintf_len_set(
75 (char *)buf, bufsz, &s, "\"%.*s\" : ", (int)name->len, name->str));
76 if (s <= bufsz) {
77 bufsz -= s;
78 buf += s;
79 } else
80 bufsz = 0;
81
82 switch (pv->type) {
83 case 'x':
84 return (0);
85 case 's':
86 case 'S':
87 /* Account for '"' quote in front and back. */
88 s += 2;
89 p = (const u_char *)pv->u.s;
90 if (bufsz > 0) {
91 *buf++ = '"';
92 bufsz--;
93 }
94 if (pv->type == 's' || pv->havesize) {
95 end = p + pv->size;
96 for (; p < end; p++) {
97 n = __wt_json_unpack_char(
98 *p, buf, bufsz, false);
99 if (n > bufsz)
100 bufsz = 0;
101 else {
102 bufsz -= n;
103 buf += n;
104 }
105 s += n;
106 }
107 } else
108 for (; *p; p++) {
109 n = __wt_json_unpack_char(
110 *p, buf, bufsz, false);
111 if (n > bufsz)
112 bufsz = 0;
113 else {
114 bufsz -= n;
115 buf += n;
116 }
117 s += n;
118 }
119 if (bufsz > 0)
120 *buf++ = '"';
121 *retsizep += s;
122 return (0);
123 case 'U':
124 case 'u':
125 s += 2;
126 p = (const u_char *)pv->u.item.data;
127 end = p + pv->u.item.size;
128 if (bufsz > 0) {
129 *buf++ = '"';
130 bufsz--;
131 }
132 for (; p < end; p++) {
133 n = __wt_json_unpack_char(*p, buf, bufsz, true);
134 if (n > bufsz)
135 bufsz = 0;
136 else {
137 bufsz -= n;
138 buf += n;
139 }
140 s += n;
141 }
142 if (bufsz > 0)
143 *buf++ = '"';
144 *retsizep += s;
145 return (0);
146 case 'b':
147 case 'h':
148 case 'i':
149 case 'l':
150 case 'q':
151 WT_RET(__wt_snprintf_len_incr(
152 (char *)buf, bufsz, &s, "%" PRId64, pv->u.i));
153 *retsizep += s;
154 return (0);
155 case 'B':
156 case 't':
157 case 'H':
158 case 'I':
159 case 'L':
160 case 'Q':
161 case 'r':
162 case 'R':
163 WT_RET(__wt_snprintf_len_incr(
164 (char *)buf, bufsz, &s, "%" PRId64, pv->u.u));
165 *retsizep += s;
166 return (0);
167 }
168
169 WT_RET_MSG(session, EINVAL,
170 "unknown pack-value type: %c", (int)pv->type);
171 }
172
173 /*
174 * __json_struct_size --
175 * Calculate the size of a packed byte string as formatted for JSON.
176 */
177 static inline int
__json_struct_size(WT_SESSION_IMPL * session,const void * buffer,size_t size,const char * fmt,WT_CONFIG_ITEM * names,bool iskey,size_t * presult)178 __json_struct_size(WT_SESSION_IMPL *session, const void *buffer,
179 size_t size, const char *fmt, WT_CONFIG_ITEM *names, bool iskey,
180 size_t *presult)
181 {
182 WT_CONFIG_ITEM name;
183 WT_DECL_PACK_VALUE(pv);
184 WT_DECL_RET;
185 WT_PACK pack;
186 WT_PACK_NAME packname;
187 size_t result;
188 const uint8_t *p, *end;
189 bool needcr;
190
191 p = buffer;
192 end = p + size;
193 result = 0;
194 needcr = false;
195
196 __pack_name_init(session, names, iskey, &packname);
197 WT_RET(__pack_init(session, &pack, fmt));
198 while ((ret = __pack_next(&pack, &pv)) == 0) {
199 if (needcr)
200 result += 2;
201 needcr = true;
202 WT_RET(__unpack_read(session, &pv, &p, (size_t)(end - p)));
203 WT_RET(__pack_name_next(&packname, &name));
204 WT_RET(
205 __json_unpack_put(session, &pv, NULL, 0, &name, &result));
206 }
207 WT_RET_NOTFOUND_OK(ret);
208
209 /* Be paranoid - __pack_write should never overflow. */
210 WT_ASSERT(session, p <= end);
211
212 *presult = result;
213 return (0);
214 }
215
216 /*
217 * __json_struct_unpackv --
218 * Unpack a byte string to JSON (va_list version).
219 */
220 static inline int
__json_struct_unpackv(WT_SESSION_IMPL * session,const void * buffer,size_t size,const char * fmt,WT_CONFIG_ITEM * names,u_char * jbuf,size_t jbufsize,bool iskey,va_list ap)221 __json_struct_unpackv(WT_SESSION_IMPL *session,
222 const void *buffer, size_t size, const char *fmt, WT_CONFIG_ITEM *names,
223 u_char *jbuf, size_t jbufsize, bool iskey, va_list ap)
224 {
225 WT_CONFIG_ITEM name;
226 WT_DECL_PACK_VALUE(pv);
227 WT_DECL_RET;
228 WT_PACK pack;
229 WT_PACK_NAME packname;
230 size_t jsize;
231 const uint8_t *p, *end;
232 bool needcr;
233
234 p = buffer;
235 end = p + size;
236 needcr = false;
237
238 /* Unpacking a cursor marked as json implies a single arg. */
239 *va_arg(ap, const char **) = (char *)jbuf;
240
241 __pack_name_init(session, names, iskey, &packname);
242 WT_RET(__pack_init(session, &pack, fmt));
243 while ((ret = __pack_next(&pack, &pv)) == 0) {
244 if (needcr) {
245 WT_ASSERT(session, jbufsize >= 3);
246 strncat((char *)jbuf, ",\n", jbufsize);
247 jbuf += 2;
248 jbufsize -= 2;
249 }
250 needcr = true;
251 WT_RET(__unpack_read(session, &pv, &p, (size_t)(end - p)));
252 WT_RET(__pack_name_next(&packname, &name));
253 jsize = 0;
254 WT_RET(__json_unpack_put(session,
255 (u_char *)&pv, jbuf, jbufsize, &name, &jsize));
256 WT_ASSERT(session, jsize <= jbufsize);
257 jbuf += jsize;
258 jbufsize -= jsize;
259 }
260 WT_RET_NOTFOUND_OK(ret);
261
262 /* Be paranoid - __unpack_read should never overflow. */
263 WT_ASSERT(session, p <= end);
264
265 WT_ASSERT(session, jbufsize == 1);
266
267 return (0);
268 }
269
270 /*
271 * __wt_json_alloc_unpack --
272 * Allocate space for, and unpack an entry into JSON format.
273 */
274 int
__wt_json_alloc_unpack(WT_SESSION_IMPL * session,const void * buffer,size_t size,const char * fmt,WT_CURSOR_JSON * json,bool iskey,va_list ap)275 __wt_json_alloc_unpack(WT_SESSION_IMPL *session, const void *buffer,
276 size_t size, const char *fmt, WT_CURSOR_JSON *json,
277 bool iskey, va_list ap)
278 {
279 WT_CONFIG_ITEM *names;
280 size_t needed;
281 char **json_bufp;
282
283 if (iskey) {
284 names = &json->key_names;
285 json_bufp = &json->key_buf;
286 } else {
287 names = &json->value_names;
288 json_bufp = &json->value_buf;
289 }
290 needed = 0;
291 WT_RET(__json_struct_size(session, buffer, size, fmt, names,
292 iskey, &needed));
293 WT_RET(__wt_realloc(session, NULL, needed + 1, json_bufp));
294 WT_RET(__json_struct_unpackv(session, buffer, size, fmt,
295 names, (u_char *)*json_bufp, needed + 1, iskey, ap));
296
297 return (0);
298 }
299
300 /*
301 * __wt_json_close --
302 * Release any json related resources.
303 */
304 void
__wt_json_close(WT_SESSION_IMPL * session,WT_CURSOR * cursor)305 __wt_json_close(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
306 {
307 WT_CURSOR_JSON *json;
308
309 if ((json = (WT_CURSOR_JSON *)cursor->json_private) != NULL) {
310 __wt_free(session, json->key_buf);
311 __wt_free(session, json->value_buf);
312 __wt_free(session, json);
313 }
314 }
315
316 /*
317 * __wt_json_unpack_char --
318 * Unpack a single character into JSON escaped format.
319 * Can be called with null buf for sizing.
320 */
321 size_t
__wt_json_unpack_char(u_char ch,u_char * buf,size_t bufsz,bool force_unicode)322 __wt_json_unpack_char(u_char ch, u_char *buf, size_t bufsz, bool force_unicode)
323 WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
324 {
325 u_char abbrev;
326
327 if (!force_unicode) {
328 if (__wt_isprint(ch) && ch != '\\' && ch != '"') {
329 if (bufsz >= 1)
330 *buf = ch;
331 return (1);
332 }
333 abbrev = '\0';
334 switch (ch) {
335 case '\\':
336 case '"':
337 abbrev = ch;
338 break;
339 case '\f':
340 abbrev = 'f';
341 break;
342 case '\n':
343 abbrev = 'n';
344 break;
345 case '\r':
346 abbrev = 'r';
347 break;
348 case '\t':
349 abbrev = 't';
350 break;
351 }
352 if (abbrev != '\0') {
353 if (bufsz >= 2) {
354 *buf++ = '\\';
355 *buf = abbrev;
356 }
357 return (2);
358 }
359 }
360 if (bufsz >= 6) {
361 *buf++ = '\\';
362 *buf++ = 'u';
363 *buf++ = '0';
364 *buf++ = '0';
365 *buf++ = __wt_hex((ch & 0xf0) >> 4);
366 *buf++ = __wt_hex(ch & 0x0f);
367 }
368 return (6);
369 }
370
371 /*
372 * __wt_json_column_init --
373 * Set json_key_names, json_value_names to comma separated lists
374 * of column names.
375 */
376 void
__wt_json_column_init(WT_CURSOR * cursor,const char * uri,const char * keyformat,const WT_CONFIG_ITEM * idxconf,const WT_CONFIG_ITEM * colconf)377 __wt_json_column_init(WT_CURSOR *cursor, const char *uri, const char *keyformat,
378 const WT_CONFIG_ITEM *idxconf, const WT_CONFIG_ITEM *colconf)
379 {
380 WT_CURSOR_JSON *json;
381 uint32_t keycnt, nkeys;
382 const char *beginkey, *end, *lparen, *p;
383
384 json = (WT_CURSOR_JSON *)cursor->json_private;
385 beginkey = colconf->str;
386 end = beginkey + colconf->len;
387
388 if (idxconf != NULL) {
389 json->key_names.str = idxconf->str;
390 json->key_names.len = idxconf->len;
391 } else if (colconf->len > 0 && *beginkey == '(') {
392 beginkey++;
393 if (end[-1] == ')')
394 end--;
395 }
396
397 for (nkeys = 0; *keyformat; keyformat++)
398 if (!__wt_isdigit((u_char)*keyformat))
399 nkeys++;
400
401 p = beginkey;
402 keycnt = 0;
403 while (p < end && keycnt < nkeys) {
404 if (*p == ',')
405 keycnt++;
406 p++;
407 }
408 if ((lparen = strchr(uri, '(')) != NULL) {
409 /* This cursor is a projection. */
410 json->value_names.str = lparen;
411 json->value_names.len = strlen(lparen) - 1;
412 WT_ASSERT((WT_SESSION_IMPL *)cursor->session,
413 json->value_names.str[json->value_names.len] == ')');
414 } else {
415 json->value_names.str = p;
416 json->value_names.len = WT_PTRDIFF(end, p);
417 }
418 if (idxconf == NULL) {
419 if (p > beginkey)
420 p--;
421 json->key_names.str = beginkey;
422 json->key_names.len = WT_PTRDIFF(p, beginkey);
423 }
424 }
425
/*
 * MATCH_KEYWORD --
 *	Match a JSON keyword at the current input position.
 *
 *	If the input starts with keyword and the character after it is not
 *	alphanumeric, the input pointer is moved past the keyword and result
 *	is set to matchval. Otherwise the input pointer is moved past the run
 *	of alphanumeric characters found there and WT_RET_MSG reports it with
 *	EINVAL.
 */
#define	MATCH_KEYWORD(session, in, result, keyword, matchval) do {	\
	size_t _len = strlen(keyword);					\
	if (strncmp(in, keyword, _len) != 0 ||				\
	    __wt_isalnum((u_char)(in)[_len])) {				\
		const char *_word = (in);				\
		for (; __wt_isalnum((u_char)*(in)); (in)++)		\
			;						\
		WT_RET_MSG(session, EINVAL,				\
		    "unknown keyword \"%.*s\" in JSON",			\
		    (int)((in) - _word), _word);			\
	} else {							\
		(in) += _len;						\
		(result) = matchval;					\
	}								\
} while (0)
441
442 /*
443 * __wt_json_token --
444 * Return the type, start position and length of the next JSON
445 * token in the input. String tokens include the quotes. JSON
446 * can be entirely parsed using calls to this tokenizer, each
447 * call using a src pointer that is the previously returned
448 * tokstart + toklen.
449 *
450 * The token type returned is one of:
451 * 0 : EOF
452 * 's' : string
453 * 'i' : intnum
454 * 'f' : floatnum
455 * ':' : colon
456 * ',' : comma
457 * '{' : lbrace
458 * '}' : rbrace
459 * '[' : lbracket
460 * ']' : rbracket
461 * 'N' : null
462 * 'T' : true
463 * 'F' : false
464 */
465 int
__wt_json_token(WT_SESSION * wt_session,const char * src,int * toktype,const char ** tokstart,size_t * toklen)466 __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype,
467 const char **tokstart, size_t *toklen)
468 WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
469 {
470 WT_SESSION_IMPL *session;
471 int result;
472 const char *bad;
473 char ch;
474 bool backslash, isalph, isfloat;
475
476 result = -1;
477 session = (WT_SESSION_IMPL *)wt_session;
478 while (__wt_isspace((u_char)*src))
479 src++;
480 *tokstart = src;
481
482 if (*src == '\0') {
483 *toktype = 0;
484 *toklen = 0;
485 return (0);
486 }
487
488 /* JSON is specified in RFC 4627. */
489 switch (*src) {
490 case '"':
491 backslash = false;
492 src++;
493 while ((ch = *src) != '\0') {
494 if (!backslash) {
495 if (ch == '"') {
496 src++;
497 result = 's';
498 break;
499 }
500 if (ch == '\\')
501 backslash = true;
502 } else {
503 /* We validate Unicode on this pass. */
504 if (ch == 'u') {
505 u_char ignored;
506 const u_char *uc;
507
508 uc = (const u_char *)src;
509 if (__wt_hex2byte(&uc[1], &ignored) ||
510 __wt_hex2byte(&uc[3], &ignored))
511 WT_RET_MSG(session, EINVAL,
512 "invalid Unicode within JSON string");
513 src += 4;
514 }
515 backslash = false;
516 }
517 src++;
518 }
519 if (result == 's')
520 break;
521 WT_RET_MSG(session, EINVAL, "unterminated string in JSON");
522 case '-':
523 case '0':
524 case '1':
525 case '2':
526 case '3':
527 case '4':
528 case '5':
529 case '6':
530 case '7':
531 case '8':
532 case '9':
533 isfloat = false;
534 if (*src == '-')
535 src++;
536 while ((ch = *src) != '\0' && __wt_isdigit((u_char)ch))
537 src++;
538 if (*src == '.') {
539 isfloat = true;
540 src++;
541 while ((ch = *src) != '\0' && __wt_isdigit((u_char)ch))
542 src++;
543 }
544 if (*src == 'e' || *src == 'E') {
545 isfloat = true;
546 src++;
547 if (*src == '+' || *src == '-')
548 src++;
549 while ((ch = *src) != '\0' && __wt_isdigit((u_char)ch))
550 src++;
551 }
552 result = isfloat ? 'f' : 'i';
553 break;
554 case ':':
555 case ',':
556 case '{':
557 case '}':
558 case '[':
559 case ']':
560 result = *src++;
561 break;
562 case 'n':
563 MATCH_KEYWORD(session, src, result, "null", 'N');
564 break;
565 case 't':
566 MATCH_KEYWORD(session, src, result, "true", 'T');
567 break;
568 case 'f':
569 MATCH_KEYWORD(session, src, result, "false", 'F');
570 break;
571 default:
572 /* An illegal token, move past it anyway */
573 bad = src;
574 isalph = __wt_isalnum((u_char)*src);
575 src++;
576 if (isalph)
577 while (*src != '\0' && __wt_isalnum((u_char)*src))
578 src++;
579 WT_RET_MSG(session, EINVAL,
580 "unknown token \"%.*s\" in JSON", (int)(src - bad), bad);
581 /* NOTREACHED */
582 }
583 WT_ASSERT(session, result != -1);
584
585 *toklen = (size_t)(src - *tokstart);
586 *toktype = result;
587 return (0);
588 }
589
590 /*
591 * __wt_json_tokname --
592 * Return a descriptive name from the token type returned by
593 * __wt_json_token.
594 */
595 const char *
__wt_json_tokname(int toktype)596 __wt_json_tokname(int toktype)
597 WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
598 {
599 switch (toktype) {
600 case 0: return ("<EOF>");
601 case 's': return ("<string>");
602 case 'i': return ("<integer>");
603 case 'f': return ("<float>");
604 case ':': return ("':'");
605 case ',': return ("','");
606 case '{': return ("'{'");
607 case '}': return ("'}'");
608 case '[': return ("'['");
609 case ']': return ("']'");
610 case 'N': return ("'null'");
611 case 'T': return ("'true'");
612 case 'F': return ("'false'");
613 default: return ("<UNKNOWN>");
614 }
615 }
616
617 /*
618 * json_string_arg --
619 * Returns a first cut of the needed string in item.
620 * The result has not been stripped of escapes.
621 */
622 static int
json_string_arg(WT_SESSION_IMPL * session,const char ** jstr,WT_ITEM * item)623 json_string_arg(WT_SESSION_IMPL *session, const char **jstr, WT_ITEM *item)
624 {
625 int tok;
626 const char *tokstart;
627
628 WT_RET(__wt_json_token(
629 (WT_SESSION *)session, *jstr, &tok, &tokstart, &item->size));
630 if (tok == 's') {
631 *jstr = tokstart + item->size;
632 /* The tokenizer includes the '"' chars */
633 item->data = tokstart + 1;
634 item->size -= 2;
635 } else
636 WT_RET_MSG(session, EINVAL,
637 "expected JSON <string>, got %s", __wt_json_tokname(tok));
638 return (0);
639 }
640
641 /*
642 * json_int_arg --
643 * Returns a signed integral value from the current position
644 * in the JSON string.
645 */
646 static int
json_int_arg(WT_SESSION_IMPL * session,const char ** jstr,int64_t * ip)647 json_int_arg(WT_SESSION_IMPL *session, const char **jstr, int64_t *ip)
648 {
649 size_t toksize;
650 int tok;
651 char *end;
652 const char *tokstart;
653
654 WT_RET(__wt_json_token((WT_SESSION *)session, *jstr, &tok, &tokstart,
655 &toksize));
656 if (tok == 'i') {
657 /* JSON only allows decimal */
658 *ip = strtoll(tokstart, &end, 10);
659 if (end != tokstart + toksize)
660 WT_RET_MSG(session, EINVAL,
661 "JSON <int> extraneous input");
662 *jstr = tokstart + toksize;
663 } else
664 WT_RET_MSG(session, EINVAL,
665 "expected JSON <int>, got %s", __wt_json_tokname(tok));
666 return (0);
667 }
668
669 /*
670 * json_uint_arg --
671 * Returns an unsigned integral value from the current position
672 * in the JSON string.
673 */
674 static int
json_uint_arg(WT_SESSION_IMPL * session,const char ** jstr,uint64_t * up)675 json_uint_arg(WT_SESSION_IMPL *session, const char **jstr, uint64_t *up)
676 {
677 size_t toksize;
678 int tok;
679 char *end;
680 const char *tokstart;
681
682 WT_RET(__wt_json_token((WT_SESSION *)session, *jstr, &tok, &tokstart,
683 &toksize));
684 if (tok == 'i' && *tokstart != '-') {
685 /* JSON only allows decimal */
686 *up = strtoull(tokstart, &end, 10);
687 if (end != tokstart + toksize)
688 WT_RET_MSG(session, EINVAL,
689 "JSON <int> extraneous input");
690 *jstr = tokstart + toksize;
691 } else
692 WT_RET_MSG(session, EINVAL,
693 "expected unsigned JSON <int>, got %s",
694 __wt_json_tokname(tok));
695 return (0);
696 }
697
/*
 * JSON_EXPECT_TOKEN_GET --
 *	Read the next token from jstr and require it to be of type tokval.
 *	The token's position and length are stored in start and sz, and jstr
 *	is moved past the token. Any other token type is reported through
 *	WT_RET_MSG with EINVAL.
 */
#define	JSON_EXPECT_TOKEN_GET(session, jstr, tokval, start, sz) do {	\
	int _got;							\
	WT_RET(__wt_json_token(						\
	    (WT_SESSION *)(session), jstr, &_got, &(start), &(sz)));	\
	if (_got == (tokval))						\
		(jstr) = (start) + (sz);				\
	else								\
		WT_RET_MSG(session, EINVAL,				\
		    "expected JSON %s, got %s",				\
		    __wt_json_tokname(tokval), __wt_json_tokname(_got));\
} while (0)
708
/*
 * JSON_EXPECT_TOKEN --
 *	Like JSON_EXPECT_TOKEN_GET, for callers that do not need to know
 *	where the token was or how long it is.
 */
#define	JSON_EXPECT_TOKEN(session, jstr, tokval) do {			\
	size_t _skip_len;						\
	const char *_skip_start;					\
	JSON_EXPECT_TOKEN_GET(						\
	    session, jstr, tokval, _skip_start, _skip_len);		\
} while (0)
714
715 /*
716 * __json_pack_struct --
717 * Pack a byte string from a JSON string.
718 */
719 static int
__json_pack_struct(WT_SESSION_IMPL * session,void * buffer,size_t size,const char * fmt,const char * jstr)720 __json_pack_struct(WT_SESSION_IMPL *session, void *buffer, size_t size,
721 const char *fmt, const char *jstr)
722 {
723 WT_DECL_PACK_VALUE(pv);
724 WT_DECL_RET;
725 WT_PACK pack;
726 size_t toksize;
727 uint8_t *p, *end;
728 const char *tokstart;
729 bool multi;
730
731 p = buffer;
732 end = p + size;
733 multi = false;
734
735 if (fmt[0] != '\0' && fmt[1] == '\0') {
736 JSON_EXPECT_TOKEN_GET(session, jstr, 's', tokstart, toksize);
737 /* the key name was verified in __json_pack_size */
738 JSON_EXPECT_TOKEN(session, jstr, ':');
739 pv.type = fmt[0];
740 WT_PACK_JSON_GET(session, pv, jstr);
741 return (__pack_write(session, &pv, &p, size));
742 }
743
744 WT_RET(__pack_init(session, &pack, fmt));
745 while ((ret = __pack_next(&pack, &pv)) == 0) {
746 if (multi)
747 JSON_EXPECT_TOKEN(session, jstr, ',');
748 JSON_EXPECT_TOKEN_GET(session, jstr, 's', tokstart, toksize);
749 /* the key name was verified in __json_pack_size */
750 JSON_EXPECT_TOKEN(session, jstr, ':');
751 WT_PACK_JSON_GET(session, pv, jstr);
752 WT_RET(__pack_write(session, &pv, &p, (size_t)(end - p)));
753 multi = true;
754 }
755 WT_RET_NOTFOUND_OK(ret);
756
757 /* Be paranoid - __pack_write should never overflow. */
758 WT_ASSERT(session, p <= end);
759
760 return (0);
761 }
762
763 /*
764 * __json_pack_size --
765 * Calculate the size of a packed byte string from a JSON string.
766 * We verify that the names and value types provided in JSON match
767 * the column names and type from the schema format, returning error
768 * if not.
769 */
770 static int
__json_pack_size(WT_SESSION_IMPL * session,const char * fmt,WT_CONFIG_ITEM * names,bool iskey,const char * jstr,size_t * sizep)771 __json_pack_size(
772 WT_SESSION_IMPL *session, const char *fmt, WT_CONFIG_ITEM *names,
773 bool iskey, const char *jstr, size_t *sizep)
774 {
775 WT_CONFIG_ITEM name;
776 WT_DECL_PACK_VALUE(pv);
777 WT_DECL_RET;
778 WT_PACK pack;
779 WT_PACK_NAME packname;
780 size_t toksize, v;
781 const char *tokstart;
782 bool multi;
783
784 __pack_name_init(session, names, iskey, &packname);
785 multi = false;
786 WT_RET(__pack_init(session, &pack, fmt));
787 for (*sizep = 0; (ret = __pack_next(&pack, &pv)) == 0;) {
788 if (multi)
789 JSON_EXPECT_TOKEN(session, jstr, ',');
790 JSON_EXPECT_TOKEN_GET(session, jstr, 's', tokstart, toksize);
791 WT_RET(__pack_name_next(&packname, &name));
792 if (toksize - 2 != name.len ||
793 strncmp(tokstart + 1, name.str, toksize - 2) != 0)
794 WT_RET_MSG(session, EINVAL,
795 "JSON expected %s name: \"%.*s\"",
796 iskey ? "key" : "value", (int)name.len, name.str);
797 JSON_EXPECT_TOKEN(session, jstr, ':');
798 WT_PACK_JSON_GET(session, pv, jstr);
799 WT_RET(__pack_size(session, &pv, &v));
800 *sizep += v;
801 multi = true;
802 }
803 WT_RET_NOTFOUND_OK(ret);
804
805 /* check end of string */
806 JSON_EXPECT_TOKEN(session, jstr, 0);
807
808 return (0);
809 }
810
811 /*
812 * __wt_json_to_item --
813 * Convert a JSON input string for either key/value to a raw WT_ITEM.
814 * Checks that the input matches the expected format.
815 */
816 int
__wt_json_to_item(WT_SESSION_IMPL * session,const char * jstr,const char * format,WT_CURSOR_JSON * json,bool iskey,WT_ITEM * item)817 __wt_json_to_item(WT_SESSION_IMPL *session, const char *jstr,
818 const char *format, WT_CURSOR_JSON *json, bool iskey, WT_ITEM *item)
819 {
820 size_t sz;
821 sz = 0; /* Initialize because GCC 4.1 is paranoid */
822
823 WT_RET(__json_pack_size(session, format,
824 iskey ? &json->key_names : &json->value_names, iskey, jstr, &sz));
825 WT_RET(__wt_buf_initsize(session, item, sz));
826 WT_RET(__json_pack_struct(session, item->mem, sz, format, jstr));
827 return (0);
828 }
829
830 /*
831 * __wt_json_strlen --
832 * Return the number of bytes represented by a string in JSON format,
833 * or -1 if the format is incorrect.
834 */
835 ssize_t
__wt_json_strlen(const char * src,size_t srclen)836 __wt_json_strlen(const char *src, size_t srclen)
837 WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
838 {
839 size_t dstlen;
840 u_char hi, lo;
841 const char *srcend;
842
843 dstlen = 0;
844 srcend = src + srclen;
845 while (src < srcend) {
846 /* JSON can include any UTF-8 expressed in 4 hex chars. */
847 if (*src == '\\') {
848 if (*++src == 'u') {
849 if (__wt_hex2byte((const u_char *)++src, &hi))
850 return (-1);
851 src += 2;
852 if (__wt_hex2byte((const u_char *)src, &lo))
853 return (-1);
854 src += 2;
855 if (hi != 0)
856 /*
857 * For our dump representation,
858 * every Unicode character on input
859 * represents a single byte.
860 */
861 return (-1);
862 }
863 } else
864 src++;
865 dstlen++;
866 }
867 if (src != srcend)
868 return (-1); /* invalid input, e.g. final char is '\\' */
869 return ((ssize_t)dstlen);
870 }
871
872 /*
873 * __wt_json_strncpy --
874 * Copy bytes of string in JSON format to a destination, up to dstlen
875 * bytes. If dstlen is greater than the needed size, the result if zero padded.
876 */
877 int
__wt_json_strncpy(WT_SESSION * wt_session,char ** pdst,size_t dstlen,const char * src,size_t srclen)878 __wt_json_strncpy(WT_SESSION *wt_session,
879 char **pdst, size_t dstlen, const char *src, size_t srclen)
880 WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
881 {
882 WT_SESSION_IMPL *session;
883 u_char hi, lo;
884 char ch, *dst;
885 const char *dstend, *srcend;
886
887 session = (WT_SESSION_IMPL *)wt_session;
888
889 dst = *pdst;
890 dstend = dst + dstlen;
891 srcend = src + srclen;
892 while (src < srcend && dst < dstend) {
893 /* JSON can include any UTF-8 expressed in 4 hex chars. */
894 if ((ch = *src++) == '\\')
895 switch (ch = *src++) {
896 case 'u':
897 if (__wt_hex2byte((const u_char *)src, &hi) ||
898 __wt_hex2byte((const u_char *)src + 2, &lo))
899 WT_RET_MSG(session, EINVAL,
900 "invalid Unicode within JSON string");
901 src += 4;
902 if (hi != 0)
903 WT_RET_MSG(session, EINVAL,
904 "Unicode \"%6.6s\" byte out of "
905 "range in JSON",
906 src - 6);
907 *dst++ = (char)lo;
908 break;
909 case 'f':
910 *dst++ = '\f';
911 break;
912 case 'n':
913 *dst++ = '\n';
914 break;
915 case 'r':
916 *dst++ = '\r';
917 break;
918 case 't':
919 *dst++ = '\t';
920 break;
921 case '"':
922 case '\\':
923 *dst++ = ch;
924 break;
925 WT_ILLEGAL_VALUE(session, ch);
926 }
927 else
928 *dst++ = ch;
929 }
930 if (src != srcend)
931 WT_RET_MSG(session,
932 ENOMEM, "JSON string copy destination buffer too small");
933 *pdst = dst;
934 while (dst < dstend)
935 *dst++ = '\0';
936 return (0);
937 }
938