1 /*-------------------------------------------------------------------------
2 *
3 * jsonb_util.c
4 * converting between Jsonb and JsonbValues, and iterating.
5 *
6 * Copyright (c) 2014-2020, PostgreSQL Global Development Group
7 *
8 *
9 * IDENTIFICATION
10 * src/backend/utils/adt/jsonb_util.c
11 *
12 *-------------------------------------------------------------------------
13 */
14 #include "postgres.h"
15
16 #include "catalog/pg_collation.h"
17 #include "catalog/pg_type.h"
18 #include "common/hashfn.h"
19 #include "common/jsonapi.h"
20 #include "miscadmin.h"
21 #include "utils/builtins.h"
22 #include "utils/datetime.h"
23 #include "utils/json.h"
24 #include "utils/jsonb.h"
25 #include "utils/memutils.h"
26 #include "utils/varlena.h"
27
28 /*
29 * Maximum number of elements in an array (or key/value pairs in an object).
30 * This is limited by two things: the size of the JEntry array must fit
31 * in MaxAllocSize, and the number of elements (or pairs) must fit in the bits
32 * reserved for that in the JsonbContainer.header field.
33 *
34 * (The total size of an array's or object's elements is also limited by
35 * JENTRY_OFFLENMASK, but we're not concerned about that here.)
36 */
37 #define JSONB_MAX_ELEMS (Min(MaxAllocSize / sizeof(JsonbValue), JB_CMASK))
38 #define JSONB_MAX_PAIRS (Min(MaxAllocSize / sizeof(JsonbPair), JB_CMASK))
39
40 static void fillJsonbValue(JsonbContainer *container, int index,
41 char *base_addr, uint32 offset,
42 JsonbValue *result);
43 static bool equalsJsonbScalarValue(JsonbValue *a, JsonbValue *b);
44 static int compareJsonbScalarValue(JsonbValue *a, JsonbValue *b);
45 static Jsonb *convertToJsonb(JsonbValue *val);
46 static void convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
47 static void convertJsonbArray(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
48 static void convertJsonbObject(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
49 static void convertJsonbScalar(StringInfo buffer, JEntry *header, JsonbValue *scalarVal);
50
51 static int reserveFromBuffer(StringInfo buffer, int len);
52 static void appendToBuffer(StringInfo buffer, const char *data, int len);
53 static void copyToBuffer(StringInfo buffer, int offset, const char *data, int len);
54 static short padBufferToInt(StringInfo buffer);
55
56 static JsonbIterator *iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent);
57 static JsonbIterator *freeAndGetParent(JsonbIterator *it);
58 static JsonbParseState *pushState(JsonbParseState **pstate);
59 static void appendKey(JsonbParseState *pstate, JsonbValue *scalarVal);
60 static void appendValue(JsonbParseState *pstate, JsonbValue *scalarVal);
61 static void appendElement(JsonbParseState *pstate, JsonbValue *scalarVal);
62 static int lengthCompareJsonbStringValue(const void *a, const void *b);
63 static int lengthCompareJsonbString(const char *val1, int len1,
64 const char *val2, int len2);
65 static int lengthCompareJsonbPair(const void *a, const void *b, void *arg);
66 static void uniqueifyJsonbObject(JsonbValue *object);
67 static JsonbValue *pushJsonbValueScalar(JsonbParseState **pstate,
68 JsonbIteratorToken seq,
69 JsonbValue *scalarVal);
70
71 /*
72 * Turn an in-memory JsonbValue into a Jsonb for on-disk storage.
73 *
74 * There isn't a JsonbToJsonbValue(), because generally we find it more
75 * convenient to directly iterate through the Jsonb representation and only
76 * really convert nested scalar values. JsonbIteratorNext() does this, so that
77 * clients of the iteration code don't have to directly deal with the binary
78 * representation (JsonbDeepContains() is a notable exception, although all
79 * exceptions are internal to this module). In general, functions that accept
80 * a JsonbValue argument are concerned with the manipulation of scalar values,
81 * or simple containers of scalar values, where it would be inconvenient to
82 * deal with a great amount of other state.
83 */
84 Jsonb *
JsonbValueToJsonb(JsonbValue * val)85 JsonbValueToJsonb(JsonbValue *val)
86 {
87 Jsonb *out;
88
89 if (IsAJsonbScalar(val))
90 {
91 /* Scalar value */
92 JsonbParseState *pstate = NULL;
93 JsonbValue *res;
94 JsonbValue scalarArray;
95
96 scalarArray.type = jbvArray;
97 scalarArray.val.array.rawScalar = true;
98 scalarArray.val.array.nElems = 1;
99
100 pushJsonbValue(&pstate, WJB_BEGIN_ARRAY, &scalarArray);
101 pushJsonbValue(&pstate, WJB_ELEM, val);
102 res = pushJsonbValue(&pstate, WJB_END_ARRAY, NULL);
103
104 out = convertToJsonb(res);
105 }
106 else if (val->type == jbvObject || val->type == jbvArray)
107 {
108 out = convertToJsonb(val);
109 }
110 else
111 {
112 Assert(val->type == jbvBinary);
113 out = palloc(VARHDRSZ + val->val.binary.len);
114 SET_VARSIZE(out, VARHDRSZ + val->val.binary.len);
115 memcpy(VARDATA(out), val->val.binary.data, val->val.binary.len);
116 }
117
118 return out;
119 }
120
121 /*
122 * Get the offset of the variable-length portion of a Jsonb node within
123 * the variable-length-data part of its container. The node is identified
124 * by index within the container's JEntry array.
125 */
126 uint32
getJsonbOffset(const JsonbContainer * jc,int index)127 getJsonbOffset(const JsonbContainer *jc, int index)
128 {
129 uint32 offset = 0;
130 int i;
131
132 /*
133 * Start offset of this entry is equal to the end offset of the previous
134 * entry. Walk backwards to the most recent entry stored as an end
135 * offset, returning that offset plus any lengths in between.
136 */
137 for (i = index - 1; i >= 0; i--)
138 {
139 offset += JBE_OFFLENFLD(jc->children[i]);
140 if (JBE_HAS_OFF(jc->children[i]))
141 break;
142 }
143
144 return offset;
145 }
146
147 /*
148 * Get the length of the variable-length portion of a Jsonb node.
149 * The node is identified by index within the container's JEntry array.
150 */
151 uint32
getJsonbLength(const JsonbContainer * jc,int index)152 getJsonbLength(const JsonbContainer *jc, int index)
153 {
154 uint32 off;
155 uint32 len;
156
157 /*
158 * If the length is stored directly in the JEntry, just return it.
159 * Otherwise, get the begin offset of the entry, and subtract that from
160 * the stored end+1 offset.
161 */
162 if (JBE_HAS_OFF(jc->children[index]))
163 {
164 off = getJsonbOffset(jc, index);
165 len = JBE_OFFLENFLD(jc->children[index]) - off;
166 }
167 else
168 len = JBE_OFFLENFLD(jc->children[index]);
169
170 return len;
171 }
172
173 /*
174 * BT comparator worker function. Returns an integer less than, equal to, or
175 * greater than zero, indicating whether a is less than, equal to, or greater
176 * than b. Consistent with the requirements for a B-Tree operator class
177 *
178 * Strings are compared lexically, in contrast with other places where we use a
179 * much simpler comparator logic for searching through Strings. Since this is
180 * called from B-Tree support function 1, we're careful about not leaking
181 * memory here.
182 */
183 int
compareJsonbContainers(JsonbContainer * a,JsonbContainer * b)184 compareJsonbContainers(JsonbContainer *a, JsonbContainer *b)
185 {
186 JsonbIterator *ita,
187 *itb;
188 int res = 0;
189
190 ita = JsonbIteratorInit(a);
191 itb = JsonbIteratorInit(b);
192
193 do
194 {
195 JsonbValue va,
196 vb;
197 JsonbIteratorToken ra,
198 rb;
199
200 ra = JsonbIteratorNext(&ita, &va, false);
201 rb = JsonbIteratorNext(&itb, &vb, false);
202
203 if (ra == rb)
204 {
205 if (ra == WJB_DONE)
206 {
207 /* Decisively equal */
208 break;
209 }
210
211 if (ra == WJB_END_ARRAY || ra == WJB_END_OBJECT)
212 {
213 /*
214 * There is no array or object to compare at this stage of
215 * processing. jbvArray/jbvObject values are compared
216 * initially, at the WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT
217 * tokens.
218 */
219 continue;
220 }
221
222 if (va.type == vb.type)
223 {
224 switch (va.type)
225 {
226 case jbvString:
227 case jbvNull:
228 case jbvNumeric:
229 case jbvBool:
230 res = compareJsonbScalarValue(&va, &vb);
231 break;
232 case jbvArray:
233
234 /*
235 * This could be a "raw scalar" pseudo array. That's
236 * a special case here though, since we still want the
237 * general type-based comparisons to apply, and as far
238 * as we're concerned a pseudo array is just a scalar.
239 */
240 if (va.val.array.rawScalar != vb.val.array.rawScalar)
241 res = (va.val.array.rawScalar) ? -1 : 1;
242 if (va.val.array.nElems != vb.val.array.nElems)
243 res = (va.val.array.nElems > vb.val.array.nElems) ? 1 : -1;
244 break;
245 case jbvObject:
246 if (va.val.object.nPairs != vb.val.object.nPairs)
247 res = (va.val.object.nPairs > vb.val.object.nPairs) ? 1 : -1;
248 break;
249 case jbvBinary:
250 elog(ERROR, "unexpected jbvBinary value");
251 break;
252 case jbvDatetime:
253 elog(ERROR, "unexpected jbvDatetime value");
254 break;
255 }
256 }
257 else
258 {
259 /* Type-defined order */
260 res = (va.type > vb.type) ? 1 : -1;
261 }
262 }
263 else
264 {
265 /*
266 * It's safe to assume that the types differed, and that the va
267 * and vb values passed were set.
268 *
269 * If the two values were of the same container type, then there'd
270 * have been a chance to observe the variation in the number of
271 * elements/pairs (when processing WJB_BEGIN_OBJECT, say). They're
272 * either two heterogeneously-typed containers, or a container and
273 * some scalar type.
274 *
275 * We don't have to consider the WJB_END_ARRAY and WJB_END_OBJECT
276 * cases here, because we would have seen the corresponding
277 * WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT tokens first, and
278 * concluded that they don't match.
279 */
280 Assert(ra != WJB_END_ARRAY && ra != WJB_END_OBJECT);
281 Assert(rb != WJB_END_ARRAY && rb != WJB_END_OBJECT);
282
283 Assert(va.type != vb.type);
284 Assert(va.type != jbvBinary);
285 Assert(vb.type != jbvBinary);
286 /* Type-defined order */
287 res = (va.type > vb.type) ? 1 : -1;
288 }
289 }
290 while (res == 0);
291
292 while (ita != NULL)
293 {
294 JsonbIterator *i = ita->parent;
295
296 pfree(ita);
297 ita = i;
298 }
299 while (itb != NULL)
300 {
301 JsonbIterator *i = itb->parent;
302
303 pfree(itb);
304 itb = i;
305 }
306
307 return res;
308 }
309
310 /*
311 * Find value in object (i.e. the "value" part of some key/value pair in an
312 * object), or find a matching element if we're looking through an array. Do
313 * so on the basis of equality of the object keys only, or alternatively
314 * element values only, with a caller-supplied value "key". The "flags"
315 * argument allows the caller to specify which container types are of interest.
316 *
317 * This exported utility function exists to facilitate various cases concerned
318 * with "containment". If asked to look through an object, the caller had
319 * better pass a Jsonb String, because their keys can only be strings.
320 * Otherwise, for an array, any type of JsonbValue will do.
321 *
322 * In order to proceed with the search, it is necessary for callers to have
323 * both specified an interest in exactly one particular container type with an
324 * appropriate flag, as well as having the pointed-to Jsonb container be of
325 * one of those same container types at the top level. (Actually, we just do
326 * whichever makes sense to save callers the trouble of figuring it out - at
327 * most one can make sense, because the container either points to an array
328 * (possibly a "raw scalar" pseudo array) or an object.)
329 *
330 * Note that we can return a jbvBinary JsonbValue if this is called on an
331 * object, but we never do so on an array. If the caller asks to look through
332 * a container type that is not of the type pointed to by the container,
333 * immediately fall through and return NULL. If we cannot find the value,
334 * return NULL. Otherwise, return palloc()'d copy of value.
335 */
336 JsonbValue *
findJsonbValueFromContainer(JsonbContainer * container,uint32 flags,JsonbValue * key)337 findJsonbValueFromContainer(JsonbContainer *container, uint32 flags,
338 JsonbValue *key)
339 {
340 JEntry *children = container->children;
341 int count = JsonContainerSize(container);
342
343 Assert((flags & ~(JB_FARRAY | JB_FOBJECT)) == 0);
344
345 /* Quick out without a palloc cycle if object/array is empty */
346 if (count <= 0)
347 return NULL;
348
349 if ((flags & JB_FARRAY) && JsonContainerIsArray(container))
350 {
351 JsonbValue *result = palloc(sizeof(JsonbValue));
352 char *base_addr = (char *) (children + count);
353 uint32 offset = 0;
354 int i;
355
356 for (i = 0; i < count; i++)
357 {
358 fillJsonbValue(container, i, base_addr, offset, result);
359
360 if (key->type == result->type)
361 {
362 if (equalsJsonbScalarValue(key, result))
363 return result;
364 }
365
366 JBE_ADVANCE_OFFSET(offset, children[i]);
367 }
368
369 pfree(result);
370 }
371 else if ((flags & JB_FOBJECT) && JsonContainerIsObject(container))
372 {
373 /* Object key passed by caller must be a string */
374 Assert(key->type == jbvString);
375
376 return getKeyJsonValueFromContainer(container, key->val.string.val,
377 key->val.string.len, NULL);
378 }
379
380 /* Not found */
381 return NULL;
382 }
383
384 /*
385 * Find value by key in Jsonb object and fetch it into 'res', which is also
386 * returned.
387 *
388 * 'res' can be passed in as NULL, in which case it's newly palloc'ed here.
389 */
390 JsonbValue *
getKeyJsonValueFromContainer(JsonbContainer * container,const char * keyVal,int keyLen,JsonbValue * res)391 getKeyJsonValueFromContainer(JsonbContainer *container,
392 const char *keyVal, int keyLen, JsonbValue *res)
393 {
394 JEntry *children = container->children;
395 int count = JsonContainerSize(container);
396 char *baseAddr;
397 uint32 stopLow,
398 stopHigh;
399
400 Assert(JsonContainerIsObject(container));
401
402 /* Quick out without a palloc cycle if object is empty */
403 if (count <= 0)
404 return NULL;
405
406 /*
407 * Binary search the container. Since we know this is an object, account
408 * for *Pairs* of Jentrys
409 */
410 baseAddr = (char *) (children + count * 2);
411 stopLow = 0;
412 stopHigh = count;
413 while (stopLow < stopHigh)
414 {
415 uint32 stopMiddle;
416 int difference;
417 const char *candidateVal;
418 int candidateLen;
419
420 stopMiddle = stopLow + (stopHigh - stopLow) / 2;
421
422 candidateVal = baseAddr + getJsonbOffset(container, stopMiddle);
423 candidateLen = getJsonbLength(container, stopMiddle);
424
425 difference = lengthCompareJsonbString(candidateVal, candidateLen,
426 keyVal, keyLen);
427
428 if (difference == 0)
429 {
430 /* Found our key, return corresponding value */
431 int index = stopMiddle + count;
432
433 if (!res)
434 res = palloc(sizeof(JsonbValue));
435
436 fillJsonbValue(container, index, baseAddr,
437 getJsonbOffset(container, index),
438 res);
439
440 return res;
441 }
442 else
443 {
444 if (difference < 0)
445 stopLow = stopMiddle + 1;
446 else
447 stopHigh = stopMiddle;
448 }
449 }
450
451 /* Not found */
452 return NULL;
453 }
454
455 /*
456 * Get i-th value of a Jsonb array.
457 *
458 * Returns palloc()'d copy of the value, or NULL if it does not exist.
459 */
460 JsonbValue *
getIthJsonbValueFromContainer(JsonbContainer * container,uint32 i)461 getIthJsonbValueFromContainer(JsonbContainer *container, uint32 i)
462 {
463 JsonbValue *result;
464 char *base_addr;
465 uint32 nelements;
466
467 if (!JsonContainerIsArray(container))
468 elog(ERROR, "not a jsonb array");
469
470 nelements = JsonContainerSize(container);
471 base_addr = (char *) &container->children[nelements];
472
473 if (i >= nelements)
474 return NULL;
475
476 result = palloc(sizeof(JsonbValue));
477
478 fillJsonbValue(container, i, base_addr,
479 getJsonbOffset(container, i),
480 result);
481
482 return result;
483 }
484
485 /*
486 * A helper function to fill in a JsonbValue to represent an element of an
487 * array, or a key or value of an object.
488 *
489 * The node's JEntry is at container->children[index], and its variable-length
490 * data is at base_addr + offset. We make the caller determine the offset
491 * since in many cases the caller can amortize that work across multiple
492 * children. When it can't, it can just call getJsonbOffset().
493 *
494 * A nested array or object will be returned as jbvBinary, ie. it won't be
495 * expanded.
496 */
497 static void
fillJsonbValue(JsonbContainer * container,int index,char * base_addr,uint32 offset,JsonbValue * result)498 fillJsonbValue(JsonbContainer *container, int index,
499 char *base_addr, uint32 offset,
500 JsonbValue *result)
501 {
502 JEntry entry = container->children[index];
503
504 if (JBE_ISNULL(entry))
505 {
506 result->type = jbvNull;
507 }
508 else if (JBE_ISSTRING(entry))
509 {
510 result->type = jbvString;
511 result->val.string.val = base_addr + offset;
512 result->val.string.len = getJsonbLength(container, index);
513 Assert(result->val.string.len >= 0);
514 }
515 else if (JBE_ISNUMERIC(entry))
516 {
517 result->type = jbvNumeric;
518 result->val.numeric = (Numeric) (base_addr + INTALIGN(offset));
519 }
520 else if (JBE_ISBOOL_TRUE(entry))
521 {
522 result->type = jbvBool;
523 result->val.boolean = true;
524 }
525 else if (JBE_ISBOOL_FALSE(entry))
526 {
527 result->type = jbvBool;
528 result->val.boolean = false;
529 }
530 else
531 {
532 Assert(JBE_ISCONTAINER(entry));
533 result->type = jbvBinary;
534 /* Remove alignment padding from data pointer and length */
535 result->val.binary.data = (JsonbContainer *) (base_addr + INTALIGN(offset));
536 result->val.binary.len = getJsonbLength(container, index) -
537 (INTALIGN(offset) - offset);
538 }
539 }
540
541 /*
542 * Push JsonbValue into JsonbParseState.
543 *
544 * Used when parsing JSON tokens to form Jsonb, or when converting an in-memory
545 * JsonbValue to a Jsonb.
546 *
547 * Initial state of *JsonbParseState is NULL, since it'll be allocated here
548 * originally (caller will get JsonbParseState back by reference).
549 *
550 * Only sequential tokens pertaining to non-container types should pass a
551 * JsonbValue. There is one exception -- WJB_BEGIN_ARRAY callers may pass a
552 * "raw scalar" pseudo array to append it - the actual scalar should be passed
553 * next and it will be added as the only member of the array.
554 *
555 * Values of type jbvBinary, which are rolled up arrays and objects,
556 * are unpacked before being added to the result.
557 */
558 JsonbValue *
pushJsonbValue(JsonbParseState ** pstate,JsonbIteratorToken seq,JsonbValue * jbval)559 pushJsonbValue(JsonbParseState **pstate, JsonbIteratorToken seq,
560 JsonbValue *jbval)
561 {
562 JsonbIterator *it;
563 JsonbValue *res = NULL;
564 JsonbValue v;
565 JsonbIteratorToken tok;
566
567 if (!jbval || (seq != WJB_ELEM && seq != WJB_VALUE) ||
568 jbval->type != jbvBinary)
569 {
570 /* drop through */
571 return pushJsonbValueScalar(pstate, seq, jbval);
572 }
573
574 /* unpack the binary and add each piece to the pstate */
575 it = JsonbIteratorInit(jbval->val.binary.data);
576 while ((tok = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
577 res = pushJsonbValueScalar(pstate, tok,
578 tok < WJB_BEGIN_ARRAY ? &v : NULL);
579
580 return res;
581 }
582
583 /*
584 * Do the actual pushing, with only scalar or pseudo-scalar-array values
585 * accepted.
586 */
587 static JsonbValue *
pushJsonbValueScalar(JsonbParseState ** pstate,JsonbIteratorToken seq,JsonbValue * scalarVal)588 pushJsonbValueScalar(JsonbParseState **pstate, JsonbIteratorToken seq,
589 JsonbValue *scalarVal)
590 {
591 JsonbValue *result = NULL;
592
593 switch (seq)
594 {
595 case WJB_BEGIN_ARRAY:
596 Assert(!scalarVal || scalarVal->val.array.rawScalar);
597 *pstate = pushState(pstate);
598 result = &(*pstate)->contVal;
599 (*pstate)->contVal.type = jbvArray;
600 (*pstate)->contVal.val.array.nElems = 0;
601 (*pstate)->contVal.val.array.rawScalar = (scalarVal &&
602 scalarVal->val.array.rawScalar);
603 if (scalarVal && scalarVal->val.array.nElems > 0)
604 {
605 /* Assume that this array is still really a scalar */
606 Assert(scalarVal->type == jbvArray);
607 (*pstate)->size = scalarVal->val.array.nElems;
608 }
609 else
610 {
611 (*pstate)->size = 4;
612 }
613 (*pstate)->contVal.val.array.elems = palloc(sizeof(JsonbValue) *
614 (*pstate)->size);
615 break;
616 case WJB_BEGIN_OBJECT:
617 Assert(!scalarVal);
618 *pstate = pushState(pstate);
619 result = &(*pstate)->contVal;
620 (*pstate)->contVal.type = jbvObject;
621 (*pstate)->contVal.val.object.nPairs = 0;
622 (*pstate)->size = 4;
623 (*pstate)->contVal.val.object.pairs = palloc(sizeof(JsonbPair) *
624 (*pstate)->size);
625 break;
626 case WJB_KEY:
627 Assert(scalarVal->type == jbvString);
628 appendKey(*pstate, scalarVal);
629 break;
630 case WJB_VALUE:
631 Assert(IsAJsonbScalar(scalarVal));
632 appendValue(*pstate, scalarVal);
633 break;
634 case WJB_ELEM:
635 Assert(IsAJsonbScalar(scalarVal));
636 appendElement(*pstate, scalarVal);
637 break;
638 case WJB_END_OBJECT:
639 uniqueifyJsonbObject(&(*pstate)->contVal);
640 /* fall through! */
641 case WJB_END_ARRAY:
642 /* Steps here common to WJB_END_OBJECT case */
643 Assert(!scalarVal);
644 result = &(*pstate)->contVal;
645
646 /*
647 * Pop stack and push current array/object as value in parent
648 * array/object
649 */
650 *pstate = (*pstate)->next;
651 if (*pstate)
652 {
653 switch ((*pstate)->contVal.type)
654 {
655 case jbvArray:
656 appendElement(*pstate, result);
657 break;
658 case jbvObject:
659 appendValue(*pstate, result);
660 break;
661 default:
662 elog(ERROR, "invalid jsonb container type");
663 }
664 }
665 break;
666 default:
667 elog(ERROR, "unrecognized jsonb sequential processing token");
668 }
669
670 return result;
671 }
672
673 /*
674 * pushJsonbValue() worker: Iteration-like forming of Jsonb
675 */
676 static JsonbParseState *
pushState(JsonbParseState ** pstate)677 pushState(JsonbParseState **pstate)
678 {
679 JsonbParseState *ns = palloc(sizeof(JsonbParseState));
680
681 ns->next = *pstate;
682 return ns;
683 }
684
685 /*
686 * pushJsonbValue() worker: Append a pair key to state when generating a Jsonb
687 */
688 static void
appendKey(JsonbParseState * pstate,JsonbValue * string)689 appendKey(JsonbParseState *pstate, JsonbValue *string)
690 {
691 JsonbValue *object = &pstate->contVal;
692
693 Assert(object->type == jbvObject);
694 Assert(string->type == jbvString);
695
696 if (object->val.object.nPairs >= JSONB_MAX_PAIRS)
697 ereport(ERROR,
698 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
699 errmsg("number of jsonb object pairs exceeds the maximum allowed (%zu)",
700 JSONB_MAX_PAIRS)));
701
702 if (object->val.object.nPairs >= pstate->size)
703 {
704 pstate->size *= 2;
705 object->val.object.pairs = repalloc(object->val.object.pairs,
706 sizeof(JsonbPair) * pstate->size);
707 }
708
709 object->val.object.pairs[object->val.object.nPairs].key = *string;
710 object->val.object.pairs[object->val.object.nPairs].order = object->val.object.nPairs;
711 }
712
713 /*
714 * pushJsonbValue() worker: Append a pair value to state when generating a
715 * Jsonb
716 */
717 static void
appendValue(JsonbParseState * pstate,JsonbValue * scalarVal)718 appendValue(JsonbParseState *pstate, JsonbValue *scalarVal)
719 {
720 JsonbValue *object = &pstate->contVal;
721
722 Assert(object->type == jbvObject);
723
724 object->val.object.pairs[object->val.object.nPairs++].value = *scalarVal;
725 }
726
727 /*
728 * pushJsonbValue() worker: Append an element to state when generating a Jsonb
729 */
730 static void
appendElement(JsonbParseState * pstate,JsonbValue * scalarVal)731 appendElement(JsonbParseState *pstate, JsonbValue *scalarVal)
732 {
733 JsonbValue *array = &pstate->contVal;
734
735 Assert(array->type == jbvArray);
736
737 if (array->val.array.nElems >= JSONB_MAX_ELEMS)
738 ereport(ERROR,
739 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
740 errmsg("number of jsonb array elements exceeds the maximum allowed (%zu)",
741 JSONB_MAX_ELEMS)));
742
743 if (array->val.array.nElems >= pstate->size)
744 {
745 pstate->size *= 2;
746 array->val.array.elems = repalloc(array->val.array.elems,
747 sizeof(JsonbValue) * pstate->size);
748 }
749
750 array->val.array.elems[array->val.array.nElems++] = *scalarVal;
751 }
752
753 /*
754 * Given a JsonbContainer, expand to JsonbIterator to iterate over items
755 * fully expanded to in-memory representation for manipulation.
756 *
757 * See JsonbIteratorNext() for notes on memory management.
758 */
759 JsonbIterator *
JsonbIteratorInit(JsonbContainer * container)760 JsonbIteratorInit(JsonbContainer *container)
761 {
762 return iteratorFromContainer(container, NULL);
763 }
764
765 /*
766 * Get next JsonbValue while iterating
767 *
768 * Caller should initially pass their own, original iterator. They may get
769 * back a child iterator palloc()'d here instead. The function can be relied
770 * on to free those child iterators, lest the memory allocated for highly
771 * nested objects become unreasonable, but only if callers don't end iteration
772 * early (by breaking upon having found something in a search, for example).
773 *
774 * Callers in such a scenario, that are particularly sensitive to leaking
775 * memory in a long-lived context may walk the ancestral tree from the final
776 * iterator we left them with to its oldest ancestor, pfree()ing as they go.
777 * They do not have to free any other memory previously allocated for iterators
778 * but not accessible as direct ancestors of the iterator they're last passed
779 * back.
780 *
781 * Returns "Jsonb sequential processing" token value. Iterator "state"
782 * reflects the current stage of the process in a less granular fashion, and is
783 * mostly used here to track things internally with respect to particular
784 * iterators.
785 *
786 * Clients of this function should not have to handle any jbvBinary values
787 * (since recursive calls will deal with this), provided skipNested is false.
788 * It is our job to expand the jbvBinary representation without bothering them
789 * with it. However, clients should not take it upon themselves to touch array
790 * or Object element/pair buffers, since their element/pair pointers are
791 * garbage. Also, *val will not be set when returning WJB_END_ARRAY or
792 * WJB_END_OBJECT, on the assumption that it's only useful to access values
793 * when recursing in.
794 */
795 JsonbIteratorToken
JsonbIteratorNext(JsonbIterator ** it,JsonbValue * val,bool skipNested)796 JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested)
797 {
798 if (*it == NULL)
799 return WJB_DONE;
800
801 /*
802 * When stepping into a nested container, we jump back here to start
803 * processing the child. We will not recurse further in one call, because
804 * processing the child will always begin in JBI_ARRAY_START or
805 * JBI_OBJECT_START state.
806 */
807 recurse:
808 switch ((*it)->state)
809 {
810 case JBI_ARRAY_START:
811 /* Set v to array on first array call */
812 val->type = jbvArray;
813 val->val.array.nElems = (*it)->nElems;
814
815 /*
816 * v->val.array.elems is not actually set, because we aren't doing
817 * a full conversion
818 */
819 val->val.array.rawScalar = (*it)->isScalar;
820 (*it)->curIndex = 0;
821 (*it)->curDataOffset = 0;
822 (*it)->curValueOffset = 0; /* not actually used */
823 /* Set state for next call */
824 (*it)->state = JBI_ARRAY_ELEM;
825 return WJB_BEGIN_ARRAY;
826
827 case JBI_ARRAY_ELEM:
828 if ((*it)->curIndex >= (*it)->nElems)
829 {
830 /*
831 * All elements within array already processed. Report this
832 * to caller, and give it back original parent iterator (which
833 * independently tracks iteration progress at its level of
834 * nesting).
835 */
836 *it = freeAndGetParent(*it);
837 return WJB_END_ARRAY;
838 }
839
840 fillJsonbValue((*it)->container, (*it)->curIndex,
841 (*it)->dataProper, (*it)->curDataOffset,
842 val);
843
844 JBE_ADVANCE_OFFSET((*it)->curDataOffset,
845 (*it)->children[(*it)->curIndex]);
846 (*it)->curIndex++;
847
848 if (!IsAJsonbScalar(val) && !skipNested)
849 {
850 /* Recurse into container. */
851 *it = iteratorFromContainer(val->val.binary.data, *it);
852 goto recurse;
853 }
854 else
855 {
856 /*
857 * Scalar item in array, or a container and caller didn't want
858 * us to recurse into it.
859 */
860 return WJB_ELEM;
861 }
862
863 case JBI_OBJECT_START:
864 /* Set v to object on first object call */
865 val->type = jbvObject;
866 val->val.object.nPairs = (*it)->nElems;
867
868 /*
869 * v->val.object.pairs is not actually set, because we aren't
870 * doing a full conversion
871 */
872 (*it)->curIndex = 0;
873 (*it)->curDataOffset = 0;
874 (*it)->curValueOffset = getJsonbOffset((*it)->container,
875 (*it)->nElems);
876 /* Set state for next call */
877 (*it)->state = JBI_OBJECT_KEY;
878 return WJB_BEGIN_OBJECT;
879
880 case JBI_OBJECT_KEY:
881 if ((*it)->curIndex >= (*it)->nElems)
882 {
883 /*
884 * All pairs within object already processed. Report this to
885 * caller, and give it back original containing iterator
886 * (which independently tracks iteration progress at its level
887 * of nesting).
888 */
889 *it = freeAndGetParent(*it);
890 return WJB_END_OBJECT;
891 }
892 else
893 {
894 /* Return key of a key/value pair. */
895 fillJsonbValue((*it)->container, (*it)->curIndex,
896 (*it)->dataProper, (*it)->curDataOffset,
897 val);
898 if (val->type != jbvString)
899 elog(ERROR, "unexpected jsonb type as object key");
900
901 /* Set state for next call */
902 (*it)->state = JBI_OBJECT_VALUE;
903 return WJB_KEY;
904 }
905
906 case JBI_OBJECT_VALUE:
907 /* Set state for next call */
908 (*it)->state = JBI_OBJECT_KEY;
909
910 fillJsonbValue((*it)->container, (*it)->curIndex + (*it)->nElems,
911 (*it)->dataProper, (*it)->curValueOffset,
912 val);
913
914 JBE_ADVANCE_OFFSET((*it)->curDataOffset,
915 (*it)->children[(*it)->curIndex]);
916 JBE_ADVANCE_OFFSET((*it)->curValueOffset,
917 (*it)->children[(*it)->curIndex + (*it)->nElems]);
918 (*it)->curIndex++;
919
920 /*
921 * Value may be a container, in which case we recurse with new,
922 * child iterator (unless the caller asked not to, by passing
923 * skipNested).
924 */
925 if (!IsAJsonbScalar(val) && !skipNested)
926 {
927 *it = iteratorFromContainer(val->val.binary.data, *it);
928 goto recurse;
929 }
930 else
931 return WJB_VALUE;
932 }
933
934 elog(ERROR, "invalid iterator state");
935 return -1;
936 }
937
938 /*
939 * Initialize an iterator for iterating all elements in a container.
940 */
941 static JsonbIterator *
iteratorFromContainer(JsonbContainer * container,JsonbIterator * parent)942 iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent)
943 {
944 JsonbIterator *it;
945
946 it = palloc0(sizeof(JsonbIterator));
947 it->container = container;
948 it->parent = parent;
949 it->nElems = JsonContainerSize(container);
950
951 /* Array starts just after header */
952 it->children = container->children;
953
954 switch (container->header & (JB_FARRAY | JB_FOBJECT))
955 {
956 case JB_FARRAY:
957 it->dataProper =
958 (char *) it->children + it->nElems * sizeof(JEntry);
959 it->isScalar = JsonContainerIsScalar(container);
960 /* This is either a "raw scalar", or an array */
961 Assert(!it->isScalar || it->nElems == 1);
962
963 it->state = JBI_ARRAY_START;
964 break;
965
966 case JB_FOBJECT:
967 it->dataProper =
968 (char *) it->children + it->nElems * sizeof(JEntry) * 2;
969 it->state = JBI_OBJECT_START;
970 break;
971
972 default:
973 elog(ERROR, "unknown type of jsonb container");
974 }
975
976 return it;
977 }
978
979 /*
980 * JsonbIteratorNext() worker: Return parent, while freeing memory for current
981 * iterator
982 */
983 static JsonbIterator *
freeAndGetParent(JsonbIterator * it)984 freeAndGetParent(JsonbIterator *it)
985 {
986 JsonbIterator *v = it->parent;
987
988 pfree(it);
989 return v;
990 }
991
992 /*
993 * Worker for "contains" operator's function
994 *
995 * Formally speaking, containment is top-down, unordered subtree isomorphism.
996 *
997 * Takes iterators that belong to some container type. These iterators
998 * "belong" to those values in the sense that they've just been initialized in
999 * respect of them by the caller (perhaps in a nested fashion).
1000 *
1001 * "val" is lhs Jsonb, and mContained is rhs Jsonb when called from top level.
1002 * We determine if mContained is contained within val.
1003 */
1004 bool
JsonbDeepContains(JsonbIterator ** val,JsonbIterator ** mContained)1005 JsonbDeepContains(JsonbIterator **val, JsonbIterator **mContained)
1006 {
1007 JsonbValue vval,
1008 vcontained;
1009 JsonbIteratorToken rval,
1010 rcont;
1011
1012 /*
1013 * Guard against stack overflow due to overly complex Jsonb.
1014 *
1015 * Functions called here independently take this precaution, but that
1016 * might not be sufficient since this is also a recursive function.
1017 */
1018 check_stack_depth();
1019
1020 rval = JsonbIteratorNext(val, &vval, false);
1021 rcont = JsonbIteratorNext(mContained, &vcontained, false);
1022
1023 if (rval != rcont)
1024 {
1025 /*
1026 * The differing return values can immediately be taken as indicating
1027 * two differing container types at this nesting level, which is
1028 * sufficient reason to give up entirely (but it should be the case
1029 * that they're both some container type).
1030 */
1031 Assert(rval == WJB_BEGIN_OBJECT || rval == WJB_BEGIN_ARRAY);
1032 Assert(rcont == WJB_BEGIN_OBJECT || rcont == WJB_BEGIN_ARRAY);
1033 return false;
1034 }
1035 else if (rcont == WJB_BEGIN_OBJECT)
1036 {
1037 Assert(vval.type == jbvObject);
1038 Assert(vcontained.type == jbvObject);
1039
1040 /*
1041 * If the lhs has fewer pairs than the rhs, it can't possibly contain
1042 * the rhs. (This conclusion is safe only because we de-duplicate
1043 * keys in all Jsonb objects; thus there can be no corresponding
1044 * optimization in the array case.) The case probably won't arise
1045 * often, but since it's such a cheap check we may as well make it.
1046 */
1047 if (vval.val.object.nPairs < vcontained.val.object.nPairs)
1048 return false;
1049
1050 /* Work through rhs "is it contained within?" object */
1051 for (;;)
1052 {
1053 JsonbValue *lhsVal; /* lhsVal is from pair in lhs object */
1054 JsonbValue lhsValBuf;
1055
1056 rcont = JsonbIteratorNext(mContained, &vcontained, false);
1057
1058 /*
1059 * When we get through caller's rhs "is it contained within?"
1060 * object without failing to find one of its values, it's
1061 * contained.
1062 */
1063 if (rcont == WJB_END_OBJECT)
1064 return true;
1065
1066 Assert(rcont == WJB_KEY);
1067 Assert(vcontained.type == jbvString);
1068
1069 /* First, find value by key... */
1070 lhsVal =
1071 getKeyJsonValueFromContainer((*val)->container,
1072 vcontained.val.string.val,
1073 vcontained.val.string.len,
1074 &lhsValBuf);
1075 if (!lhsVal)
1076 return false;
1077
1078 /*
1079 * ...at this stage it is apparent that there is at least a key
1080 * match for this rhs pair.
1081 */
1082 rcont = JsonbIteratorNext(mContained, &vcontained, true);
1083
1084 Assert(rcont == WJB_VALUE);
1085
1086 /*
1087 * Compare rhs pair's value with lhs pair's value just found using
1088 * key
1089 */
1090 if (lhsVal->type != vcontained.type)
1091 {
1092 return false;
1093 }
1094 else if (IsAJsonbScalar(lhsVal))
1095 {
1096 if (!equalsJsonbScalarValue(lhsVal, &vcontained))
1097 return false;
1098 }
1099 else
1100 {
1101 /* Nested container value (object or array) */
1102 JsonbIterator *nestval,
1103 *nestContained;
1104
1105 Assert(lhsVal->type == jbvBinary);
1106 Assert(vcontained.type == jbvBinary);
1107
1108 nestval = JsonbIteratorInit(lhsVal->val.binary.data);
1109 nestContained = JsonbIteratorInit(vcontained.val.binary.data);
1110
1111 /*
1112 * Match "value" side of rhs datum object's pair recursively.
1113 * It's a nested structure.
1114 *
1115 * Note that nesting still has to "match up" at the right
1116 * nesting sub-levels. However, there need only be zero or
1117 * more matching pairs (or elements) at each nesting level
1118 * (provided the *rhs* pairs/elements *all* match on each
1119 * level), which enables searching nested structures for a
1120 * single String or other primitive type sub-datum quite
1121 * effectively (provided the user constructed the rhs nested
1122 * structure such that we "know where to look").
1123 *
1124 * In other words, the mapping of container nodes in the rhs
1125 * "vcontained" Jsonb to internal nodes on the lhs is
1126 * injective, and parent-child edges on the rhs must be mapped
1127 * to parent-child edges on the lhs to satisfy the condition
1128 * of containment (plus of course the mapped nodes must be
1129 * equal).
1130 */
1131 if (!JsonbDeepContains(&nestval, &nestContained))
1132 return false;
1133 }
1134 }
1135 }
1136 else if (rcont == WJB_BEGIN_ARRAY)
1137 {
1138 JsonbValue *lhsConts = NULL;
1139 uint32 nLhsElems = vval.val.array.nElems;
1140
1141 Assert(vval.type == jbvArray);
1142 Assert(vcontained.type == jbvArray);
1143
1144 /*
1145 * Handle distinction between "raw scalar" pseudo arrays, and real
1146 * arrays.
1147 *
1148 * A raw scalar may contain another raw scalar, and an array may
1149 * contain a raw scalar, but a raw scalar may not contain an array. We
1150 * don't do something like this for the object case, since objects can
1151 * only contain pairs, never raw scalars (a pair is represented by an
1152 * rhs object argument with a single contained pair).
1153 */
1154 if (vval.val.array.rawScalar && !vcontained.val.array.rawScalar)
1155 return false;
1156
1157 /* Work through rhs "is it contained within?" array */
1158 for (;;)
1159 {
1160 rcont = JsonbIteratorNext(mContained, &vcontained, true);
1161
1162 /*
1163 * When we get through caller's rhs "is it contained within?"
1164 * array without failing to find one of its values, it's
1165 * contained.
1166 */
1167 if (rcont == WJB_END_ARRAY)
1168 return true;
1169
1170 Assert(rcont == WJB_ELEM);
1171
1172 if (IsAJsonbScalar(&vcontained))
1173 {
1174 if (!findJsonbValueFromContainer((*val)->container,
1175 JB_FARRAY,
1176 &vcontained))
1177 return false;
1178 }
1179 else
1180 {
1181 uint32 i;
1182
1183 /*
1184 * If this is first container found in rhs array (at this
1185 * depth), initialize temp lhs array of containers
1186 */
1187 if (lhsConts == NULL)
1188 {
1189 uint32 j = 0;
1190
1191 /* Make room for all possible values */
1192 lhsConts = palloc(sizeof(JsonbValue) * nLhsElems);
1193
1194 for (i = 0; i < nLhsElems; i++)
1195 {
1196 /* Store all lhs elements in temp array */
1197 rcont = JsonbIteratorNext(val, &vval, true);
1198 Assert(rcont == WJB_ELEM);
1199
1200 if (vval.type == jbvBinary)
1201 lhsConts[j++] = vval;
1202 }
1203
1204 /* No container elements in temp array, so give up now */
1205 if (j == 0)
1206 return false;
1207
1208 /* We may have only partially filled array */
1209 nLhsElems = j;
1210 }
1211
1212 /* XXX: Nested array containment is O(N^2) */
1213 for (i = 0; i < nLhsElems; i++)
1214 {
1215 /* Nested container value (object or array) */
1216 JsonbIterator *nestval,
1217 *nestContained;
1218 bool contains;
1219
1220 nestval = JsonbIteratorInit(lhsConts[i].val.binary.data);
1221 nestContained = JsonbIteratorInit(vcontained.val.binary.data);
1222
1223 contains = JsonbDeepContains(&nestval, &nestContained);
1224
1225 if (nestval)
1226 pfree(nestval);
1227 if (nestContained)
1228 pfree(nestContained);
1229 if (contains)
1230 break;
1231 }
1232
1233 /*
1234 * Report rhs container value is not contained if couldn't
1235 * match rhs container to *some* lhs cont
1236 */
1237 if (i == nLhsElems)
1238 return false;
1239 }
1240 }
1241 }
1242 else
1243 {
1244 elog(ERROR, "invalid jsonb container type");
1245 }
1246
1247 elog(ERROR, "unexpectedly fell off end of jsonb container");
1248 return false;
1249 }
1250
1251 /*
1252 * Hash a JsonbValue scalar value, mixing the hash value into an existing
1253 * hash provided by the caller.
1254 *
1255 * Some callers may wish to independently XOR in JB_FOBJECT and JB_FARRAY
1256 * flags.
1257 */
1258 void
JsonbHashScalarValue(const JsonbValue * scalarVal,uint32 * hash)1259 JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash)
1260 {
1261 uint32 tmp;
1262
1263 /* Compute hash value for scalarVal */
1264 switch (scalarVal->type)
1265 {
1266 case jbvNull:
1267 tmp = 0x01;
1268 break;
1269 case jbvString:
1270 tmp = DatumGetUInt32(hash_any((const unsigned char *) scalarVal->val.string.val,
1271 scalarVal->val.string.len));
1272 break;
1273 case jbvNumeric:
1274 /* Must hash equal numerics to equal hash codes */
1275 tmp = DatumGetUInt32(DirectFunctionCall1(hash_numeric,
1276 NumericGetDatum(scalarVal->val.numeric)));
1277 break;
1278 case jbvBool:
1279 tmp = scalarVal->val.boolean ? 0x02 : 0x04;
1280
1281 break;
1282 default:
1283 elog(ERROR, "invalid jsonb scalar type");
1284 tmp = 0; /* keep compiler quiet */
1285 break;
1286 }
1287
1288 /*
1289 * Combine hash values of successive keys, values and elements by rotating
1290 * the previous value left 1 bit, then XOR'ing in the new
1291 * key/value/element's hash value.
1292 */
1293 *hash = (*hash << 1) | (*hash >> 31);
1294 *hash ^= tmp;
1295 }
1296
1297 /*
1298 * Hash a value to a 64-bit value, with a seed. Otherwise, similar to
1299 * JsonbHashScalarValue.
1300 */
1301 void
JsonbHashScalarValueExtended(const JsonbValue * scalarVal,uint64 * hash,uint64 seed)1302 JsonbHashScalarValueExtended(const JsonbValue *scalarVal, uint64 *hash,
1303 uint64 seed)
1304 {
1305 uint64 tmp;
1306
1307 switch (scalarVal->type)
1308 {
1309 case jbvNull:
1310 tmp = seed + 0x01;
1311 break;
1312 case jbvString:
1313 tmp = DatumGetUInt64(hash_any_extended((const unsigned char *) scalarVal->val.string.val,
1314 scalarVal->val.string.len,
1315 seed));
1316 break;
1317 case jbvNumeric:
1318 tmp = DatumGetUInt64(DirectFunctionCall2(hash_numeric_extended,
1319 NumericGetDatum(scalarVal->val.numeric),
1320 UInt64GetDatum(seed)));
1321 break;
1322 case jbvBool:
1323 if (seed)
1324 tmp = DatumGetUInt64(DirectFunctionCall2(hashcharextended,
1325 BoolGetDatum(scalarVal->val.boolean),
1326 UInt64GetDatum(seed)));
1327 else
1328 tmp = scalarVal->val.boolean ? 0x02 : 0x04;
1329
1330 break;
1331 default:
1332 elog(ERROR, "invalid jsonb scalar type");
1333 break;
1334 }
1335
1336 *hash = ROTATE_HIGH_AND_LOW_32BITS(*hash);
1337 *hash ^= tmp;
1338 }
1339
1340 /*
1341 * Are two scalar JsonbValues of the same type a and b equal?
1342 */
1343 static bool
equalsJsonbScalarValue(JsonbValue * aScalar,JsonbValue * bScalar)1344 equalsJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar)
1345 {
1346 if (aScalar->type == bScalar->type)
1347 {
1348 switch (aScalar->type)
1349 {
1350 case jbvNull:
1351 return true;
1352 case jbvString:
1353 return lengthCompareJsonbStringValue(aScalar, bScalar) == 0;
1354 case jbvNumeric:
1355 return DatumGetBool(DirectFunctionCall2(numeric_eq,
1356 PointerGetDatum(aScalar->val.numeric),
1357 PointerGetDatum(bScalar->val.numeric)));
1358 case jbvBool:
1359 return aScalar->val.boolean == bScalar->val.boolean;
1360
1361 default:
1362 elog(ERROR, "invalid jsonb scalar type");
1363 }
1364 }
1365 elog(ERROR, "jsonb scalar type mismatch");
1366 return false;
1367 }
1368
1369 /*
1370 * Compare two scalar JsonbValues, returning -1, 0, or 1.
1371 *
1372 * Strings are compared using the default collation. Used by B-tree
1373 * operators, where a lexical sort order is generally expected.
1374 */
1375 static int
compareJsonbScalarValue(JsonbValue * aScalar,JsonbValue * bScalar)1376 compareJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar)
1377 {
1378 if (aScalar->type == bScalar->type)
1379 {
1380 switch (aScalar->type)
1381 {
1382 case jbvNull:
1383 return 0;
1384 case jbvString:
1385 return varstr_cmp(aScalar->val.string.val,
1386 aScalar->val.string.len,
1387 bScalar->val.string.val,
1388 bScalar->val.string.len,
1389 DEFAULT_COLLATION_OID);
1390 case jbvNumeric:
1391 return DatumGetInt32(DirectFunctionCall2(numeric_cmp,
1392 PointerGetDatum(aScalar->val.numeric),
1393 PointerGetDatum(bScalar->val.numeric)));
1394 case jbvBool:
1395 if (aScalar->val.boolean == bScalar->val.boolean)
1396 return 0;
1397 else if (aScalar->val.boolean > bScalar->val.boolean)
1398 return 1;
1399 else
1400 return -1;
1401 default:
1402 elog(ERROR, "invalid jsonb scalar type");
1403 }
1404 }
1405 elog(ERROR, "jsonb scalar type mismatch");
1406 return -1;
1407 }
1408
1409
1410 /*
1411 * Functions for manipulating the resizable buffer used by convertJsonb and
1412 * its subroutines.
1413 */
1414
1415 /*
1416 * Reserve 'len' bytes, at the end of the buffer, enlarging it if necessary.
1417 * Returns the offset to the reserved area. The caller is expected to fill
1418 * the reserved area later with copyToBuffer().
1419 */
1420 static int
reserveFromBuffer(StringInfo buffer,int len)1421 reserveFromBuffer(StringInfo buffer, int len)
1422 {
1423 int offset;
1424
1425 /* Make more room if needed */
1426 enlargeStringInfo(buffer, len);
1427
1428 /* remember current offset */
1429 offset = buffer->len;
1430
1431 /* reserve the space */
1432 buffer->len += len;
1433
1434 /*
1435 * Keep a trailing null in place, even though it's not useful for us; it
1436 * seems best to preserve the invariants of StringInfos.
1437 */
1438 buffer->data[buffer->len] = '\0';
1439
1440 return offset;
1441 }
1442
1443 /*
1444 * Copy 'len' bytes to a previously reserved area in buffer.
1445 */
1446 static void
copyToBuffer(StringInfo buffer,int offset,const char * data,int len)1447 copyToBuffer(StringInfo buffer, int offset, const char *data, int len)
1448 {
1449 memcpy(buffer->data + offset, data, len);
1450 }
1451
1452 /*
1453 * A shorthand for reserveFromBuffer + copyToBuffer.
1454 */
1455 static void
appendToBuffer(StringInfo buffer,const char * data,int len)1456 appendToBuffer(StringInfo buffer, const char *data, int len)
1457 {
1458 int offset;
1459
1460 offset = reserveFromBuffer(buffer, len);
1461 copyToBuffer(buffer, offset, data, len);
1462 }
1463
1464
1465 /*
1466 * Append padding, so that the length of the StringInfo is int-aligned.
1467 * Returns the number of padding bytes appended.
1468 */
1469 static short
padBufferToInt(StringInfo buffer)1470 padBufferToInt(StringInfo buffer)
1471 {
1472 int padlen,
1473 p,
1474 offset;
1475
1476 padlen = INTALIGN(buffer->len) - buffer->len;
1477
1478 offset = reserveFromBuffer(buffer, padlen);
1479
1480 /* padlen must be small, so this is probably faster than a memset */
1481 for (p = 0; p < padlen; p++)
1482 buffer->data[offset + p] = '\0';
1483
1484 return padlen;
1485 }
1486
1487 /*
1488 * Given a JsonbValue, convert to Jsonb. The result is palloc'd.
1489 */
1490 static Jsonb *
convertToJsonb(JsonbValue * val)1491 convertToJsonb(JsonbValue *val)
1492 {
1493 StringInfoData buffer;
1494 JEntry jentry;
1495 Jsonb *res;
1496
1497 /* Should not already have binary representation */
1498 Assert(val->type != jbvBinary);
1499
1500 /* Allocate an output buffer. It will be enlarged as needed */
1501 initStringInfo(&buffer);
1502
1503 /* Make room for the varlena header */
1504 reserveFromBuffer(&buffer, VARHDRSZ);
1505
1506 convertJsonbValue(&buffer, &jentry, val, 0);
1507
1508 /*
1509 * Note: the JEntry of the root is discarded. Therefore the root
1510 * JsonbContainer struct must contain enough information to tell what kind
1511 * of value it is.
1512 */
1513
1514 res = (Jsonb *) buffer.data;
1515
1516 SET_VARSIZE(res, buffer.len);
1517
1518 return res;
1519 }
1520
1521 /*
1522 * Subroutine of convertJsonb: serialize a single JsonbValue into buffer.
1523 *
1524 * The JEntry header for this node is returned in *header. It is filled in
1525 * with the length of this value and appropriate type bits. If we wish to
1526 * store an end offset rather than a length, it is the caller's responsibility
1527 * to adjust for that.
1528 *
1529 * If the value is an array or an object, this recurses. 'level' is only used
1530 * for debugging purposes.
1531 */
1532 static void
convertJsonbValue(StringInfo buffer,JEntry * header,JsonbValue * val,int level)1533 convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level)
1534 {
1535 check_stack_depth();
1536
1537 if (!val)
1538 return;
1539
1540 /*
1541 * A JsonbValue passed as val should never have a type of jbvBinary, and
1542 * neither should any of its sub-components. Those values will be produced
1543 * by convertJsonbArray and convertJsonbObject, the results of which will
1544 * not be passed back to this function as an argument.
1545 */
1546
1547 if (IsAJsonbScalar(val))
1548 convertJsonbScalar(buffer, header, val);
1549 else if (val->type == jbvArray)
1550 convertJsonbArray(buffer, header, val, level);
1551 else if (val->type == jbvObject)
1552 convertJsonbObject(buffer, header, val, level);
1553 else
1554 elog(ERROR, "unknown type of jsonb container to convert");
1555 }
1556
1557 static void
convertJsonbArray(StringInfo buffer,JEntry * pheader,JsonbValue * val,int level)1558 convertJsonbArray(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
1559 {
1560 int base_offset;
1561 int jentry_offset;
1562 int i;
1563 int totallen;
1564 uint32 header;
1565 int nElems = val->val.array.nElems;
1566
1567 /* Remember where in the buffer this array starts. */
1568 base_offset = buffer->len;
1569
1570 /* Align to 4-byte boundary (any padding counts as part of my data) */
1571 padBufferToInt(buffer);
1572
1573 /*
1574 * Construct the header Jentry and store it in the beginning of the
1575 * variable-length payload.
1576 */
1577 header = nElems | JB_FARRAY;
1578 if (val->val.array.rawScalar)
1579 {
1580 Assert(nElems == 1);
1581 Assert(level == 0);
1582 header |= JB_FSCALAR;
1583 }
1584
1585 appendToBuffer(buffer, (char *) &header, sizeof(uint32));
1586
1587 /* Reserve space for the JEntries of the elements. */
1588 jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nElems);
1589
1590 totallen = 0;
1591 for (i = 0; i < nElems; i++)
1592 {
1593 JsonbValue *elem = &val->val.array.elems[i];
1594 int len;
1595 JEntry meta;
1596
1597 /*
1598 * Convert element, producing a JEntry and appending its
1599 * variable-length data to buffer
1600 */
1601 convertJsonbValue(buffer, &meta, elem, level + 1);
1602
1603 len = JBE_OFFLENFLD(meta);
1604 totallen += len;
1605
1606 /*
1607 * Bail out if total variable-length data exceeds what will fit in a
1608 * JEntry length field. We check this in each iteration, not just
1609 * once at the end, to forestall possible integer overflow.
1610 */
1611 if (totallen > JENTRY_OFFLENMASK)
1612 ereport(ERROR,
1613 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1614 errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
1615 JENTRY_OFFLENMASK)));
1616
1617 /*
1618 * Convert each JB_OFFSET_STRIDE'th length to an offset.
1619 */
1620 if ((i % JB_OFFSET_STRIDE) == 0)
1621 meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
1622
1623 copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1624 jentry_offset += sizeof(JEntry);
1625 }
1626
1627 /* Total data size is everything we've appended to buffer */
1628 totallen = buffer->len - base_offset;
1629
1630 /* Check length again, since we didn't include the metadata above */
1631 if (totallen > JENTRY_OFFLENMASK)
1632 ereport(ERROR,
1633 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1634 errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
1635 JENTRY_OFFLENMASK)));
1636
1637 /* Initialize the header of this node in the container's JEntry array */
1638 *pheader = JENTRY_ISCONTAINER | totallen;
1639 }
1640
1641 static void
convertJsonbObject(StringInfo buffer,JEntry * pheader,JsonbValue * val,int level)1642 convertJsonbObject(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
1643 {
1644 int base_offset;
1645 int jentry_offset;
1646 int i;
1647 int totallen;
1648 uint32 header;
1649 int nPairs = val->val.object.nPairs;
1650
1651 /* Remember where in the buffer this object starts. */
1652 base_offset = buffer->len;
1653
1654 /* Align to 4-byte boundary (any padding counts as part of my data) */
1655 padBufferToInt(buffer);
1656
1657 /*
1658 * Construct the header Jentry and store it in the beginning of the
1659 * variable-length payload.
1660 */
1661 header = nPairs | JB_FOBJECT;
1662 appendToBuffer(buffer, (char *) &header, sizeof(uint32));
1663
1664 /* Reserve space for the JEntries of the keys and values. */
1665 jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nPairs * 2);
1666
1667 /*
1668 * Iterate over the keys, then over the values, since that is the ordering
1669 * we want in the on-disk representation.
1670 */
1671 totallen = 0;
1672 for (i = 0; i < nPairs; i++)
1673 {
1674 JsonbPair *pair = &val->val.object.pairs[i];
1675 int len;
1676 JEntry meta;
1677
1678 /*
1679 * Convert key, producing a JEntry and appending its variable-length
1680 * data to buffer
1681 */
1682 convertJsonbScalar(buffer, &meta, &pair->key);
1683
1684 len = JBE_OFFLENFLD(meta);
1685 totallen += len;
1686
1687 /*
1688 * Bail out if total variable-length data exceeds what will fit in a
1689 * JEntry length field. We check this in each iteration, not just
1690 * once at the end, to forestall possible integer overflow.
1691 */
1692 if (totallen > JENTRY_OFFLENMASK)
1693 ereport(ERROR,
1694 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1695 errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1696 JENTRY_OFFLENMASK)));
1697
1698 /*
1699 * Convert each JB_OFFSET_STRIDE'th length to an offset.
1700 */
1701 if ((i % JB_OFFSET_STRIDE) == 0)
1702 meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
1703
1704 copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1705 jentry_offset += sizeof(JEntry);
1706 }
1707 for (i = 0; i < nPairs; i++)
1708 {
1709 JsonbPair *pair = &val->val.object.pairs[i];
1710 int len;
1711 JEntry meta;
1712
1713 /*
1714 * Convert value, producing a JEntry and appending its variable-length
1715 * data to buffer
1716 */
1717 convertJsonbValue(buffer, &meta, &pair->value, level + 1);
1718
1719 len = JBE_OFFLENFLD(meta);
1720 totallen += len;
1721
1722 /*
1723 * Bail out if total variable-length data exceeds what will fit in a
1724 * JEntry length field. We check this in each iteration, not just
1725 * once at the end, to forestall possible integer overflow.
1726 */
1727 if (totallen > JENTRY_OFFLENMASK)
1728 ereport(ERROR,
1729 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1730 errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1731 JENTRY_OFFLENMASK)));
1732
1733 /*
1734 * Convert each JB_OFFSET_STRIDE'th length to an offset.
1735 */
1736 if (((i + nPairs) % JB_OFFSET_STRIDE) == 0)
1737 meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
1738
1739 copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1740 jentry_offset += sizeof(JEntry);
1741 }
1742
1743 /* Total data size is everything we've appended to buffer */
1744 totallen = buffer->len - base_offset;
1745
1746 /* Check length again, since we didn't include the metadata above */
1747 if (totallen > JENTRY_OFFLENMASK)
1748 ereport(ERROR,
1749 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1750 errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1751 JENTRY_OFFLENMASK)));
1752
1753 /* Initialize the header of this node in the container's JEntry array */
1754 *pheader = JENTRY_ISCONTAINER | totallen;
1755 }
1756
1757 static void
convertJsonbScalar(StringInfo buffer,JEntry * jentry,JsonbValue * scalarVal)1758 convertJsonbScalar(StringInfo buffer, JEntry *jentry, JsonbValue *scalarVal)
1759 {
1760 int numlen;
1761 short padlen;
1762
1763 switch (scalarVal->type)
1764 {
1765 case jbvNull:
1766 *jentry = JENTRY_ISNULL;
1767 break;
1768
1769 case jbvString:
1770 appendToBuffer(buffer, scalarVal->val.string.val, scalarVal->val.string.len);
1771
1772 *jentry = scalarVal->val.string.len;
1773 break;
1774
1775 case jbvNumeric:
1776 numlen = VARSIZE_ANY(scalarVal->val.numeric);
1777 padlen = padBufferToInt(buffer);
1778
1779 appendToBuffer(buffer, (char *) scalarVal->val.numeric, numlen);
1780
1781 *jentry = JENTRY_ISNUMERIC | (padlen + numlen);
1782 break;
1783
1784 case jbvBool:
1785 *jentry = (scalarVal->val.boolean) ?
1786 JENTRY_ISBOOL_TRUE : JENTRY_ISBOOL_FALSE;
1787 break;
1788
1789 case jbvDatetime:
1790 {
1791 char buf[MAXDATELEN + 1];
1792 size_t len;
1793
1794 JsonEncodeDateTime(buf,
1795 scalarVal->val.datetime.value,
1796 scalarVal->val.datetime.typid,
1797 &scalarVal->val.datetime.tz);
1798 len = strlen(buf);
1799 appendToBuffer(buffer, buf, len);
1800
1801 *jentry = len;
1802 }
1803 break;
1804
1805 default:
1806 elog(ERROR, "invalid jsonb scalar type");
1807 }
1808 }
1809
1810 /*
1811 * Compare two jbvString JsonbValue values, a and b.
1812 *
1813 * This is a special qsort() comparator used to sort strings in certain
1814 * internal contexts where it is sufficient to have a well-defined sort order.
1815 * In particular, object pair keys are sorted according to this criteria to
1816 * facilitate cheap binary searches where we don't care about lexical sort
1817 * order.
1818 *
1819 * a and b are first sorted based on their length. If a tie-breaker is
1820 * required, only then do we consider string binary equality.
1821 */
1822 static int
lengthCompareJsonbStringValue(const void * a,const void * b)1823 lengthCompareJsonbStringValue(const void *a, const void *b)
1824 {
1825 const JsonbValue *va = (const JsonbValue *) a;
1826 const JsonbValue *vb = (const JsonbValue *) b;
1827
1828 Assert(va->type == jbvString);
1829 Assert(vb->type == jbvString);
1830
1831 return lengthCompareJsonbString(va->val.string.val, va->val.string.len,
1832 vb->val.string.val, vb->val.string.len);
1833 }
1834
1835 /*
1836 * Subroutine for lengthCompareJsonbStringValue
1837 *
1838 * This is also useful separately to implement binary search on
1839 * JsonbContainers.
1840 */
1841 static int
lengthCompareJsonbString(const char * val1,int len1,const char * val2,int len2)1842 lengthCompareJsonbString(const char *val1, int len1, const char *val2, int len2)
1843 {
1844 if (len1 == len2)
1845 return memcmp(val1, val2, len1);
1846 else
1847 return len1 > len2 ? 1 : -1;
1848 }
1849
1850 /*
1851 * qsort_arg() comparator to compare JsonbPair values.
1852 *
1853 * Third argument 'binequal' may point to a bool. If it's set, *binequal is set
1854 * to true iff a and b have full binary equality, since some callers have an
1855 * interest in whether the two values are equal or merely equivalent.
1856 *
1857 * N.B: String comparisons here are "length-wise"
1858 *
1859 * Pairs with equals keys are ordered such that the order field is respected.
1860 */
1861 static int
lengthCompareJsonbPair(const void * a,const void * b,void * binequal)1862 lengthCompareJsonbPair(const void *a, const void *b, void *binequal)
1863 {
1864 const JsonbPair *pa = (const JsonbPair *) a;
1865 const JsonbPair *pb = (const JsonbPair *) b;
1866 int res;
1867
1868 res = lengthCompareJsonbStringValue(&pa->key, &pb->key);
1869 if (res == 0 && binequal)
1870 *((bool *) binequal) = true;
1871
1872 /*
1873 * Guarantee keeping order of equal pair. Unique algorithm will prefer
1874 * first element as value.
1875 */
1876 if (res == 0)
1877 res = (pa->order > pb->order) ? -1 : 1;
1878
1879 return res;
1880 }
1881
1882 /*
1883 * Sort and unique-ify pairs in JsonbValue object
1884 */
1885 static void
uniqueifyJsonbObject(JsonbValue * object)1886 uniqueifyJsonbObject(JsonbValue *object)
1887 {
1888 bool hasNonUniq = false;
1889
1890 Assert(object->type == jbvObject);
1891
1892 if (object->val.object.nPairs > 1)
1893 qsort_arg(object->val.object.pairs, object->val.object.nPairs, sizeof(JsonbPair),
1894 lengthCompareJsonbPair, &hasNonUniq);
1895
1896 if (hasNonUniq)
1897 {
1898 JsonbPair *ptr = object->val.object.pairs + 1,
1899 *res = object->val.object.pairs;
1900
1901 while (ptr - object->val.object.pairs < object->val.object.nPairs)
1902 {
1903 /* Avoid copying over duplicate */
1904 if (lengthCompareJsonbStringValue(ptr, res) != 0)
1905 {
1906 res++;
1907 if (ptr != res)
1908 memcpy(res, ptr, sizeof(JsonbPair));
1909 }
1910 ptr++;
1911 }
1912
1913 object->val.object.nPairs = res + 1 - object->val.object.pairs;
1914 }
1915 }
1916