1 /*-------------------------------------------------------------------------
2 *
3 * jsonb_util.c
4 * converting between Jsonb and JsonbValues, and iterating.
5 *
6 * Copyright (c) 2014-2021, PostgreSQL Global Development Group
7 *
8 *
9 * IDENTIFICATION
10 * src/backend/utils/adt/jsonb_util.c
11 *
12 *-------------------------------------------------------------------------
13 */
14 #include "postgres.h"
15
16 #include "catalog/pg_collation.h"
17 #include "catalog/pg_type.h"
18 #include "common/hashfn.h"
19 #include "common/jsonapi.h"
20 #include "miscadmin.h"
21 #include "utils/builtins.h"
22 #include "utils/datetime.h"
23 #include "utils/json.h"
24 #include "utils/jsonb.h"
25 #include "utils/memutils.h"
26 #include "utils/varlena.h"
27
28 /*
29 * Maximum number of elements in an array (or key/value pairs in an object).
30 * This is limited by two things: the size of the JEntry array must fit
31 * in MaxAllocSize, and the number of elements (or pairs) must fit in the bits
32 * reserved for that in the JsonbContainer.header field.
33 *
34 * (The total size of an array's or object's elements is also limited by
35 * JENTRY_OFFLENMASK, but we're not concerned about that here.)
36 */
37 #define JSONB_MAX_ELEMS (Min(MaxAllocSize / sizeof(JsonbValue), JB_CMASK))
38 #define JSONB_MAX_PAIRS (Min(MaxAllocSize / sizeof(JsonbPair), JB_CMASK))
39
40 static void fillJsonbValue(JsonbContainer *container, int index,
41 char *base_addr, uint32 offset,
42 JsonbValue *result);
43 static bool equalsJsonbScalarValue(JsonbValue *a, JsonbValue *b);
44 static int compareJsonbScalarValue(JsonbValue *a, JsonbValue *b);
45 static Jsonb *convertToJsonb(JsonbValue *val);
46 static void convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
47 static void convertJsonbArray(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
48 static void convertJsonbObject(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
49 static void convertJsonbScalar(StringInfo buffer, JEntry *header, JsonbValue *scalarVal);
50
51 static int reserveFromBuffer(StringInfo buffer, int len);
52 static void appendToBuffer(StringInfo buffer, const char *data, int len);
53 static void copyToBuffer(StringInfo buffer, int offset, const char *data, int len);
54 static short padBufferToInt(StringInfo buffer);
55
56 static JsonbIterator *iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent);
57 static JsonbIterator *freeAndGetParent(JsonbIterator *it);
58 static JsonbParseState *pushState(JsonbParseState **pstate);
59 static void appendKey(JsonbParseState *pstate, JsonbValue *scalarVal);
60 static void appendValue(JsonbParseState *pstate, JsonbValue *scalarVal);
61 static void appendElement(JsonbParseState *pstate, JsonbValue *scalarVal);
62 static int lengthCompareJsonbStringValue(const void *a, const void *b);
63 static int lengthCompareJsonbString(const char *val1, int len1,
64 const char *val2, int len2);
65 static int lengthCompareJsonbPair(const void *a, const void *b, void *arg);
66 static void uniqueifyJsonbObject(JsonbValue *object);
67 static JsonbValue *pushJsonbValueScalar(JsonbParseState **pstate,
68 JsonbIteratorToken seq,
69 JsonbValue *scalarVal);
70
71 void
JsonbToJsonbValue(Jsonb * jsonb,JsonbValue * val)72 JsonbToJsonbValue(Jsonb *jsonb, JsonbValue *val)
73 {
74 val->type = jbvBinary;
75 val->val.binary.data = &jsonb->root;
76 val->val.binary.len = VARSIZE(jsonb) - VARHDRSZ;
77 }
78
79 /*
80 * Turn an in-memory JsonbValue into a Jsonb for on-disk storage.
81 *
82 * Generally we find it more convenient to directly iterate through the Jsonb
83 * representation and only really convert nested scalar values.
84 * JsonbIteratorNext() does this, so that clients of the iteration code don't
85 * have to directly deal with the binary representation (JsonbDeepContains() is
86 * a notable exception, although all exceptions are internal to this module).
87 * In general, functions that accept a JsonbValue argument are concerned with
88 * the manipulation of scalar values, or simple containers of scalar values,
89 * where it would be inconvenient to deal with a great amount of other state.
90 */
91 Jsonb *
JsonbValueToJsonb(JsonbValue * val)92 JsonbValueToJsonb(JsonbValue *val)
93 {
94 Jsonb *out;
95
96 if (IsAJsonbScalar(val))
97 {
98 /* Scalar value */
99 JsonbParseState *pstate = NULL;
100 JsonbValue *res;
101 JsonbValue scalarArray;
102
103 scalarArray.type = jbvArray;
104 scalarArray.val.array.rawScalar = true;
105 scalarArray.val.array.nElems = 1;
106
107 pushJsonbValue(&pstate, WJB_BEGIN_ARRAY, &scalarArray);
108 pushJsonbValue(&pstate, WJB_ELEM, val);
109 res = pushJsonbValue(&pstate, WJB_END_ARRAY, NULL);
110
111 out = convertToJsonb(res);
112 }
113 else if (val->type == jbvObject || val->type == jbvArray)
114 {
115 out = convertToJsonb(val);
116 }
117 else
118 {
119 Assert(val->type == jbvBinary);
120 out = palloc(VARHDRSZ + val->val.binary.len);
121 SET_VARSIZE(out, VARHDRSZ + val->val.binary.len);
122 memcpy(VARDATA(out), val->val.binary.data, val->val.binary.len);
123 }
124
125 return out;
126 }
127
128 /*
129 * Get the offset of the variable-length portion of a Jsonb node within
130 * the variable-length-data part of its container. The node is identified
131 * by index within the container's JEntry array.
132 */
133 uint32
getJsonbOffset(const JsonbContainer * jc,int index)134 getJsonbOffset(const JsonbContainer *jc, int index)
135 {
136 uint32 offset = 0;
137 int i;
138
139 /*
140 * Start offset of this entry is equal to the end offset of the previous
141 * entry. Walk backwards to the most recent entry stored as an end
142 * offset, returning that offset plus any lengths in between.
143 */
144 for (i = index - 1; i >= 0; i--)
145 {
146 offset += JBE_OFFLENFLD(jc->children[i]);
147 if (JBE_HAS_OFF(jc->children[i]))
148 break;
149 }
150
151 return offset;
152 }
153
154 /*
155 * Get the length of the variable-length portion of a Jsonb node.
156 * The node is identified by index within the container's JEntry array.
157 */
158 uint32
getJsonbLength(const JsonbContainer * jc,int index)159 getJsonbLength(const JsonbContainer *jc, int index)
160 {
161 uint32 off;
162 uint32 len;
163
164 /*
165 * If the length is stored directly in the JEntry, just return it.
166 * Otherwise, get the begin offset of the entry, and subtract that from
167 * the stored end+1 offset.
168 */
169 if (JBE_HAS_OFF(jc->children[index]))
170 {
171 off = getJsonbOffset(jc, index);
172 len = JBE_OFFLENFLD(jc->children[index]) - off;
173 }
174 else
175 len = JBE_OFFLENFLD(jc->children[index]);
176
177 return len;
178 }
179
180 /*
181 * BT comparator worker function. Returns an integer less than, equal to, or
182 * greater than zero, indicating whether a is less than, equal to, or greater
183 * than b. Consistent with the requirements for a B-Tree operator class
184 *
185 * Strings are compared lexically, in contrast with other places where we use a
186 * much simpler comparator logic for searching through Strings. Since this is
187 * called from B-Tree support function 1, we're careful about not leaking
188 * memory here.
189 */
190 int
compareJsonbContainers(JsonbContainer * a,JsonbContainer * b)191 compareJsonbContainers(JsonbContainer *a, JsonbContainer *b)
192 {
193 JsonbIterator *ita,
194 *itb;
195 int res = 0;
196
197 ita = JsonbIteratorInit(a);
198 itb = JsonbIteratorInit(b);
199
200 do
201 {
202 JsonbValue va,
203 vb;
204 JsonbIteratorToken ra,
205 rb;
206
207 ra = JsonbIteratorNext(&ita, &va, false);
208 rb = JsonbIteratorNext(&itb, &vb, false);
209
210 if (ra == rb)
211 {
212 if (ra == WJB_DONE)
213 {
214 /* Decisively equal */
215 break;
216 }
217
218 if (ra == WJB_END_ARRAY || ra == WJB_END_OBJECT)
219 {
220 /*
221 * There is no array or object to compare at this stage of
222 * processing. jbvArray/jbvObject values are compared
223 * initially, at the WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT
224 * tokens.
225 */
226 continue;
227 }
228
229 if (va.type == vb.type)
230 {
231 switch (va.type)
232 {
233 case jbvString:
234 case jbvNull:
235 case jbvNumeric:
236 case jbvBool:
237 res = compareJsonbScalarValue(&va, &vb);
238 break;
239 case jbvArray:
240
241 /*
242 * This could be a "raw scalar" pseudo array. That's
243 * a special case here though, since we still want the
244 * general type-based comparisons to apply, and as far
245 * as we're concerned a pseudo array is just a scalar.
246 */
247 if (va.val.array.rawScalar != vb.val.array.rawScalar)
248 res = (va.val.array.rawScalar) ? -1 : 1;
249 if (va.val.array.nElems != vb.val.array.nElems)
250 res = (va.val.array.nElems > vb.val.array.nElems) ? 1 : -1;
251 break;
252 case jbvObject:
253 if (va.val.object.nPairs != vb.val.object.nPairs)
254 res = (va.val.object.nPairs > vb.val.object.nPairs) ? 1 : -1;
255 break;
256 case jbvBinary:
257 elog(ERROR, "unexpected jbvBinary value");
258 break;
259 case jbvDatetime:
260 elog(ERROR, "unexpected jbvDatetime value");
261 break;
262 }
263 }
264 else
265 {
266 /* Type-defined order */
267 res = (va.type > vb.type) ? 1 : -1;
268 }
269 }
270 else
271 {
272 /*
273 * It's safe to assume that the types differed, and that the va
274 * and vb values passed were set.
275 *
276 * If the two values were of the same container type, then there'd
277 * have been a chance to observe the variation in the number of
278 * elements/pairs (when processing WJB_BEGIN_OBJECT, say). They're
279 * either two heterogeneously-typed containers, or a container and
280 * some scalar type.
281 *
282 * We don't have to consider the WJB_END_ARRAY and WJB_END_OBJECT
283 * cases here, because we would have seen the corresponding
284 * WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT tokens first, and
285 * concluded that they don't match.
286 */
287 Assert(ra != WJB_END_ARRAY && ra != WJB_END_OBJECT);
288 Assert(rb != WJB_END_ARRAY && rb != WJB_END_OBJECT);
289
290 Assert(va.type != vb.type);
291 Assert(va.type != jbvBinary);
292 Assert(vb.type != jbvBinary);
293 /* Type-defined order */
294 res = (va.type > vb.type) ? 1 : -1;
295 }
296 }
297 while (res == 0);
298
299 while (ita != NULL)
300 {
301 JsonbIterator *i = ita->parent;
302
303 pfree(ita);
304 ita = i;
305 }
306 while (itb != NULL)
307 {
308 JsonbIterator *i = itb->parent;
309
310 pfree(itb);
311 itb = i;
312 }
313
314 return res;
315 }
316
317 /*
318 * Find value in object (i.e. the "value" part of some key/value pair in an
319 * object), or find a matching element if we're looking through an array. Do
320 * so on the basis of equality of the object keys only, or alternatively
321 * element values only, with a caller-supplied value "key". The "flags"
322 * argument allows the caller to specify which container types are of interest.
323 *
324 * This exported utility function exists to facilitate various cases concerned
325 * with "containment". If asked to look through an object, the caller had
326 * better pass a Jsonb String, because their keys can only be strings.
327 * Otherwise, for an array, any type of JsonbValue will do.
328 *
329 * In order to proceed with the search, it is necessary for callers to have
330 * both specified an interest in exactly one particular container type with an
331 * appropriate flag, as well as having the pointed-to Jsonb container be of
332 * one of those same container types at the top level. (Actually, we just do
333 * whichever makes sense to save callers the trouble of figuring it out - at
334 * most one can make sense, because the container either points to an array
335 * (possibly a "raw scalar" pseudo array) or an object.)
336 *
337 * Note that we can return a jbvBinary JsonbValue if this is called on an
338 * object, but we never do so on an array. If the caller asks to look through
339 * a container type that is not of the type pointed to by the container,
340 * immediately fall through and return NULL. If we cannot find the value,
341 * return NULL. Otherwise, return palloc()'d copy of value.
342 */
343 JsonbValue *
findJsonbValueFromContainer(JsonbContainer * container,uint32 flags,JsonbValue * key)344 findJsonbValueFromContainer(JsonbContainer *container, uint32 flags,
345 JsonbValue *key)
346 {
347 JEntry *children = container->children;
348 int count = JsonContainerSize(container);
349
350 Assert((flags & ~(JB_FARRAY | JB_FOBJECT)) == 0);
351
352 /* Quick out without a palloc cycle if object/array is empty */
353 if (count <= 0)
354 return NULL;
355
356 if ((flags & JB_FARRAY) && JsonContainerIsArray(container))
357 {
358 JsonbValue *result = palloc(sizeof(JsonbValue));
359 char *base_addr = (char *) (children + count);
360 uint32 offset = 0;
361 int i;
362
363 for (i = 0; i < count; i++)
364 {
365 fillJsonbValue(container, i, base_addr, offset, result);
366
367 if (key->type == result->type)
368 {
369 if (equalsJsonbScalarValue(key, result))
370 return result;
371 }
372
373 JBE_ADVANCE_OFFSET(offset, children[i]);
374 }
375
376 pfree(result);
377 }
378 else if ((flags & JB_FOBJECT) && JsonContainerIsObject(container))
379 {
380 /* Object key passed by caller must be a string */
381 Assert(key->type == jbvString);
382
383 return getKeyJsonValueFromContainer(container, key->val.string.val,
384 key->val.string.len, NULL);
385 }
386
387 /* Not found */
388 return NULL;
389 }
390
391 /*
392 * Find value by key in Jsonb object and fetch it into 'res', which is also
393 * returned.
394 *
395 * 'res' can be passed in as NULL, in which case it's newly palloc'ed here.
396 */
397 JsonbValue *
getKeyJsonValueFromContainer(JsonbContainer * container,const char * keyVal,int keyLen,JsonbValue * res)398 getKeyJsonValueFromContainer(JsonbContainer *container,
399 const char *keyVal, int keyLen, JsonbValue *res)
400 {
401 JEntry *children = container->children;
402 int count = JsonContainerSize(container);
403 char *baseAddr;
404 uint32 stopLow,
405 stopHigh;
406
407 Assert(JsonContainerIsObject(container));
408
409 /* Quick out without a palloc cycle if object is empty */
410 if (count <= 0)
411 return NULL;
412
413 /*
414 * Binary search the container. Since we know this is an object, account
415 * for *Pairs* of Jentrys
416 */
417 baseAddr = (char *) (children + count * 2);
418 stopLow = 0;
419 stopHigh = count;
420 while (stopLow < stopHigh)
421 {
422 uint32 stopMiddle;
423 int difference;
424 const char *candidateVal;
425 int candidateLen;
426
427 stopMiddle = stopLow + (stopHigh - stopLow) / 2;
428
429 candidateVal = baseAddr + getJsonbOffset(container, stopMiddle);
430 candidateLen = getJsonbLength(container, stopMiddle);
431
432 difference = lengthCompareJsonbString(candidateVal, candidateLen,
433 keyVal, keyLen);
434
435 if (difference == 0)
436 {
437 /* Found our key, return corresponding value */
438 int index = stopMiddle + count;
439
440 if (!res)
441 res = palloc(sizeof(JsonbValue));
442
443 fillJsonbValue(container, index, baseAddr,
444 getJsonbOffset(container, index),
445 res);
446
447 return res;
448 }
449 else
450 {
451 if (difference < 0)
452 stopLow = stopMiddle + 1;
453 else
454 stopHigh = stopMiddle;
455 }
456 }
457
458 /* Not found */
459 return NULL;
460 }
461
462 /*
463 * Get i-th value of a Jsonb array.
464 *
465 * Returns palloc()'d copy of the value, or NULL if it does not exist.
466 */
467 JsonbValue *
getIthJsonbValueFromContainer(JsonbContainer * container,uint32 i)468 getIthJsonbValueFromContainer(JsonbContainer *container, uint32 i)
469 {
470 JsonbValue *result;
471 char *base_addr;
472 uint32 nelements;
473
474 if (!JsonContainerIsArray(container))
475 elog(ERROR, "not a jsonb array");
476
477 nelements = JsonContainerSize(container);
478 base_addr = (char *) &container->children[nelements];
479
480 if (i >= nelements)
481 return NULL;
482
483 result = palloc(sizeof(JsonbValue));
484
485 fillJsonbValue(container, i, base_addr,
486 getJsonbOffset(container, i),
487 result);
488
489 return result;
490 }
491
492 /*
493 * A helper function to fill in a JsonbValue to represent an element of an
494 * array, or a key or value of an object.
495 *
496 * The node's JEntry is at container->children[index], and its variable-length
497 * data is at base_addr + offset. We make the caller determine the offset
498 * since in many cases the caller can amortize that work across multiple
499 * children. When it can't, it can just call getJsonbOffset().
500 *
501 * A nested array or object will be returned as jbvBinary, ie. it won't be
502 * expanded.
503 */
504 static void
fillJsonbValue(JsonbContainer * container,int index,char * base_addr,uint32 offset,JsonbValue * result)505 fillJsonbValue(JsonbContainer *container, int index,
506 char *base_addr, uint32 offset,
507 JsonbValue *result)
508 {
509 JEntry entry = container->children[index];
510
511 if (JBE_ISNULL(entry))
512 {
513 result->type = jbvNull;
514 }
515 else if (JBE_ISSTRING(entry))
516 {
517 result->type = jbvString;
518 result->val.string.val = base_addr + offset;
519 result->val.string.len = getJsonbLength(container, index);
520 Assert(result->val.string.len >= 0);
521 }
522 else if (JBE_ISNUMERIC(entry))
523 {
524 result->type = jbvNumeric;
525 result->val.numeric = (Numeric) (base_addr + INTALIGN(offset));
526 }
527 else if (JBE_ISBOOL_TRUE(entry))
528 {
529 result->type = jbvBool;
530 result->val.boolean = true;
531 }
532 else if (JBE_ISBOOL_FALSE(entry))
533 {
534 result->type = jbvBool;
535 result->val.boolean = false;
536 }
537 else
538 {
539 Assert(JBE_ISCONTAINER(entry));
540 result->type = jbvBinary;
541 /* Remove alignment padding from data pointer and length */
542 result->val.binary.data = (JsonbContainer *) (base_addr + INTALIGN(offset));
543 result->val.binary.len = getJsonbLength(container, index) -
544 (INTALIGN(offset) - offset);
545 }
546 }
547
548 /*
549 * Push JsonbValue into JsonbParseState.
550 *
551 * Used when parsing JSON tokens to form Jsonb, or when converting an in-memory
552 * JsonbValue to a Jsonb.
553 *
554 * Initial state of *JsonbParseState is NULL, since it'll be allocated here
555 * originally (caller will get JsonbParseState back by reference).
556 *
557 * Only sequential tokens pertaining to non-container types should pass a
558 * JsonbValue. There is one exception -- WJB_BEGIN_ARRAY callers may pass a
559 * "raw scalar" pseudo array to append it - the actual scalar should be passed
560 * next and it will be added as the only member of the array.
561 *
562 * Values of type jbvBinary, which are rolled up arrays and objects,
563 * are unpacked before being added to the result.
564 */
565 JsonbValue *
pushJsonbValue(JsonbParseState ** pstate,JsonbIteratorToken seq,JsonbValue * jbval)566 pushJsonbValue(JsonbParseState **pstate, JsonbIteratorToken seq,
567 JsonbValue *jbval)
568 {
569 JsonbIterator *it;
570 JsonbValue *res = NULL;
571 JsonbValue v;
572 JsonbIteratorToken tok;
573 int i;
574
575 if (jbval && (seq == WJB_ELEM || seq == WJB_VALUE) && jbval->type == jbvObject)
576 {
577 pushJsonbValue(pstate, WJB_BEGIN_OBJECT, NULL);
578 for (i = 0; i < jbval->val.object.nPairs; i++)
579 {
580 pushJsonbValue(pstate, WJB_KEY, &jbval->val.object.pairs[i].key);
581 pushJsonbValue(pstate, WJB_VALUE, &jbval->val.object.pairs[i].value);
582 }
583
584 return pushJsonbValue(pstate, WJB_END_OBJECT, NULL);
585 }
586
587 if (jbval && (seq == WJB_ELEM || seq == WJB_VALUE) && jbval->type == jbvArray)
588 {
589 pushJsonbValue(pstate, WJB_BEGIN_ARRAY, NULL);
590 for (i = 0; i < jbval->val.array.nElems; i++)
591 {
592 pushJsonbValue(pstate, WJB_ELEM, &jbval->val.array.elems[i]);
593 }
594
595 return pushJsonbValue(pstate, WJB_END_ARRAY, NULL);
596 }
597
598 if (!jbval || (seq != WJB_ELEM && seq != WJB_VALUE) ||
599 jbval->type != jbvBinary)
600 {
601 /* drop through */
602 return pushJsonbValueScalar(pstate, seq, jbval);
603 }
604
605 /* unpack the binary and add each piece to the pstate */
606 it = JsonbIteratorInit(jbval->val.binary.data);
607
608 if ((jbval->val.binary.data->header & JB_FSCALAR) && *pstate)
609 {
610 tok = JsonbIteratorNext(&it, &v, true);
611 Assert(tok == WJB_BEGIN_ARRAY);
612 Assert(v.type == jbvArray && v.val.array.rawScalar);
613
614 tok = JsonbIteratorNext(&it, &v, true);
615 Assert(tok == WJB_ELEM);
616
617 res = pushJsonbValueScalar(pstate, seq, &v);
618
619 tok = JsonbIteratorNext(&it, &v, true);
620 Assert(tok == WJB_END_ARRAY);
621 Assert(it == NULL);
622
623 return res;
624 }
625
626 while ((tok = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
627 res = pushJsonbValueScalar(pstate, tok,
628 tok < WJB_BEGIN_ARRAY ||
629 (tok == WJB_BEGIN_ARRAY &&
630 v.val.array.rawScalar) ? &v : NULL);
631
632 return res;
633 }
634
635 /*
636 * Do the actual pushing, with only scalar or pseudo-scalar-array values
637 * accepted.
638 */
639 static JsonbValue *
pushJsonbValueScalar(JsonbParseState ** pstate,JsonbIteratorToken seq,JsonbValue * scalarVal)640 pushJsonbValueScalar(JsonbParseState **pstate, JsonbIteratorToken seq,
641 JsonbValue *scalarVal)
642 {
643 JsonbValue *result = NULL;
644
645 switch (seq)
646 {
647 case WJB_BEGIN_ARRAY:
648 Assert(!scalarVal || scalarVal->val.array.rawScalar);
649 *pstate = pushState(pstate);
650 result = &(*pstate)->contVal;
651 (*pstate)->contVal.type = jbvArray;
652 (*pstate)->contVal.val.array.nElems = 0;
653 (*pstate)->contVal.val.array.rawScalar = (scalarVal &&
654 scalarVal->val.array.rawScalar);
655 if (scalarVal && scalarVal->val.array.nElems > 0)
656 {
657 /* Assume that this array is still really a scalar */
658 Assert(scalarVal->type == jbvArray);
659 (*pstate)->size = scalarVal->val.array.nElems;
660 }
661 else
662 {
663 (*pstate)->size = 4;
664 }
665 (*pstate)->contVal.val.array.elems = palloc(sizeof(JsonbValue) *
666 (*pstate)->size);
667 break;
668 case WJB_BEGIN_OBJECT:
669 Assert(!scalarVal);
670 *pstate = pushState(pstate);
671 result = &(*pstate)->contVal;
672 (*pstate)->contVal.type = jbvObject;
673 (*pstate)->contVal.val.object.nPairs = 0;
674 (*pstate)->size = 4;
675 (*pstate)->contVal.val.object.pairs = palloc(sizeof(JsonbPair) *
676 (*pstate)->size);
677 break;
678 case WJB_KEY:
679 Assert(scalarVal->type == jbvString);
680 appendKey(*pstate, scalarVal);
681 break;
682 case WJB_VALUE:
683 Assert(IsAJsonbScalar(scalarVal));
684 appendValue(*pstate, scalarVal);
685 break;
686 case WJB_ELEM:
687 Assert(IsAJsonbScalar(scalarVal));
688 appendElement(*pstate, scalarVal);
689 break;
690 case WJB_END_OBJECT:
691 uniqueifyJsonbObject(&(*pstate)->contVal);
692 /* fall through! */
693 case WJB_END_ARRAY:
694 /* Steps here common to WJB_END_OBJECT case */
695 Assert(!scalarVal);
696 result = &(*pstate)->contVal;
697
698 /*
699 * Pop stack and push current array/object as value in parent
700 * array/object
701 */
702 *pstate = (*pstate)->next;
703 if (*pstate)
704 {
705 switch ((*pstate)->contVal.type)
706 {
707 case jbvArray:
708 appendElement(*pstate, result);
709 break;
710 case jbvObject:
711 appendValue(*pstate, result);
712 break;
713 default:
714 elog(ERROR, "invalid jsonb container type");
715 }
716 }
717 break;
718 default:
719 elog(ERROR, "unrecognized jsonb sequential processing token");
720 }
721
722 return result;
723 }
724
725 /*
726 * pushJsonbValue() worker: Iteration-like forming of Jsonb
727 */
728 static JsonbParseState *
pushState(JsonbParseState ** pstate)729 pushState(JsonbParseState **pstate)
730 {
731 JsonbParseState *ns = palloc(sizeof(JsonbParseState));
732
733 ns->next = *pstate;
734 return ns;
735 }
736
737 /*
738 * pushJsonbValue() worker: Append a pair key to state when generating a Jsonb
739 */
740 static void
appendKey(JsonbParseState * pstate,JsonbValue * string)741 appendKey(JsonbParseState *pstate, JsonbValue *string)
742 {
743 JsonbValue *object = &pstate->contVal;
744
745 Assert(object->type == jbvObject);
746 Assert(string->type == jbvString);
747
748 if (object->val.object.nPairs >= JSONB_MAX_PAIRS)
749 ereport(ERROR,
750 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
751 errmsg("number of jsonb object pairs exceeds the maximum allowed (%zu)",
752 JSONB_MAX_PAIRS)));
753
754 if (object->val.object.nPairs >= pstate->size)
755 {
756 pstate->size *= 2;
757 object->val.object.pairs = repalloc(object->val.object.pairs,
758 sizeof(JsonbPair) * pstate->size);
759 }
760
761 object->val.object.pairs[object->val.object.nPairs].key = *string;
762 object->val.object.pairs[object->val.object.nPairs].order = object->val.object.nPairs;
763 }
764
765 /*
766 * pushJsonbValue() worker: Append a pair value to state when generating a
767 * Jsonb
768 */
769 static void
appendValue(JsonbParseState * pstate,JsonbValue * scalarVal)770 appendValue(JsonbParseState *pstate, JsonbValue *scalarVal)
771 {
772 JsonbValue *object = &pstate->contVal;
773
774 Assert(object->type == jbvObject);
775
776 object->val.object.pairs[object->val.object.nPairs++].value = *scalarVal;
777 }
778
779 /*
780 * pushJsonbValue() worker: Append an element to state when generating a Jsonb
781 */
782 static void
appendElement(JsonbParseState * pstate,JsonbValue * scalarVal)783 appendElement(JsonbParseState *pstate, JsonbValue *scalarVal)
784 {
785 JsonbValue *array = &pstate->contVal;
786
787 Assert(array->type == jbvArray);
788
789 if (array->val.array.nElems >= JSONB_MAX_ELEMS)
790 ereport(ERROR,
791 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
792 errmsg("number of jsonb array elements exceeds the maximum allowed (%zu)",
793 JSONB_MAX_ELEMS)));
794
795 if (array->val.array.nElems >= pstate->size)
796 {
797 pstate->size *= 2;
798 array->val.array.elems = repalloc(array->val.array.elems,
799 sizeof(JsonbValue) * pstate->size);
800 }
801
802 array->val.array.elems[array->val.array.nElems++] = *scalarVal;
803 }
804
805 /*
806 * Given a JsonbContainer, expand to JsonbIterator to iterate over items
807 * fully expanded to in-memory representation for manipulation.
808 *
809 * See JsonbIteratorNext() for notes on memory management.
810 */
811 JsonbIterator *
JsonbIteratorInit(JsonbContainer * container)812 JsonbIteratorInit(JsonbContainer *container)
813 {
814 return iteratorFromContainer(container, NULL);
815 }
816
817 /*
818 * Get next JsonbValue while iterating
819 *
820 * Caller should initially pass their own, original iterator. They may get
821 * back a child iterator palloc()'d here instead. The function can be relied
822 * on to free those child iterators, lest the memory allocated for highly
823 * nested objects become unreasonable, but only if callers don't end iteration
824 * early (by breaking upon having found something in a search, for example).
825 *
826 * Callers in such a scenario, that are particularly sensitive to leaking
827 * memory in a long-lived context may walk the ancestral tree from the final
828 * iterator we left them with to its oldest ancestor, pfree()ing as they go.
829 * They do not have to free any other memory previously allocated for iterators
830 * but not accessible as direct ancestors of the iterator they're last passed
831 * back.
832 *
833 * Returns "Jsonb sequential processing" token value. Iterator "state"
834 * reflects the current stage of the process in a less granular fashion, and is
835 * mostly used here to track things internally with respect to particular
836 * iterators.
837 *
838 * Clients of this function should not have to handle any jbvBinary values
839 * (since recursive calls will deal with this), provided skipNested is false.
840 * It is our job to expand the jbvBinary representation without bothering them
841 * with it. However, clients should not take it upon themselves to touch array
842 * or Object element/pair buffers, since their element/pair pointers are
843 * garbage. Also, *val will not be set when returning WJB_END_ARRAY or
844 * WJB_END_OBJECT, on the assumption that it's only useful to access values
845 * when recursing in.
846 */
847 JsonbIteratorToken
JsonbIteratorNext(JsonbIterator ** it,JsonbValue * val,bool skipNested)848 JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested)
849 {
850 if (*it == NULL)
851 return WJB_DONE;
852
853 /*
854 * When stepping into a nested container, we jump back here to start
855 * processing the child. We will not recurse further in one call, because
856 * processing the child will always begin in JBI_ARRAY_START or
857 * JBI_OBJECT_START state.
858 */
859 recurse:
860 switch ((*it)->state)
861 {
862 case JBI_ARRAY_START:
863 /* Set v to array on first array call */
864 val->type = jbvArray;
865 val->val.array.nElems = (*it)->nElems;
866
867 /*
868 * v->val.array.elems is not actually set, because we aren't doing
869 * a full conversion
870 */
871 val->val.array.rawScalar = (*it)->isScalar;
872 (*it)->curIndex = 0;
873 (*it)->curDataOffset = 0;
874 (*it)->curValueOffset = 0; /* not actually used */
875 /* Set state for next call */
876 (*it)->state = JBI_ARRAY_ELEM;
877 return WJB_BEGIN_ARRAY;
878
879 case JBI_ARRAY_ELEM:
880 if ((*it)->curIndex >= (*it)->nElems)
881 {
882 /*
883 * All elements within array already processed. Report this
884 * to caller, and give it back original parent iterator (which
885 * independently tracks iteration progress at its level of
886 * nesting).
887 */
888 *it = freeAndGetParent(*it);
889 return WJB_END_ARRAY;
890 }
891
892 fillJsonbValue((*it)->container, (*it)->curIndex,
893 (*it)->dataProper, (*it)->curDataOffset,
894 val);
895
896 JBE_ADVANCE_OFFSET((*it)->curDataOffset,
897 (*it)->children[(*it)->curIndex]);
898 (*it)->curIndex++;
899
900 if (!IsAJsonbScalar(val) && !skipNested)
901 {
902 /* Recurse into container. */
903 *it = iteratorFromContainer(val->val.binary.data, *it);
904 goto recurse;
905 }
906 else
907 {
908 /*
909 * Scalar item in array, or a container and caller didn't want
910 * us to recurse into it.
911 */
912 return WJB_ELEM;
913 }
914
915 case JBI_OBJECT_START:
916 /* Set v to object on first object call */
917 val->type = jbvObject;
918 val->val.object.nPairs = (*it)->nElems;
919
920 /*
921 * v->val.object.pairs is not actually set, because we aren't
922 * doing a full conversion
923 */
924 (*it)->curIndex = 0;
925 (*it)->curDataOffset = 0;
926 (*it)->curValueOffset = getJsonbOffset((*it)->container,
927 (*it)->nElems);
928 /* Set state for next call */
929 (*it)->state = JBI_OBJECT_KEY;
930 return WJB_BEGIN_OBJECT;
931
932 case JBI_OBJECT_KEY:
933 if ((*it)->curIndex >= (*it)->nElems)
934 {
935 /*
936 * All pairs within object already processed. Report this to
937 * caller, and give it back original containing iterator
938 * (which independently tracks iteration progress at its level
939 * of nesting).
940 */
941 *it = freeAndGetParent(*it);
942 return WJB_END_OBJECT;
943 }
944 else
945 {
946 /* Return key of a key/value pair. */
947 fillJsonbValue((*it)->container, (*it)->curIndex,
948 (*it)->dataProper, (*it)->curDataOffset,
949 val);
950 if (val->type != jbvString)
951 elog(ERROR, "unexpected jsonb type as object key");
952
953 /* Set state for next call */
954 (*it)->state = JBI_OBJECT_VALUE;
955 return WJB_KEY;
956 }
957
958 case JBI_OBJECT_VALUE:
959 /* Set state for next call */
960 (*it)->state = JBI_OBJECT_KEY;
961
962 fillJsonbValue((*it)->container, (*it)->curIndex + (*it)->nElems,
963 (*it)->dataProper, (*it)->curValueOffset,
964 val);
965
966 JBE_ADVANCE_OFFSET((*it)->curDataOffset,
967 (*it)->children[(*it)->curIndex]);
968 JBE_ADVANCE_OFFSET((*it)->curValueOffset,
969 (*it)->children[(*it)->curIndex + (*it)->nElems]);
970 (*it)->curIndex++;
971
972 /*
973 * Value may be a container, in which case we recurse with new,
974 * child iterator (unless the caller asked not to, by passing
975 * skipNested).
976 */
977 if (!IsAJsonbScalar(val) && !skipNested)
978 {
979 *it = iteratorFromContainer(val->val.binary.data, *it);
980 goto recurse;
981 }
982 else
983 return WJB_VALUE;
984 }
985
986 elog(ERROR, "invalid iterator state");
987 return -1;
988 }
989
990 /*
991 * Initialize an iterator for iterating all elements in a container.
992 */
993 static JsonbIterator *
iteratorFromContainer(JsonbContainer * container,JsonbIterator * parent)994 iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent)
995 {
996 JsonbIterator *it;
997
998 it = palloc0(sizeof(JsonbIterator));
999 it->container = container;
1000 it->parent = parent;
1001 it->nElems = JsonContainerSize(container);
1002
1003 /* Array starts just after header */
1004 it->children = container->children;
1005
1006 switch (container->header & (JB_FARRAY | JB_FOBJECT))
1007 {
1008 case JB_FARRAY:
1009 it->dataProper =
1010 (char *) it->children + it->nElems * sizeof(JEntry);
1011 it->isScalar = JsonContainerIsScalar(container);
1012 /* This is either a "raw scalar", or an array */
1013 Assert(!it->isScalar || it->nElems == 1);
1014
1015 it->state = JBI_ARRAY_START;
1016 break;
1017
1018 case JB_FOBJECT:
1019 it->dataProper =
1020 (char *) it->children + it->nElems * sizeof(JEntry) * 2;
1021 it->state = JBI_OBJECT_START;
1022 break;
1023
1024 default:
1025 elog(ERROR, "unknown type of jsonb container");
1026 }
1027
1028 return it;
1029 }
1030
1031 /*
1032 * JsonbIteratorNext() worker: Return parent, while freeing memory for current
1033 * iterator
1034 */
1035 static JsonbIterator *
freeAndGetParent(JsonbIterator * it)1036 freeAndGetParent(JsonbIterator *it)
1037 {
1038 JsonbIterator *v = it->parent;
1039
1040 pfree(it);
1041 return v;
1042 }
1043
1044 /*
1045 * Worker for "contains" operator's function
1046 *
1047 * Formally speaking, containment is top-down, unordered subtree isomorphism.
1048 *
1049 * Takes iterators that belong to some container type. These iterators
1050 * "belong" to those values in the sense that they've just been initialized in
1051 * respect of them by the caller (perhaps in a nested fashion).
1052 *
1053 * "val" is lhs Jsonb, and mContained is rhs Jsonb when called from top level.
1054 * We determine if mContained is contained within val.
1055 */
1056 bool
JsonbDeepContains(JsonbIterator ** val,JsonbIterator ** mContained)1057 JsonbDeepContains(JsonbIterator **val, JsonbIterator **mContained)
1058 {
1059 JsonbValue vval,
1060 vcontained;
1061 JsonbIteratorToken rval,
1062 rcont;
1063
1064 /*
1065 * Guard against stack overflow due to overly complex Jsonb.
1066 *
1067 * Functions called here independently take this precaution, but that
1068 * might not be sufficient since this is also a recursive function.
1069 */
1070 check_stack_depth();
1071
1072 rval = JsonbIteratorNext(val, &vval, false);
1073 rcont = JsonbIteratorNext(mContained, &vcontained, false);
1074
1075 if (rval != rcont)
1076 {
1077 /*
1078 * The differing return values can immediately be taken as indicating
1079 * two differing container types at this nesting level, which is
1080 * sufficient reason to give up entirely (but it should be the case
1081 * that they're both some container type).
1082 */
1083 Assert(rval == WJB_BEGIN_OBJECT || rval == WJB_BEGIN_ARRAY);
1084 Assert(rcont == WJB_BEGIN_OBJECT || rcont == WJB_BEGIN_ARRAY);
1085 return false;
1086 }
1087 else if (rcont == WJB_BEGIN_OBJECT)
1088 {
1089 Assert(vval.type == jbvObject);
1090 Assert(vcontained.type == jbvObject);
1091
1092 /*
1093 * If the lhs has fewer pairs than the rhs, it can't possibly contain
1094 * the rhs. (This conclusion is safe only because we de-duplicate
1095 * keys in all Jsonb objects; thus there can be no corresponding
1096 * optimization in the array case.) The case probably won't arise
1097 * often, but since it's such a cheap check we may as well make it.
1098 */
1099 if (vval.val.object.nPairs < vcontained.val.object.nPairs)
1100 return false;
1101
1102 /* Work through rhs "is it contained within?" object */
1103 for (;;)
1104 {
1105 JsonbValue *lhsVal; /* lhsVal is from pair in lhs object */
1106 JsonbValue lhsValBuf;
1107
1108 rcont = JsonbIteratorNext(mContained, &vcontained, false);
1109
1110 /*
1111 * When we get through caller's rhs "is it contained within?"
1112 * object without failing to find one of its values, it's
1113 * contained.
1114 */
1115 if (rcont == WJB_END_OBJECT)
1116 return true;
1117
1118 Assert(rcont == WJB_KEY);
1119 Assert(vcontained.type == jbvString);
1120
1121 /* First, find value by key... */
1122 lhsVal =
1123 getKeyJsonValueFromContainer((*val)->container,
1124 vcontained.val.string.val,
1125 vcontained.val.string.len,
1126 &lhsValBuf);
1127 if (!lhsVal)
1128 return false;
1129
1130 /*
1131 * ...at this stage it is apparent that there is at least a key
1132 * match for this rhs pair.
1133 */
1134 rcont = JsonbIteratorNext(mContained, &vcontained, true);
1135
1136 Assert(rcont == WJB_VALUE);
1137
1138 /*
1139 * Compare rhs pair's value with lhs pair's value just found using
1140 * key
1141 */
1142 if (lhsVal->type != vcontained.type)
1143 {
1144 return false;
1145 }
1146 else if (IsAJsonbScalar(lhsVal))
1147 {
1148 if (!equalsJsonbScalarValue(lhsVal, &vcontained))
1149 return false;
1150 }
1151 else
1152 {
1153 /* Nested container value (object or array) */
1154 JsonbIterator *nestval,
1155 *nestContained;
1156
1157 Assert(lhsVal->type == jbvBinary);
1158 Assert(vcontained.type == jbvBinary);
1159
1160 nestval = JsonbIteratorInit(lhsVal->val.binary.data);
1161 nestContained = JsonbIteratorInit(vcontained.val.binary.data);
1162
1163 /*
1164 * Match "value" side of rhs datum object's pair recursively.
1165 * It's a nested structure.
1166 *
1167 * Note that nesting still has to "match up" at the right
1168 * nesting sub-levels. However, there need only be zero or
1169 * more matching pairs (or elements) at each nesting level
1170 * (provided the *rhs* pairs/elements *all* match on each
1171 * level), which enables searching nested structures for a
1172 * single String or other primitive type sub-datum quite
1173 * effectively (provided the user constructed the rhs nested
1174 * structure such that we "know where to look").
1175 *
1176 * In other words, the mapping of container nodes in the rhs
1177 * "vcontained" Jsonb to internal nodes on the lhs is
1178 * injective, and parent-child edges on the rhs must be mapped
1179 * to parent-child edges on the lhs to satisfy the condition
1180 * of containment (plus of course the mapped nodes must be
1181 * equal).
1182 */
1183 if (!JsonbDeepContains(&nestval, &nestContained))
1184 return false;
1185 }
1186 }
1187 }
1188 else if (rcont == WJB_BEGIN_ARRAY)
1189 {
1190 JsonbValue *lhsConts = NULL;
1191 uint32 nLhsElems = vval.val.array.nElems;
1192
1193 Assert(vval.type == jbvArray);
1194 Assert(vcontained.type == jbvArray);
1195
1196 /*
1197 * Handle distinction between "raw scalar" pseudo arrays, and real
1198 * arrays.
1199 *
1200 * A raw scalar may contain another raw scalar, and an array may
1201 * contain a raw scalar, but a raw scalar may not contain an array. We
1202 * don't do something like this for the object case, since objects can
1203 * only contain pairs, never raw scalars (a pair is represented by an
1204 * rhs object argument with a single contained pair).
1205 */
1206 if (vval.val.array.rawScalar && !vcontained.val.array.rawScalar)
1207 return false;
1208
1209 /* Work through rhs "is it contained within?" array */
1210 for (;;)
1211 {
1212 rcont = JsonbIteratorNext(mContained, &vcontained, true);
1213
1214 /*
1215 * When we get through caller's rhs "is it contained within?"
1216 * array without failing to find one of its values, it's
1217 * contained.
1218 */
1219 if (rcont == WJB_END_ARRAY)
1220 return true;
1221
1222 Assert(rcont == WJB_ELEM);
1223
1224 if (IsAJsonbScalar(&vcontained))
1225 {
1226 if (!findJsonbValueFromContainer((*val)->container,
1227 JB_FARRAY,
1228 &vcontained))
1229 return false;
1230 }
1231 else
1232 {
1233 uint32 i;
1234
1235 /*
1236 * If this is first container found in rhs array (at this
1237 * depth), initialize temp lhs array of containers
1238 */
1239 if (lhsConts == NULL)
1240 {
1241 uint32 j = 0;
1242
1243 /* Make room for all possible values */
1244 lhsConts = palloc(sizeof(JsonbValue) * nLhsElems);
1245
1246 for (i = 0; i < nLhsElems; i++)
1247 {
1248 /* Store all lhs elements in temp array */
1249 rcont = JsonbIteratorNext(val, &vval, true);
1250 Assert(rcont == WJB_ELEM);
1251
1252 if (vval.type == jbvBinary)
1253 lhsConts[j++] = vval;
1254 }
1255
1256 /* No container elements in temp array, so give up now */
1257 if (j == 0)
1258 return false;
1259
1260 /* We may have only partially filled array */
1261 nLhsElems = j;
1262 }
1263
1264 /* XXX: Nested array containment is O(N^2) */
1265 for (i = 0; i < nLhsElems; i++)
1266 {
1267 /* Nested container value (object or array) */
1268 JsonbIterator *nestval,
1269 *nestContained;
1270 bool contains;
1271
1272 nestval = JsonbIteratorInit(lhsConts[i].val.binary.data);
1273 nestContained = JsonbIteratorInit(vcontained.val.binary.data);
1274
1275 contains = JsonbDeepContains(&nestval, &nestContained);
1276
1277 if (nestval)
1278 pfree(nestval);
1279 if (nestContained)
1280 pfree(nestContained);
1281 if (contains)
1282 break;
1283 }
1284
1285 /*
1286 * Report rhs container value is not contained if couldn't
1287 * match rhs container to *some* lhs cont
1288 */
1289 if (i == nLhsElems)
1290 return false;
1291 }
1292 }
1293 }
1294 else
1295 {
1296 elog(ERROR, "invalid jsonb container type");
1297 }
1298
1299 elog(ERROR, "unexpectedly fell off end of jsonb container");
1300 return false;
1301 }
1302
1303 /*
1304 * Hash a JsonbValue scalar value, mixing the hash value into an existing
1305 * hash provided by the caller.
1306 *
1307 * Some callers may wish to independently XOR in JB_FOBJECT and JB_FARRAY
1308 * flags.
1309 */
1310 void
JsonbHashScalarValue(const JsonbValue * scalarVal,uint32 * hash)1311 JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash)
1312 {
1313 uint32 tmp;
1314
1315 /* Compute hash value for scalarVal */
1316 switch (scalarVal->type)
1317 {
1318 case jbvNull:
1319 tmp = 0x01;
1320 break;
1321 case jbvString:
1322 tmp = DatumGetUInt32(hash_any((const unsigned char *) scalarVal->val.string.val,
1323 scalarVal->val.string.len));
1324 break;
1325 case jbvNumeric:
1326 /* Must hash equal numerics to equal hash codes */
1327 tmp = DatumGetUInt32(DirectFunctionCall1(hash_numeric,
1328 NumericGetDatum(scalarVal->val.numeric)));
1329 break;
1330 case jbvBool:
1331 tmp = scalarVal->val.boolean ? 0x02 : 0x04;
1332
1333 break;
1334 default:
1335 elog(ERROR, "invalid jsonb scalar type");
1336 tmp = 0; /* keep compiler quiet */
1337 break;
1338 }
1339
1340 /*
1341 * Combine hash values of successive keys, values and elements by rotating
1342 * the previous value left 1 bit, then XOR'ing in the new
1343 * key/value/element's hash value.
1344 */
1345 *hash = (*hash << 1) | (*hash >> 31);
1346 *hash ^= tmp;
1347 }
1348
1349 /*
1350 * Hash a value to a 64-bit value, with a seed. Otherwise, similar to
1351 * JsonbHashScalarValue.
1352 */
1353 void
JsonbHashScalarValueExtended(const JsonbValue * scalarVal,uint64 * hash,uint64 seed)1354 JsonbHashScalarValueExtended(const JsonbValue *scalarVal, uint64 *hash,
1355 uint64 seed)
1356 {
1357 uint64 tmp;
1358
1359 switch (scalarVal->type)
1360 {
1361 case jbvNull:
1362 tmp = seed + 0x01;
1363 break;
1364 case jbvString:
1365 tmp = DatumGetUInt64(hash_any_extended((const unsigned char *) scalarVal->val.string.val,
1366 scalarVal->val.string.len,
1367 seed));
1368 break;
1369 case jbvNumeric:
1370 tmp = DatumGetUInt64(DirectFunctionCall2(hash_numeric_extended,
1371 NumericGetDatum(scalarVal->val.numeric),
1372 UInt64GetDatum(seed)));
1373 break;
1374 case jbvBool:
1375 if (seed)
1376 tmp = DatumGetUInt64(DirectFunctionCall2(hashcharextended,
1377 BoolGetDatum(scalarVal->val.boolean),
1378 UInt64GetDatum(seed)));
1379 else
1380 tmp = scalarVal->val.boolean ? 0x02 : 0x04;
1381
1382 break;
1383 default:
1384 elog(ERROR, "invalid jsonb scalar type");
1385 break;
1386 }
1387
1388 *hash = ROTATE_HIGH_AND_LOW_32BITS(*hash);
1389 *hash ^= tmp;
1390 }
1391
1392 /*
1393 * Are two scalar JsonbValues of the same type a and b equal?
1394 */
1395 static bool
equalsJsonbScalarValue(JsonbValue * aScalar,JsonbValue * bScalar)1396 equalsJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar)
1397 {
1398 if (aScalar->type == bScalar->type)
1399 {
1400 switch (aScalar->type)
1401 {
1402 case jbvNull:
1403 return true;
1404 case jbvString:
1405 return lengthCompareJsonbStringValue(aScalar, bScalar) == 0;
1406 case jbvNumeric:
1407 return DatumGetBool(DirectFunctionCall2(numeric_eq,
1408 PointerGetDatum(aScalar->val.numeric),
1409 PointerGetDatum(bScalar->val.numeric)));
1410 case jbvBool:
1411 return aScalar->val.boolean == bScalar->val.boolean;
1412
1413 default:
1414 elog(ERROR, "invalid jsonb scalar type");
1415 }
1416 }
1417 elog(ERROR, "jsonb scalar type mismatch");
1418 return false;
1419 }
1420
1421 /*
1422 * Compare two scalar JsonbValues, returning -1, 0, or 1.
1423 *
1424 * Strings are compared using the default collation. Used by B-tree
1425 * operators, where a lexical sort order is generally expected.
1426 */
1427 static int
compareJsonbScalarValue(JsonbValue * aScalar,JsonbValue * bScalar)1428 compareJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar)
1429 {
1430 if (aScalar->type == bScalar->type)
1431 {
1432 switch (aScalar->type)
1433 {
1434 case jbvNull:
1435 return 0;
1436 case jbvString:
1437 return varstr_cmp(aScalar->val.string.val,
1438 aScalar->val.string.len,
1439 bScalar->val.string.val,
1440 bScalar->val.string.len,
1441 DEFAULT_COLLATION_OID);
1442 case jbvNumeric:
1443 return DatumGetInt32(DirectFunctionCall2(numeric_cmp,
1444 PointerGetDatum(aScalar->val.numeric),
1445 PointerGetDatum(bScalar->val.numeric)));
1446 case jbvBool:
1447 if (aScalar->val.boolean == bScalar->val.boolean)
1448 return 0;
1449 else if (aScalar->val.boolean > bScalar->val.boolean)
1450 return 1;
1451 else
1452 return -1;
1453 default:
1454 elog(ERROR, "invalid jsonb scalar type");
1455 }
1456 }
1457 elog(ERROR, "jsonb scalar type mismatch");
1458 return -1;
1459 }
1460
1461
1462 /*
1463 * Functions for manipulating the resizable buffer used by convertJsonb and
1464 * its subroutines.
1465 */
1466
1467 /*
1468 * Reserve 'len' bytes, at the end of the buffer, enlarging it if necessary.
1469 * Returns the offset to the reserved area. The caller is expected to fill
1470 * the reserved area later with copyToBuffer().
1471 */
1472 static int
reserveFromBuffer(StringInfo buffer,int len)1473 reserveFromBuffer(StringInfo buffer, int len)
1474 {
1475 int offset;
1476
1477 /* Make more room if needed */
1478 enlargeStringInfo(buffer, len);
1479
1480 /* remember current offset */
1481 offset = buffer->len;
1482
1483 /* reserve the space */
1484 buffer->len += len;
1485
1486 /*
1487 * Keep a trailing null in place, even though it's not useful for us; it
1488 * seems best to preserve the invariants of StringInfos.
1489 */
1490 buffer->data[buffer->len] = '\0';
1491
1492 return offset;
1493 }
1494
1495 /*
1496 * Copy 'len' bytes to a previously reserved area in buffer.
1497 */
1498 static void
copyToBuffer(StringInfo buffer,int offset,const char * data,int len)1499 copyToBuffer(StringInfo buffer, int offset, const char *data, int len)
1500 {
1501 memcpy(buffer->data + offset, data, len);
1502 }
1503
1504 /*
1505 * A shorthand for reserveFromBuffer + copyToBuffer.
1506 */
1507 static void
appendToBuffer(StringInfo buffer,const char * data,int len)1508 appendToBuffer(StringInfo buffer, const char *data, int len)
1509 {
1510 int offset;
1511
1512 offset = reserveFromBuffer(buffer, len);
1513 copyToBuffer(buffer, offset, data, len);
1514 }
1515
1516
1517 /*
1518 * Append padding, so that the length of the StringInfo is int-aligned.
1519 * Returns the number of padding bytes appended.
1520 */
1521 static short
padBufferToInt(StringInfo buffer)1522 padBufferToInt(StringInfo buffer)
1523 {
1524 int padlen,
1525 p,
1526 offset;
1527
1528 padlen = INTALIGN(buffer->len) - buffer->len;
1529
1530 offset = reserveFromBuffer(buffer, padlen);
1531
1532 /* padlen must be small, so this is probably faster than a memset */
1533 for (p = 0; p < padlen; p++)
1534 buffer->data[offset + p] = '\0';
1535
1536 return padlen;
1537 }
1538
1539 /*
1540 * Given a JsonbValue, convert to Jsonb. The result is palloc'd.
1541 */
1542 static Jsonb *
convertToJsonb(JsonbValue * val)1543 convertToJsonb(JsonbValue *val)
1544 {
1545 StringInfoData buffer;
1546 JEntry jentry;
1547 Jsonb *res;
1548
1549 /* Should not already have binary representation */
1550 Assert(val->type != jbvBinary);
1551
1552 /* Allocate an output buffer. It will be enlarged as needed */
1553 initStringInfo(&buffer);
1554
1555 /* Make room for the varlena header */
1556 reserveFromBuffer(&buffer, VARHDRSZ);
1557
1558 convertJsonbValue(&buffer, &jentry, val, 0);
1559
1560 /*
1561 * Note: the JEntry of the root is discarded. Therefore the root
1562 * JsonbContainer struct must contain enough information to tell what kind
1563 * of value it is.
1564 */
1565
1566 res = (Jsonb *) buffer.data;
1567
1568 SET_VARSIZE(res, buffer.len);
1569
1570 return res;
1571 }
1572
1573 /*
1574 * Subroutine of convertJsonb: serialize a single JsonbValue into buffer.
1575 *
1576 * The JEntry header for this node is returned in *header. It is filled in
1577 * with the length of this value and appropriate type bits. If we wish to
1578 * store an end offset rather than a length, it is the caller's responsibility
1579 * to adjust for that.
1580 *
1581 * If the value is an array or an object, this recurses. 'level' is only used
1582 * for debugging purposes.
1583 */
1584 static void
convertJsonbValue(StringInfo buffer,JEntry * header,JsonbValue * val,int level)1585 convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level)
1586 {
1587 check_stack_depth();
1588
1589 if (!val)
1590 return;
1591
1592 /*
1593 * A JsonbValue passed as val should never have a type of jbvBinary, and
1594 * neither should any of its sub-components. Those values will be produced
1595 * by convertJsonbArray and convertJsonbObject, the results of which will
1596 * not be passed back to this function as an argument.
1597 */
1598
1599 if (IsAJsonbScalar(val))
1600 convertJsonbScalar(buffer, header, val);
1601 else if (val->type == jbvArray)
1602 convertJsonbArray(buffer, header, val, level);
1603 else if (val->type == jbvObject)
1604 convertJsonbObject(buffer, header, val, level);
1605 else
1606 elog(ERROR, "unknown type of jsonb container to convert");
1607 }
1608
1609 static void
convertJsonbArray(StringInfo buffer,JEntry * pheader,JsonbValue * val,int level)1610 convertJsonbArray(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
1611 {
1612 int base_offset;
1613 int jentry_offset;
1614 int i;
1615 int totallen;
1616 uint32 header;
1617 int nElems = val->val.array.nElems;
1618
1619 /* Remember where in the buffer this array starts. */
1620 base_offset = buffer->len;
1621
1622 /* Align to 4-byte boundary (any padding counts as part of my data) */
1623 padBufferToInt(buffer);
1624
1625 /*
1626 * Construct the header Jentry and store it in the beginning of the
1627 * variable-length payload.
1628 */
1629 header = nElems | JB_FARRAY;
1630 if (val->val.array.rawScalar)
1631 {
1632 Assert(nElems == 1);
1633 Assert(level == 0);
1634 header |= JB_FSCALAR;
1635 }
1636
1637 appendToBuffer(buffer, (char *) &header, sizeof(uint32));
1638
1639 /* Reserve space for the JEntries of the elements. */
1640 jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nElems);
1641
1642 totallen = 0;
1643 for (i = 0; i < nElems; i++)
1644 {
1645 JsonbValue *elem = &val->val.array.elems[i];
1646 int len;
1647 JEntry meta;
1648
1649 /*
1650 * Convert element, producing a JEntry and appending its
1651 * variable-length data to buffer
1652 */
1653 convertJsonbValue(buffer, &meta, elem, level + 1);
1654
1655 len = JBE_OFFLENFLD(meta);
1656 totallen += len;
1657
1658 /*
1659 * Bail out if total variable-length data exceeds what will fit in a
1660 * JEntry length field. We check this in each iteration, not just
1661 * once at the end, to forestall possible integer overflow.
1662 */
1663 if (totallen > JENTRY_OFFLENMASK)
1664 ereport(ERROR,
1665 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1666 errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
1667 JENTRY_OFFLENMASK)));
1668
1669 /*
1670 * Convert each JB_OFFSET_STRIDE'th length to an offset.
1671 */
1672 if ((i % JB_OFFSET_STRIDE) == 0)
1673 meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
1674
1675 copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1676 jentry_offset += sizeof(JEntry);
1677 }
1678
1679 /* Total data size is everything we've appended to buffer */
1680 totallen = buffer->len - base_offset;
1681
1682 /* Check length again, since we didn't include the metadata above */
1683 if (totallen > JENTRY_OFFLENMASK)
1684 ereport(ERROR,
1685 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1686 errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
1687 JENTRY_OFFLENMASK)));
1688
1689 /* Initialize the header of this node in the container's JEntry array */
1690 *pheader = JENTRY_ISCONTAINER | totallen;
1691 }
1692
1693 static void
convertJsonbObject(StringInfo buffer,JEntry * pheader,JsonbValue * val,int level)1694 convertJsonbObject(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
1695 {
1696 int base_offset;
1697 int jentry_offset;
1698 int i;
1699 int totallen;
1700 uint32 header;
1701 int nPairs = val->val.object.nPairs;
1702
1703 /* Remember where in the buffer this object starts. */
1704 base_offset = buffer->len;
1705
1706 /* Align to 4-byte boundary (any padding counts as part of my data) */
1707 padBufferToInt(buffer);
1708
1709 /*
1710 * Construct the header Jentry and store it in the beginning of the
1711 * variable-length payload.
1712 */
1713 header = nPairs | JB_FOBJECT;
1714 appendToBuffer(buffer, (char *) &header, sizeof(uint32));
1715
1716 /* Reserve space for the JEntries of the keys and values. */
1717 jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nPairs * 2);
1718
1719 /*
1720 * Iterate over the keys, then over the values, since that is the ordering
1721 * we want in the on-disk representation.
1722 */
1723 totallen = 0;
1724 for (i = 0; i < nPairs; i++)
1725 {
1726 JsonbPair *pair = &val->val.object.pairs[i];
1727 int len;
1728 JEntry meta;
1729
1730 /*
1731 * Convert key, producing a JEntry and appending its variable-length
1732 * data to buffer
1733 */
1734 convertJsonbScalar(buffer, &meta, &pair->key);
1735
1736 len = JBE_OFFLENFLD(meta);
1737 totallen += len;
1738
1739 /*
1740 * Bail out if total variable-length data exceeds what will fit in a
1741 * JEntry length field. We check this in each iteration, not just
1742 * once at the end, to forestall possible integer overflow.
1743 */
1744 if (totallen > JENTRY_OFFLENMASK)
1745 ereport(ERROR,
1746 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1747 errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1748 JENTRY_OFFLENMASK)));
1749
1750 /*
1751 * Convert each JB_OFFSET_STRIDE'th length to an offset.
1752 */
1753 if ((i % JB_OFFSET_STRIDE) == 0)
1754 meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
1755
1756 copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1757 jentry_offset += sizeof(JEntry);
1758 }
1759 for (i = 0; i < nPairs; i++)
1760 {
1761 JsonbPair *pair = &val->val.object.pairs[i];
1762 int len;
1763 JEntry meta;
1764
1765 /*
1766 * Convert value, producing a JEntry and appending its variable-length
1767 * data to buffer
1768 */
1769 convertJsonbValue(buffer, &meta, &pair->value, level + 1);
1770
1771 len = JBE_OFFLENFLD(meta);
1772 totallen += len;
1773
1774 /*
1775 * Bail out if total variable-length data exceeds what will fit in a
1776 * JEntry length field. We check this in each iteration, not just
1777 * once at the end, to forestall possible integer overflow.
1778 */
1779 if (totallen > JENTRY_OFFLENMASK)
1780 ereport(ERROR,
1781 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1782 errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1783 JENTRY_OFFLENMASK)));
1784
1785 /*
1786 * Convert each JB_OFFSET_STRIDE'th length to an offset.
1787 */
1788 if (((i + nPairs) % JB_OFFSET_STRIDE) == 0)
1789 meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
1790
1791 copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1792 jentry_offset += sizeof(JEntry);
1793 }
1794
1795 /* Total data size is everything we've appended to buffer */
1796 totallen = buffer->len - base_offset;
1797
1798 /* Check length again, since we didn't include the metadata above */
1799 if (totallen > JENTRY_OFFLENMASK)
1800 ereport(ERROR,
1801 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1802 errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1803 JENTRY_OFFLENMASK)));
1804
1805 /* Initialize the header of this node in the container's JEntry array */
1806 *pheader = JENTRY_ISCONTAINER | totallen;
1807 }
1808
1809 static void
convertJsonbScalar(StringInfo buffer,JEntry * jentry,JsonbValue * scalarVal)1810 convertJsonbScalar(StringInfo buffer, JEntry *jentry, JsonbValue *scalarVal)
1811 {
1812 int numlen;
1813 short padlen;
1814
1815 switch (scalarVal->type)
1816 {
1817 case jbvNull:
1818 *jentry = JENTRY_ISNULL;
1819 break;
1820
1821 case jbvString:
1822 appendToBuffer(buffer, scalarVal->val.string.val, scalarVal->val.string.len);
1823
1824 *jentry = scalarVal->val.string.len;
1825 break;
1826
1827 case jbvNumeric:
1828 numlen = VARSIZE_ANY(scalarVal->val.numeric);
1829 padlen = padBufferToInt(buffer);
1830
1831 appendToBuffer(buffer, (char *) scalarVal->val.numeric, numlen);
1832
1833 *jentry = JENTRY_ISNUMERIC | (padlen + numlen);
1834 break;
1835
1836 case jbvBool:
1837 *jentry = (scalarVal->val.boolean) ?
1838 JENTRY_ISBOOL_TRUE : JENTRY_ISBOOL_FALSE;
1839 break;
1840
1841 case jbvDatetime:
1842 {
1843 char buf[MAXDATELEN + 1];
1844 size_t len;
1845
1846 JsonEncodeDateTime(buf,
1847 scalarVal->val.datetime.value,
1848 scalarVal->val.datetime.typid,
1849 &scalarVal->val.datetime.tz);
1850 len = strlen(buf);
1851 appendToBuffer(buffer, buf, len);
1852
1853 *jentry = len;
1854 }
1855 break;
1856
1857 default:
1858 elog(ERROR, "invalid jsonb scalar type");
1859 }
1860 }
1861
1862 /*
1863 * Compare two jbvString JsonbValue values, a and b.
1864 *
1865 * This is a special qsort() comparator used to sort strings in certain
1866 * internal contexts where it is sufficient to have a well-defined sort order.
1867 * In particular, object pair keys are sorted according to this criteria to
1868 * facilitate cheap binary searches where we don't care about lexical sort
1869 * order.
1870 *
1871 * a and b are first sorted based on their length. If a tie-breaker is
1872 * required, only then do we consider string binary equality.
1873 */
1874 static int
lengthCompareJsonbStringValue(const void * a,const void * b)1875 lengthCompareJsonbStringValue(const void *a, const void *b)
1876 {
1877 const JsonbValue *va = (const JsonbValue *) a;
1878 const JsonbValue *vb = (const JsonbValue *) b;
1879
1880 Assert(va->type == jbvString);
1881 Assert(vb->type == jbvString);
1882
1883 return lengthCompareJsonbString(va->val.string.val, va->val.string.len,
1884 vb->val.string.val, vb->val.string.len);
1885 }
1886
1887 /*
1888 * Subroutine for lengthCompareJsonbStringValue
1889 *
1890 * This is also useful separately to implement binary search on
1891 * JsonbContainers.
1892 */
1893 static int
lengthCompareJsonbString(const char * val1,int len1,const char * val2,int len2)1894 lengthCompareJsonbString(const char *val1, int len1, const char *val2, int len2)
1895 {
1896 if (len1 == len2)
1897 return memcmp(val1, val2, len1);
1898 else
1899 return len1 > len2 ? 1 : -1;
1900 }
1901
1902 /*
1903 * qsort_arg() comparator to compare JsonbPair values.
1904 *
1905 * Third argument 'binequal' may point to a bool. If it's set, *binequal is set
1906 * to true iff a and b have full binary equality, since some callers have an
1907 * interest in whether the two values are equal or merely equivalent.
1908 *
1909 * N.B: String comparisons here are "length-wise"
1910 *
1911 * Pairs with equals keys are ordered such that the order field is respected.
1912 */
1913 static int
lengthCompareJsonbPair(const void * a,const void * b,void * binequal)1914 lengthCompareJsonbPair(const void *a, const void *b, void *binequal)
1915 {
1916 const JsonbPair *pa = (const JsonbPair *) a;
1917 const JsonbPair *pb = (const JsonbPair *) b;
1918 int res;
1919
1920 res = lengthCompareJsonbStringValue(&pa->key, &pb->key);
1921 if (res == 0 && binequal)
1922 *((bool *) binequal) = true;
1923
1924 /*
1925 * Guarantee keeping order of equal pair. Unique algorithm will prefer
1926 * first element as value.
1927 */
1928 if (res == 0)
1929 res = (pa->order > pb->order) ? -1 : 1;
1930
1931 return res;
1932 }
1933
1934 /*
1935 * Sort and unique-ify pairs in JsonbValue object
1936 */
1937 static void
uniqueifyJsonbObject(JsonbValue * object)1938 uniqueifyJsonbObject(JsonbValue *object)
1939 {
1940 bool hasNonUniq = false;
1941
1942 Assert(object->type == jbvObject);
1943
1944 if (object->val.object.nPairs > 1)
1945 qsort_arg(object->val.object.pairs, object->val.object.nPairs, sizeof(JsonbPair),
1946 lengthCompareJsonbPair, &hasNonUniq);
1947
1948 if (hasNonUniq)
1949 {
1950 JsonbPair *ptr = object->val.object.pairs + 1,
1951 *res = object->val.object.pairs;
1952
1953 while (ptr - object->val.object.pairs < object->val.object.nPairs)
1954 {
1955 /* Avoid copying over duplicate */
1956 if (lengthCompareJsonbStringValue(ptr, res) != 0)
1957 {
1958 res++;
1959 if (ptr != res)
1960 memcpy(res, ptr, sizeof(JsonbPair));
1961 }
1962 ptr++;
1963 }
1964
1965 object->val.object.nPairs = res + 1 - object->val.object.pairs;
1966 }
1967 }
1968