1 /*-------------------------------------------------------------------------
2  *
3  * jsonb_util.c
4  *	  converting between Jsonb and JsonbValues, and iterating.
5  *
6  * Copyright (c) 2014-2018, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *	  src/backend/utils/adt/jsonb_util.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "access/hash.h"
17 #include "catalog/pg_collation.h"
18 #include "miscadmin.h"
19 #include "utils/builtins.h"
20 #include "utils/jsonb.h"
21 #include "utils/memutils.h"
22 #include "utils/varlena.h"
23 
24 /*
25  * Maximum number of elements in an array (or key/value pairs in an object).
26  * This is limited by two things: the size of the JEntry array must fit
27  * in MaxAllocSize, and the number of elements (or pairs) must fit in the bits
28  * reserved for that in the JsonbContainer.header field.
29  *
30  * (The total size of an array's or object's elements is also limited by
31  * JENTRY_OFFLENMASK, but we're not concerned about that here.)
32  */
33 #define JSONB_MAX_ELEMS (Min(MaxAllocSize / sizeof(JsonbValue), JB_CMASK))
34 #define JSONB_MAX_PAIRS (Min(MaxAllocSize / sizeof(JsonbPair), JB_CMASK))
35 
36 static void fillJsonbValue(JsonbContainer *container, int index,
37 			   char *base_addr, uint32 offset,
38 			   JsonbValue *result);
39 static bool equalsJsonbScalarValue(JsonbValue *a, JsonbValue *b);
40 static int	compareJsonbScalarValue(JsonbValue *a, JsonbValue *b);
41 static Jsonb *convertToJsonb(JsonbValue *val);
42 static void convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
43 static void convertJsonbArray(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
44 static void convertJsonbObject(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
45 static void convertJsonbScalar(StringInfo buffer, JEntry *header, JsonbValue *scalarVal);
46 
47 static int	reserveFromBuffer(StringInfo buffer, int len);
48 static void appendToBuffer(StringInfo buffer, const char *data, int len);
49 static void copyToBuffer(StringInfo buffer, int offset, const char *data, int len);
50 static short padBufferToInt(StringInfo buffer);
51 
52 static JsonbIterator *iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent);
53 static JsonbIterator *freeAndGetParent(JsonbIterator *it);
54 static JsonbParseState *pushState(JsonbParseState **pstate);
55 static void appendKey(JsonbParseState *pstate, JsonbValue *scalarVal);
56 static void appendValue(JsonbParseState *pstate, JsonbValue *scalarVal);
57 static void appendElement(JsonbParseState *pstate, JsonbValue *scalarVal);
58 static int	lengthCompareJsonbStringValue(const void *a, const void *b);
59 static int	lengthCompareJsonbPair(const void *a, const void *b, void *arg);
60 static void uniqueifyJsonbObject(JsonbValue *object);
61 static JsonbValue *pushJsonbValueScalar(JsonbParseState **pstate,
62 					 JsonbIteratorToken seq,
63 					 JsonbValue *scalarVal);
64 
65 /*
66  * Turn an in-memory JsonbValue into a Jsonb for on-disk storage.
67  *
68  * There isn't a JsonbToJsonbValue(), because generally we find it more
69  * convenient to directly iterate through the Jsonb representation and only
70  * really convert nested scalar values.  JsonbIteratorNext() does this, so that
71  * clients of the iteration code don't have to directly deal with the binary
72  * representation (JsonbDeepContains() is a notable exception, although all
73  * exceptions are internal to this module).  In general, functions that accept
74  * a JsonbValue argument are concerned with the manipulation of scalar values,
75  * or simple containers of scalar values, where it would be inconvenient to
76  * deal with a great amount of other state.
77  */
78 Jsonb *
JsonbValueToJsonb(JsonbValue * val)79 JsonbValueToJsonb(JsonbValue *val)
80 {
81 	Jsonb	   *out;
82 
83 	if (IsAJsonbScalar(val))
84 	{
85 		/* Scalar value */
86 		JsonbParseState *pstate = NULL;
87 		JsonbValue *res;
88 		JsonbValue	scalarArray;
89 
90 		scalarArray.type = jbvArray;
91 		scalarArray.val.array.rawScalar = true;
92 		scalarArray.val.array.nElems = 1;
93 
94 		pushJsonbValue(&pstate, WJB_BEGIN_ARRAY, &scalarArray);
95 		pushJsonbValue(&pstate, WJB_ELEM, val);
96 		res = pushJsonbValue(&pstate, WJB_END_ARRAY, NULL);
97 
98 		out = convertToJsonb(res);
99 	}
100 	else if (val->type == jbvObject || val->type == jbvArray)
101 	{
102 		out = convertToJsonb(val);
103 	}
104 	else
105 	{
106 		Assert(val->type == jbvBinary);
107 		out = palloc(VARHDRSZ + val->val.binary.len);
108 		SET_VARSIZE(out, VARHDRSZ + val->val.binary.len);
109 		memcpy(VARDATA(out), val->val.binary.data, val->val.binary.len);
110 	}
111 
112 	return out;
113 }
114 
115 /*
116  * Get the offset of the variable-length portion of a Jsonb node within
117  * the variable-length-data part of its container.  The node is identified
118  * by index within the container's JEntry array.
119  */
120 uint32
getJsonbOffset(const JsonbContainer * jc,int index)121 getJsonbOffset(const JsonbContainer *jc, int index)
122 {
123 	uint32		offset = 0;
124 	int			i;
125 
126 	/*
127 	 * Start offset of this entry is equal to the end offset of the previous
128 	 * entry.  Walk backwards to the most recent entry stored as an end
129 	 * offset, returning that offset plus any lengths in between.
130 	 */
131 	for (i = index - 1; i >= 0; i--)
132 	{
133 		offset += JBE_OFFLENFLD(jc->children[i]);
134 		if (JBE_HAS_OFF(jc->children[i]))
135 			break;
136 	}
137 
138 	return offset;
139 }
140 
141 /*
142  * Get the length of the variable-length portion of a Jsonb node.
143  * The node is identified by index within the container's JEntry array.
144  */
145 uint32
getJsonbLength(const JsonbContainer * jc,int index)146 getJsonbLength(const JsonbContainer *jc, int index)
147 {
148 	uint32		off;
149 	uint32		len;
150 
151 	/*
152 	 * If the length is stored directly in the JEntry, just return it.
153 	 * Otherwise, get the begin offset of the entry, and subtract that from
154 	 * the stored end+1 offset.
155 	 */
156 	if (JBE_HAS_OFF(jc->children[index]))
157 	{
158 		off = getJsonbOffset(jc, index);
159 		len = JBE_OFFLENFLD(jc->children[index]) - off;
160 	}
161 	else
162 		len = JBE_OFFLENFLD(jc->children[index]);
163 
164 	return len;
165 }
166 
167 /*
168  * BT comparator worker function.  Returns an integer less than, equal to, or
169  * greater than zero, indicating whether a is less than, equal to, or greater
170  * than b.  Consistent with the requirements for a B-Tree operator class
171  *
172  * Strings are compared lexically, in contrast with other places where we use a
173  * much simpler comparator logic for searching through Strings.  Since this is
174  * called from B-Tree support function 1, we're careful about not leaking
175  * memory here.
176  */
177 int
compareJsonbContainers(JsonbContainer * a,JsonbContainer * b)178 compareJsonbContainers(JsonbContainer *a, JsonbContainer *b)
179 {
180 	JsonbIterator *ita,
181 			   *itb;
182 	int			res = 0;
183 
184 	ita = JsonbIteratorInit(a);
185 	itb = JsonbIteratorInit(b);
186 
187 	do
188 	{
189 		JsonbValue	va,
190 					vb;
191 		JsonbIteratorToken ra,
192 					rb;
193 
194 		ra = JsonbIteratorNext(&ita, &va, false);
195 		rb = JsonbIteratorNext(&itb, &vb, false);
196 
197 		if (ra == rb)
198 		{
199 			if (ra == WJB_DONE)
200 			{
201 				/* Decisively equal */
202 				break;
203 			}
204 
205 			if (ra == WJB_END_ARRAY || ra == WJB_END_OBJECT)
206 			{
207 				/*
208 				 * There is no array or object to compare at this stage of
209 				 * processing.  jbvArray/jbvObject values are compared
210 				 * initially, at the WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT
211 				 * tokens.
212 				 */
213 				continue;
214 			}
215 
216 			if (va.type == vb.type)
217 			{
218 				switch (va.type)
219 				{
220 					case jbvString:
221 					case jbvNull:
222 					case jbvNumeric:
223 					case jbvBool:
224 						res = compareJsonbScalarValue(&va, &vb);
225 						break;
226 					case jbvArray:
227 
228 						/*
229 						 * This could be a "raw scalar" pseudo array.  That's
230 						 * a special case here though, since we still want the
231 						 * general type-based comparisons to apply, and as far
232 						 * as we're concerned a pseudo array is just a scalar.
233 						 */
234 						if (va.val.array.rawScalar != vb.val.array.rawScalar)
235 							res = (va.val.array.rawScalar) ? -1 : 1;
236 						if (va.val.array.nElems != vb.val.array.nElems)
237 							res = (va.val.array.nElems > vb.val.array.nElems) ? 1 : -1;
238 						break;
239 					case jbvObject:
240 						if (va.val.object.nPairs != vb.val.object.nPairs)
241 							res = (va.val.object.nPairs > vb.val.object.nPairs) ? 1 : -1;
242 						break;
243 					case jbvBinary:
244 						elog(ERROR, "unexpected jbvBinary value");
245 				}
246 			}
247 			else
248 			{
249 				/* Type-defined order */
250 				res = (va.type > vb.type) ? 1 : -1;
251 			}
252 		}
253 		else
254 		{
255 			/*
256 			 * It's safe to assume that the types differed, and that the va
257 			 * and vb values passed were set.
258 			 *
259 			 * If the two values were of the same container type, then there'd
260 			 * have been a chance to observe the variation in the number of
261 			 * elements/pairs (when processing WJB_BEGIN_OBJECT, say). They're
262 			 * either two heterogeneously-typed containers, or a container and
263 			 * some scalar type.
264 			 *
265 			 * We don't have to consider the WJB_END_ARRAY and WJB_END_OBJECT
266 			 * cases here, because we would have seen the corresponding
267 			 * WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT tokens first, and
268 			 * concluded that they don't match.
269 			 */
270 			Assert(ra != WJB_END_ARRAY && ra != WJB_END_OBJECT);
271 			Assert(rb != WJB_END_ARRAY && rb != WJB_END_OBJECT);
272 
273 			Assert(va.type != vb.type);
274 			Assert(va.type != jbvBinary);
275 			Assert(vb.type != jbvBinary);
276 			/* Type-defined order */
277 			res = (va.type > vb.type) ? 1 : -1;
278 		}
279 	}
280 	while (res == 0);
281 
282 	while (ita != NULL)
283 	{
284 		JsonbIterator *i = ita->parent;
285 
286 		pfree(ita);
287 		ita = i;
288 	}
289 	while (itb != NULL)
290 	{
291 		JsonbIterator *i = itb->parent;
292 
293 		pfree(itb);
294 		itb = i;
295 	}
296 
297 	return res;
298 }
299 
300 /*
301  * Find value in object (i.e. the "value" part of some key/value pair in an
302  * object), or find a matching element if we're looking through an array.  Do
303  * so on the basis of equality of the object keys only, or alternatively
304  * element values only, with a caller-supplied value "key".  The "flags"
305  * argument allows the caller to specify which container types are of interest.
306  *
307  * This exported utility function exists to facilitate various cases concerned
308  * with "containment".  If asked to look through an object, the caller had
309  * better pass a Jsonb String, because their keys can only be strings.
310  * Otherwise, for an array, any type of JsonbValue will do.
311  *
312  * In order to proceed with the search, it is necessary for callers to have
313  * both specified an interest in exactly one particular container type with an
314  * appropriate flag, as well as having the pointed-to Jsonb container be of
315  * one of those same container types at the top level. (Actually, we just do
316  * whichever makes sense to save callers the trouble of figuring it out - at
317  * most one can make sense, because the container either points to an array
318  * (possibly a "raw scalar" pseudo array) or an object.)
319  *
320  * Note that we can return a jbvBinary JsonbValue if this is called on an
321  * object, but we never do so on an array.  If the caller asks to look through
322  * a container type that is not of the type pointed to by the container,
323  * immediately fall through and return NULL.  If we cannot find the value,
324  * return NULL.  Otherwise, return palloc()'d copy of value.
325  */
326 JsonbValue *
findJsonbValueFromContainer(JsonbContainer * container,uint32 flags,JsonbValue * key)327 findJsonbValueFromContainer(JsonbContainer *container, uint32 flags,
328 							JsonbValue *key)
329 {
330 	JEntry	   *children = container->children;
331 	int			count = JsonContainerSize(container);
332 	JsonbValue *result;
333 
334 	Assert((flags & ~(JB_FARRAY | JB_FOBJECT)) == 0);
335 
336 	/* Quick out without a palloc cycle if object/array is empty */
337 	if (count <= 0)
338 		return NULL;
339 
340 	result = palloc(sizeof(JsonbValue));
341 
342 	if ((flags & JB_FARRAY) && JsonContainerIsArray(container))
343 	{
344 		char	   *base_addr = (char *) (children + count);
345 		uint32		offset = 0;
346 		int			i;
347 
348 		for (i = 0; i < count; i++)
349 		{
350 			fillJsonbValue(container, i, base_addr, offset, result);
351 
352 			if (key->type == result->type)
353 			{
354 				if (equalsJsonbScalarValue(key, result))
355 					return result;
356 			}
357 
358 			JBE_ADVANCE_OFFSET(offset, children[i]);
359 		}
360 	}
361 	else if ((flags & JB_FOBJECT) && JsonContainerIsObject(container))
362 	{
363 		/* Since this is an object, account for *Pairs* of Jentrys */
364 		char	   *base_addr = (char *) (children + count * 2);
365 		uint32		stopLow = 0,
366 					stopHigh = count;
367 
368 		/* Object key passed by caller must be a string */
369 		Assert(key->type == jbvString);
370 
371 		/* Binary search on object/pair keys *only* */
372 		while (stopLow < stopHigh)
373 		{
374 			uint32		stopMiddle;
375 			int			difference;
376 			JsonbValue	candidate;
377 
378 			stopMiddle = stopLow + (stopHigh - stopLow) / 2;
379 
380 			candidate.type = jbvString;
381 			candidate.val.string.val =
382 				base_addr + getJsonbOffset(container, stopMiddle);
383 			candidate.val.string.len = getJsonbLength(container, stopMiddle);
384 
385 			difference = lengthCompareJsonbStringValue(&candidate, key);
386 
387 			if (difference == 0)
388 			{
389 				/* Found our key, return corresponding value */
390 				int			index = stopMiddle + count;
391 
392 				fillJsonbValue(container, index, base_addr,
393 							   getJsonbOffset(container, index),
394 							   result);
395 
396 				return result;
397 			}
398 			else
399 			{
400 				if (difference < 0)
401 					stopLow = stopMiddle + 1;
402 				else
403 					stopHigh = stopMiddle;
404 			}
405 		}
406 	}
407 
408 	/* Not found */
409 	pfree(result);
410 	return NULL;
411 }
412 
413 /*
414  * Get i-th value of a Jsonb array.
415  *
416  * Returns palloc()'d copy of the value, or NULL if it does not exist.
417  */
418 JsonbValue *
getIthJsonbValueFromContainer(JsonbContainer * container,uint32 i)419 getIthJsonbValueFromContainer(JsonbContainer *container, uint32 i)
420 {
421 	JsonbValue *result;
422 	char	   *base_addr;
423 	uint32		nelements;
424 
425 	if (!JsonContainerIsArray(container))
426 		elog(ERROR, "not a jsonb array");
427 
428 	nelements = JsonContainerSize(container);
429 	base_addr = (char *) &container->children[nelements];
430 
431 	if (i >= nelements)
432 		return NULL;
433 
434 	result = palloc(sizeof(JsonbValue));
435 
436 	fillJsonbValue(container, i, base_addr,
437 				   getJsonbOffset(container, i),
438 				   result);
439 
440 	return result;
441 }
442 
443 /*
444  * A helper function to fill in a JsonbValue to represent an element of an
445  * array, or a key or value of an object.
446  *
447  * The node's JEntry is at container->children[index], and its variable-length
448  * data is at base_addr + offset.  We make the caller determine the offset
449  * since in many cases the caller can amortize that work across multiple
450  * children.  When it can't, it can just call getJsonbOffset().
451  *
452  * A nested array or object will be returned as jbvBinary, ie. it won't be
453  * expanded.
454  */
455 static void
fillJsonbValue(JsonbContainer * container,int index,char * base_addr,uint32 offset,JsonbValue * result)456 fillJsonbValue(JsonbContainer *container, int index,
457 			   char *base_addr, uint32 offset,
458 			   JsonbValue *result)
459 {
460 	JEntry		entry = container->children[index];
461 
462 	if (JBE_ISNULL(entry))
463 	{
464 		result->type = jbvNull;
465 	}
466 	else if (JBE_ISSTRING(entry))
467 	{
468 		result->type = jbvString;
469 		result->val.string.val = base_addr + offset;
470 		result->val.string.len = getJsonbLength(container, index);
471 		Assert(result->val.string.len >= 0);
472 	}
473 	else if (JBE_ISNUMERIC(entry))
474 	{
475 		result->type = jbvNumeric;
476 		result->val.numeric = (Numeric) (base_addr + INTALIGN(offset));
477 	}
478 	else if (JBE_ISBOOL_TRUE(entry))
479 	{
480 		result->type = jbvBool;
481 		result->val.boolean = true;
482 	}
483 	else if (JBE_ISBOOL_FALSE(entry))
484 	{
485 		result->type = jbvBool;
486 		result->val.boolean = false;
487 	}
488 	else
489 	{
490 		Assert(JBE_ISCONTAINER(entry));
491 		result->type = jbvBinary;
492 		/* Remove alignment padding from data pointer and length */
493 		result->val.binary.data = (JsonbContainer *) (base_addr + INTALIGN(offset));
494 		result->val.binary.len = getJsonbLength(container, index) -
495 			(INTALIGN(offset) - offset);
496 	}
497 }
498 
499 /*
500  * Push JsonbValue into JsonbParseState.
501  *
502  * Used when parsing JSON tokens to form Jsonb, or when converting an in-memory
503  * JsonbValue to a Jsonb.
504  *
505  * Initial state of *JsonbParseState is NULL, since it'll be allocated here
506  * originally (caller will get JsonbParseState back by reference).
507  *
508  * Only sequential tokens pertaining to non-container types should pass a
509  * JsonbValue.  There is one exception -- WJB_BEGIN_ARRAY callers may pass a
510  * "raw scalar" pseudo array to append it - the actual scalar should be passed
511  * next and it will be added as the only member of the array.
512  *
513  * Values of type jvbBinary, which are rolled up arrays and objects,
514  * are unpacked before being added to the result.
515  */
516 JsonbValue *
pushJsonbValue(JsonbParseState ** pstate,JsonbIteratorToken seq,JsonbValue * jbval)517 pushJsonbValue(JsonbParseState **pstate, JsonbIteratorToken seq,
518 			   JsonbValue *jbval)
519 {
520 	JsonbIterator *it;
521 	JsonbValue *res = NULL;
522 	JsonbValue	v;
523 	JsonbIteratorToken tok;
524 
525 	if (!jbval || (seq != WJB_ELEM && seq != WJB_VALUE) ||
526 		jbval->type != jbvBinary)
527 	{
528 		/* drop through */
529 		return pushJsonbValueScalar(pstate, seq, jbval);
530 	}
531 
532 	/* unpack the binary and add each piece to the pstate */
533 	it = JsonbIteratorInit(jbval->val.binary.data);
534 	while ((tok = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
535 		res = pushJsonbValueScalar(pstate, tok,
536 								   tok < WJB_BEGIN_ARRAY ? &v : NULL);
537 
538 	return res;
539 }
540 
541 /*
542  * Do the actual pushing, with only scalar or pseudo-scalar-array values
543  * accepted.
544  */
545 static JsonbValue *
pushJsonbValueScalar(JsonbParseState ** pstate,JsonbIteratorToken seq,JsonbValue * scalarVal)546 pushJsonbValueScalar(JsonbParseState **pstate, JsonbIteratorToken seq,
547 					 JsonbValue *scalarVal)
548 {
549 	JsonbValue *result = NULL;
550 
551 	switch (seq)
552 	{
553 		case WJB_BEGIN_ARRAY:
554 			Assert(!scalarVal || scalarVal->val.array.rawScalar);
555 			*pstate = pushState(pstate);
556 			result = &(*pstate)->contVal;
557 			(*pstate)->contVal.type = jbvArray;
558 			(*pstate)->contVal.val.array.nElems = 0;
559 			(*pstate)->contVal.val.array.rawScalar = (scalarVal &&
560 													  scalarVal->val.array.rawScalar);
561 			if (scalarVal && scalarVal->val.array.nElems > 0)
562 			{
563 				/* Assume that this array is still really a scalar */
564 				Assert(scalarVal->type == jbvArray);
565 				(*pstate)->size = scalarVal->val.array.nElems;
566 			}
567 			else
568 			{
569 				(*pstate)->size = 4;
570 			}
571 			(*pstate)->contVal.val.array.elems = palloc(sizeof(JsonbValue) *
572 														(*pstate)->size);
573 			break;
574 		case WJB_BEGIN_OBJECT:
575 			Assert(!scalarVal);
576 			*pstate = pushState(pstate);
577 			result = &(*pstate)->contVal;
578 			(*pstate)->contVal.type = jbvObject;
579 			(*pstate)->contVal.val.object.nPairs = 0;
580 			(*pstate)->size = 4;
581 			(*pstate)->contVal.val.object.pairs = palloc(sizeof(JsonbPair) *
582 														 (*pstate)->size);
583 			break;
584 		case WJB_KEY:
585 			Assert(scalarVal->type == jbvString);
586 			appendKey(*pstate, scalarVal);
587 			break;
588 		case WJB_VALUE:
589 			Assert(IsAJsonbScalar(scalarVal));
590 			appendValue(*pstate, scalarVal);
591 			break;
592 		case WJB_ELEM:
593 			Assert(IsAJsonbScalar(scalarVal));
594 			appendElement(*pstate, scalarVal);
595 			break;
596 		case WJB_END_OBJECT:
597 			uniqueifyJsonbObject(&(*pstate)->contVal);
598 			/* fall through! */
599 		case WJB_END_ARRAY:
600 			/* Steps here common to WJB_END_OBJECT case */
601 			Assert(!scalarVal);
602 			result = &(*pstate)->contVal;
603 
604 			/*
605 			 * Pop stack and push current array/object as value in parent
606 			 * array/object
607 			 */
608 			*pstate = (*pstate)->next;
609 			if (*pstate)
610 			{
611 				switch ((*pstate)->contVal.type)
612 				{
613 					case jbvArray:
614 						appendElement(*pstate, result);
615 						break;
616 					case jbvObject:
617 						appendValue(*pstate, result);
618 						break;
619 					default:
620 						elog(ERROR, "invalid jsonb container type");
621 				}
622 			}
623 			break;
624 		default:
625 			elog(ERROR, "unrecognized jsonb sequential processing token");
626 	}
627 
628 	return result;
629 }
630 
631 /*
632  * pushJsonbValue() worker:  Iteration-like forming of Jsonb
633  */
634 static JsonbParseState *
pushState(JsonbParseState ** pstate)635 pushState(JsonbParseState **pstate)
636 {
637 	JsonbParseState *ns = palloc(sizeof(JsonbParseState));
638 
639 	ns->next = *pstate;
640 	return ns;
641 }
642 
643 /*
644  * pushJsonbValue() worker:  Append a pair key to state when generating a Jsonb
645  */
646 static void
appendKey(JsonbParseState * pstate,JsonbValue * string)647 appendKey(JsonbParseState *pstate, JsonbValue *string)
648 {
649 	JsonbValue *object = &pstate->contVal;
650 
651 	Assert(object->type == jbvObject);
652 	Assert(string->type == jbvString);
653 
654 	if (object->val.object.nPairs >= JSONB_MAX_PAIRS)
655 		ereport(ERROR,
656 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
657 				 errmsg("number of jsonb object pairs exceeds the maximum allowed (%zu)",
658 						JSONB_MAX_PAIRS)));
659 
660 	if (object->val.object.nPairs >= pstate->size)
661 	{
662 		pstate->size *= 2;
663 		object->val.object.pairs = repalloc(object->val.object.pairs,
664 											sizeof(JsonbPair) * pstate->size);
665 	}
666 
667 	object->val.object.pairs[object->val.object.nPairs].key = *string;
668 	object->val.object.pairs[object->val.object.nPairs].order = object->val.object.nPairs;
669 }
670 
671 /*
672  * pushJsonbValue() worker:  Append a pair value to state when generating a
673  * Jsonb
674  */
675 static void
appendValue(JsonbParseState * pstate,JsonbValue * scalarVal)676 appendValue(JsonbParseState *pstate, JsonbValue *scalarVal)
677 {
678 	JsonbValue *object = &pstate->contVal;
679 
680 	Assert(object->type == jbvObject);
681 
682 	object->val.object.pairs[object->val.object.nPairs++].value = *scalarVal;
683 }
684 
685 /*
686  * pushJsonbValue() worker:  Append an element to state when generating a Jsonb
687  */
688 static void
appendElement(JsonbParseState * pstate,JsonbValue * scalarVal)689 appendElement(JsonbParseState *pstate, JsonbValue *scalarVal)
690 {
691 	JsonbValue *array = &pstate->contVal;
692 
693 	Assert(array->type == jbvArray);
694 
695 	if (array->val.array.nElems >= JSONB_MAX_ELEMS)
696 		ereport(ERROR,
697 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
698 				 errmsg("number of jsonb array elements exceeds the maximum allowed (%zu)",
699 						JSONB_MAX_ELEMS)));
700 
701 	if (array->val.array.nElems >= pstate->size)
702 	{
703 		pstate->size *= 2;
704 		array->val.array.elems = repalloc(array->val.array.elems,
705 										  sizeof(JsonbValue) * pstate->size);
706 	}
707 
708 	array->val.array.elems[array->val.array.nElems++] = *scalarVal;
709 }
710 
711 /*
712  * Given a JsonbContainer, expand to JsonbIterator to iterate over items
713  * fully expanded to in-memory representation for manipulation.
714  *
715  * See JsonbIteratorNext() for notes on memory management.
716  */
717 JsonbIterator *
JsonbIteratorInit(JsonbContainer * container)718 JsonbIteratorInit(JsonbContainer *container)
719 {
720 	return iteratorFromContainer(container, NULL);
721 }
722 
723 /*
724  * Get next JsonbValue while iterating
725  *
726  * Caller should initially pass their own, original iterator.  They may get
727  * back a child iterator palloc()'d here instead.  The function can be relied
728  * on to free those child iterators, lest the memory allocated for highly
729  * nested objects become unreasonable, but only if callers don't end iteration
730  * early (by breaking upon having found something in a search, for example).
731  *
732  * Callers in such a scenario, that are particularly sensitive to leaking
733  * memory in a long-lived context may walk the ancestral tree from the final
734  * iterator we left them with to its oldest ancestor, pfree()ing as they go.
735  * They do not have to free any other memory previously allocated for iterators
736  * but not accessible as direct ancestors of the iterator they're last passed
737  * back.
738  *
739  * Returns "Jsonb sequential processing" token value.  Iterator "state"
740  * reflects the current stage of the process in a less granular fashion, and is
741  * mostly used here to track things internally with respect to particular
742  * iterators.
743  *
744  * Clients of this function should not have to handle any jbvBinary values
745  * (since recursive calls will deal with this), provided skipNested is false.
746  * It is our job to expand the jbvBinary representation without bothering them
747  * with it.  However, clients should not take it upon themselves to touch array
748  * or Object element/pair buffers, since their element/pair pointers are
749  * garbage.  Also, *val will not be set when returning WJB_END_ARRAY or
750  * WJB_END_OBJECT, on the assumption that it's only useful to access values
751  * when recursing in.
752  */
753 JsonbIteratorToken
JsonbIteratorNext(JsonbIterator ** it,JsonbValue * val,bool skipNested)754 JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested)
755 {
756 	if (*it == NULL)
757 		return WJB_DONE;
758 
759 	/*
760 	 * When stepping into a nested container, we jump back here to start
761 	 * processing the child. We will not recurse further in one call, because
762 	 * processing the child will always begin in JBI_ARRAY_START or
763 	 * JBI_OBJECT_START state.
764 	 */
765 recurse:
766 	switch ((*it)->state)
767 	{
768 		case JBI_ARRAY_START:
769 			/* Set v to array on first array call */
770 			val->type = jbvArray;
771 			val->val.array.nElems = (*it)->nElems;
772 
773 			/*
774 			 * v->val.array.elems is not actually set, because we aren't doing
775 			 * a full conversion
776 			 */
777 			val->val.array.rawScalar = (*it)->isScalar;
778 			(*it)->curIndex = 0;
779 			(*it)->curDataOffset = 0;
780 			(*it)->curValueOffset = 0;	/* not actually used */
781 			/* Set state for next call */
782 			(*it)->state = JBI_ARRAY_ELEM;
783 			return WJB_BEGIN_ARRAY;
784 
785 		case JBI_ARRAY_ELEM:
786 			if ((*it)->curIndex >= (*it)->nElems)
787 			{
788 				/*
789 				 * All elements within array already processed.  Report this
790 				 * to caller, and give it back original parent iterator (which
791 				 * independently tracks iteration progress at its level of
792 				 * nesting).
793 				 */
794 				*it = freeAndGetParent(*it);
795 				return WJB_END_ARRAY;
796 			}
797 
798 			fillJsonbValue((*it)->container, (*it)->curIndex,
799 						   (*it)->dataProper, (*it)->curDataOffset,
800 						   val);
801 
802 			JBE_ADVANCE_OFFSET((*it)->curDataOffset,
803 							   (*it)->children[(*it)->curIndex]);
804 			(*it)->curIndex++;
805 
806 			if (!IsAJsonbScalar(val) && !skipNested)
807 			{
808 				/* Recurse into container. */
809 				*it = iteratorFromContainer(val->val.binary.data, *it);
810 				goto recurse;
811 			}
812 			else
813 			{
814 				/*
815 				 * Scalar item in array, or a container and caller didn't want
816 				 * us to recurse into it.
817 				 */
818 				return WJB_ELEM;
819 			}
820 
821 		case JBI_OBJECT_START:
822 			/* Set v to object on first object call */
823 			val->type = jbvObject;
824 			val->val.object.nPairs = (*it)->nElems;
825 
826 			/*
827 			 * v->val.object.pairs is not actually set, because we aren't
828 			 * doing a full conversion
829 			 */
830 			(*it)->curIndex = 0;
831 			(*it)->curDataOffset = 0;
832 			(*it)->curValueOffset = getJsonbOffset((*it)->container,
833 												   (*it)->nElems);
834 			/* Set state for next call */
835 			(*it)->state = JBI_OBJECT_KEY;
836 			return WJB_BEGIN_OBJECT;
837 
838 		case JBI_OBJECT_KEY:
839 			if ((*it)->curIndex >= (*it)->nElems)
840 			{
841 				/*
842 				 * All pairs within object already processed.  Report this to
843 				 * caller, and give it back original containing iterator
844 				 * (which independently tracks iteration progress at its level
845 				 * of nesting).
846 				 */
847 				*it = freeAndGetParent(*it);
848 				return WJB_END_OBJECT;
849 			}
850 			else
851 			{
852 				/* Return key of a key/value pair.  */
853 				fillJsonbValue((*it)->container, (*it)->curIndex,
854 							   (*it)->dataProper, (*it)->curDataOffset,
855 							   val);
856 				if (val->type != jbvString)
857 					elog(ERROR, "unexpected jsonb type as object key");
858 
859 				/* Set state for next call */
860 				(*it)->state = JBI_OBJECT_VALUE;
861 				return WJB_KEY;
862 			}
863 
864 		case JBI_OBJECT_VALUE:
865 			/* Set state for next call */
866 			(*it)->state = JBI_OBJECT_KEY;
867 
868 			fillJsonbValue((*it)->container, (*it)->curIndex + (*it)->nElems,
869 						   (*it)->dataProper, (*it)->curValueOffset,
870 						   val);
871 
872 			JBE_ADVANCE_OFFSET((*it)->curDataOffset,
873 							   (*it)->children[(*it)->curIndex]);
874 			JBE_ADVANCE_OFFSET((*it)->curValueOffset,
875 							   (*it)->children[(*it)->curIndex + (*it)->nElems]);
876 			(*it)->curIndex++;
877 
878 			/*
879 			 * Value may be a container, in which case we recurse with new,
880 			 * child iterator (unless the caller asked not to, by passing
881 			 * skipNested).
882 			 */
883 			if (!IsAJsonbScalar(val) && !skipNested)
884 			{
885 				*it = iteratorFromContainer(val->val.binary.data, *it);
886 				goto recurse;
887 			}
888 			else
889 				return WJB_VALUE;
890 	}
891 
892 	elog(ERROR, "invalid iterator state");
893 	return -1;
894 }
895 
896 /*
897  * Initialize an iterator for iterating all elements in a container.
898  */
899 static JsonbIterator *
iteratorFromContainer(JsonbContainer * container,JsonbIterator * parent)900 iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent)
901 {
902 	JsonbIterator *it;
903 
904 	it = palloc0(sizeof(JsonbIterator));
905 	it->container = container;
906 	it->parent = parent;
907 	it->nElems = JsonContainerSize(container);
908 
909 	/* Array starts just after header */
910 	it->children = container->children;
911 
912 	switch (container->header & (JB_FARRAY | JB_FOBJECT))
913 	{
914 		case JB_FARRAY:
915 			it->dataProper =
916 				(char *) it->children + it->nElems * sizeof(JEntry);
917 			it->isScalar = JsonContainerIsScalar(container);
918 			/* This is either a "raw scalar", or an array */
919 			Assert(!it->isScalar || it->nElems == 1);
920 
921 			it->state = JBI_ARRAY_START;
922 			break;
923 
924 		case JB_FOBJECT:
925 			it->dataProper =
926 				(char *) it->children + it->nElems * sizeof(JEntry) * 2;
927 			it->state = JBI_OBJECT_START;
928 			break;
929 
930 		default:
931 			elog(ERROR, "unknown type of jsonb container");
932 	}
933 
934 	return it;
935 }
936 
937 /*
938  * JsonbIteratorNext() worker:	Return parent, while freeing memory for current
939  * iterator
940  */
941 static JsonbIterator *
freeAndGetParent(JsonbIterator * it)942 freeAndGetParent(JsonbIterator *it)
943 {
944 	JsonbIterator *v = it->parent;
945 
946 	pfree(it);
947 	return v;
948 }
949 
950 /*
951  * Worker for "contains" operator's function
952  *
953  * Formally speaking, containment is top-down, unordered subtree isomorphism.
954  *
955  * Takes iterators that belong to some container type.  These iterators
956  * "belong" to those values in the sense that they've just been initialized in
957  * respect of them by the caller (perhaps in a nested fashion).
958  *
959  * "val" is lhs Jsonb, and mContained is rhs Jsonb when called from top level.
960  * We determine if mContained is contained within val.
961  */
962 bool
JsonbDeepContains(JsonbIterator ** val,JsonbIterator ** mContained)963 JsonbDeepContains(JsonbIterator **val, JsonbIterator **mContained)
964 {
965 	JsonbValue	vval,
966 				vcontained;
967 	JsonbIteratorToken rval,
968 				rcont;
969 
970 	/*
971 	 * Guard against stack overflow due to overly complex Jsonb.
972 	 *
973 	 * Functions called here independently take this precaution, but that
974 	 * might not be sufficient since this is also a recursive function.
975 	 */
976 	check_stack_depth();
977 
978 	rval = JsonbIteratorNext(val, &vval, false);
979 	rcont = JsonbIteratorNext(mContained, &vcontained, false);
980 
981 	if (rval != rcont)
982 	{
983 		/*
984 		 * The differing return values can immediately be taken as indicating
985 		 * two differing container types at this nesting level, which is
986 		 * sufficient reason to give up entirely (but it should be the case
987 		 * that they're both some container type).
988 		 */
989 		Assert(rval == WJB_BEGIN_OBJECT || rval == WJB_BEGIN_ARRAY);
990 		Assert(rcont == WJB_BEGIN_OBJECT || rcont == WJB_BEGIN_ARRAY);
991 		return false;
992 	}
993 	else if (rcont == WJB_BEGIN_OBJECT)
994 	{
995 		Assert(vval.type == jbvObject);
996 		Assert(vcontained.type == jbvObject);
997 
998 		/*
999 		 * If the lhs has fewer pairs than the rhs, it can't possibly contain
1000 		 * the rhs.  (This conclusion is safe only because we de-duplicate
1001 		 * keys in all Jsonb objects; thus there can be no corresponding
1002 		 * optimization in the array case.)  The case probably won't arise
1003 		 * often, but since it's such a cheap check we may as well make it.
1004 		 */
1005 		if (vval.val.object.nPairs < vcontained.val.object.nPairs)
1006 			return false;
1007 
1008 		/* Work through rhs "is it contained within?" object */
1009 		for (;;)
1010 		{
1011 			JsonbValue *lhsVal; /* lhsVal is from pair in lhs object */
1012 
1013 			rcont = JsonbIteratorNext(mContained, &vcontained, false);
1014 
1015 			/*
1016 			 * When we get through caller's rhs "is it contained within?"
1017 			 * object without failing to find one of its values, it's
1018 			 * contained.
1019 			 */
1020 			if (rcont == WJB_END_OBJECT)
1021 				return true;
1022 
1023 			Assert(rcont == WJB_KEY);
1024 
1025 			/* First, find value by key... */
1026 			lhsVal = findJsonbValueFromContainer((*val)->container,
1027 												 JB_FOBJECT,
1028 												 &vcontained);
1029 
1030 			if (!lhsVal)
1031 				return false;
1032 
1033 			/*
1034 			 * ...at this stage it is apparent that there is at least a key
1035 			 * match for this rhs pair.
1036 			 */
1037 			rcont = JsonbIteratorNext(mContained, &vcontained, true);
1038 
1039 			Assert(rcont == WJB_VALUE);
1040 
1041 			/*
1042 			 * Compare rhs pair's value with lhs pair's value just found using
1043 			 * key
1044 			 */
1045 			if (lhsVal->type != vcontained.type)
1046 			{
1047 				return false;
1048 			}
1049 			else if (IsAJsonbScalar(lhsVal))
1050 			{
1051 				if (!equalsJsonbScalarValue(lhsVal, &vcontained))
1052 					return false;
1053 			}
1054 			else
1055 			{
1056 				/* Nested container value (object or array) */
1057 				JsonbIterator *nestval,
1058 						   *nestContained;
1059 
1060 				Assert(lhsVal->type == jbvBinary);
1061 				Assert(vcontained.type == jbvBinary);
1062 
1063 				nestval = JsonbIteratorInit(lhsVal->val.binary.data);
1064 				nestContained = JsonbIteratorInit(vcontained.val.binary.data);
1065 
1066 				/*
1067 				 * Match "value" side of rhs datum object's pair recursively.
1068 				 * It's a nested structure.
1069 				 *
1070 				 * Note that nesting still has to "match up" at the right
1071 				 * nesting sub-levels.  However, there need only be zero or
1072 				 * more matching pairs (or elements) at each nesting level
1073 				 * (provided the *rhs* pairs/elements *all* match on each
1074 				 * level), which enables searching nested structures for a
1075 				 * single String or other primitive type sub-datum quite
1076 				 * effectively (provided the user constructed the rhs nested
1077 				 * structure such that we "know where to look").
1078 				 *
1079 				 * In other words, the mapping of container nodes in the rhs
1080 				 * "vcontained" Jsonb to internal nodes on the lhs is
1081 				 * injective, and parent-child edges on the rhs must be mapped
1082 				 * to parent-child edges on the lhs to satisfy the condition
1083 				 * of containment (plus of course the mapped nodes must be
1084 				 * equal).
1085 				 */
1086 				if (!JsonbDeepContains(&nestval, &nestContained))
1087 					return false;
1088 			}
1089 		}
1090 	}
1091 	else if (rcont == WJB_BEGIN_ARRAY)
1092 	{
1093 		JsonbValue *lhsConts = NULL;
1094 		uint32		nLhsElems = vval.val.array.nElems;
1095 
1096 		Assert(vval.type == jbvArray);
1097 		Assert(vcontained.type == jbvArray);
1098 
1099 		/*
1100 		 * Handle distinction between "raw scalar" pseudo arrays, and real
1101 		 * arrays.
1102 		 *
1103 		 * A raw scalar may contain another raw scalar, and an array may
1104 		 * contain a raw scalar, but a raw scalar may not contain an array. We
1105 		 * don't do something like this for the object case, since objects can
1106 		 * only contain pairs, never raw scalars (a pair is represented by an
1107 		 * rhs object argument with a single contained pair).
1108 		 */
1109 		if (vval.val.array.rawScalar && !vcontained.val.array.rawScalar)
1110 			return false;
1111 
1112 		/* Work through rhs "is it contained within?" array */
1113 		for (;;)
1114 		{
1115 			rcont = JsonbIteratorNext(mContained, &vcontained, true);
1116 
1117 			/*
1118 			 * When we get through caller's rhs "is it contained within?"
1119 			 * array without failing to find one of its values, it's
1120 			 * contained.
1121 			 */
1122 			if (rcont == WJB_END_ARRAY)
1123 				return true;
1124 
1125 			Assert(rcont == WJB_ELEM);
1126 
1127 			if (IsAJsonbScalar(&vcontained))
1128 			{
1129 				if (!findJsonbValueFromContainer((*val)->container,
1130 												 JB_FARRAY,
1131 												 &vcontained))
1132 					return false;
1133 			}
1134 			else
1135 			{
1136 				uint32		i;
1137 
1138 				/*
1139 				 * If this is first container found in rhs array (at this
1140 				 * depth), initialize temp lhs array of containers
1141 				 */
1142 				if (lhsConts == NULL)
1143 				{
1144 					uint32		j = 0;
1145 
1146 					/* Make room for all possible values */
1147 					lhsConts = palloc(sizeof(JsonbValue) * nLhsElems);
1148 
1149 					for (i = 0; i < nLhsElems; i++)
1150 					{
1151 						/* Store all lhs elements in temp array */
1152 						rcont = JsonbIteratorNext(val, &vval, true);
1153 						Assert(rcont == WJB_ELEM);
1154 
1155 						if (vval.type == jbvBinary)
1156 							lhsConts[j++] = vval;
1157 					}
1158 
1159 					/* No container elements in temp array, so give up now */
1160 					if (j == 0)
1161 						return false;
1162 
1163 					/* We may have only partially filled array */
1164 					nLhsElems = j;
1165 				}
1166 
1167 				/* XXX: Nested array containment is O(N^2) */
1168 				for (i = 0; i < nLhsElems; i++)
1169 				{
1170 					/* Nested container value (object or array) */
1171 					JsonbIterator *nestval,
1172 							   *nestContained;
1173 					bool		contains;
1174 
1175 					nestval = JsonbIteratorInit(lhsConts[i].val.binary.data);
1176 					nestContained = JsonbIteratorInit(vcontained.val.binary.data);
1177 
1178 					contains = JsonbDeepContains(&nestval, &nestContained);
1179 
1180 					if (nestval)
1181 						pfree(nestval);
1182 					if (nestContained)
1183 						pfree(nestContained);
1184 					if (contains)
1185 						break;
1186 				}
1187 
1188 				/*
1189 				 * Report rhs container value is not contained if couldn't
1190 				 * match rhs container to *some* lhs cont
1191 				 */
1192 				if (i == nLhsElems)
1193 					return false;
1194 			}
1195 		}
1196 	}
1197 	else
1198 	{
1199 		elog(ERROR, "invalid jsonb container type");
1200 	}
1201 
1202 	elog(ERROR, "unexpectedly fell off end of jsonb container");
1203 	return false;
1204 }
1205 
1206 /*
1207  * Hash a JsonbValue scalar value, mixing the hash value into an existing
1208  * hash provided by the caller.
1209  *
1210  * Some callers may wish to independently XOR in JB_FOBJECT and JB_FARRAY
1211  * flags.
1212  */
1213 void
JsonbHashScalarValue(const JsonbValue * scalarVal,uint32 * hash)1214 JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash)
1215 {
1216 	uint32		tmp;
1217 
1218 	/* Compute hash value for scalarVal */
1219 	switch (scalarVal->type)
1220 	{
1221 		case jbvNull:
1222 			tmp = 0x01;
1223 			break;
1224 		case jbvString:
1225 			tmp = DatumGetUInt32(hash_any((const unsigned char *) scalarVal->val.string.val,
1226 										  scalarVal->val.string.len));
1227 			break;
1228 		case jbvNumeric:
1229 			/* Must hash equal numerics to equal hash codes */
1230 			tmp = DatumGetUInt32(DirectFunctionCall1(hash_numeric,
1231 													 NumericGetDatum(scalarVal->val.numeric)));
1232 			break;
1233 		case jbvBool:
1234 			tmp = scalarVal->val.boolean ? 0x02 : 0x04;
1235 
1236 			break;
1237 		default:
1238 			elog(ERROR, "invalid jsonb scalar type");
1239 			tmp = 0;			/* keep compiler quiet */
1240 			break;
1241 	}
1242 
1243 	/*
1244 	 * Combine hash values of successive keys, values and elements by rotating
1245 	 * the previous value left 1 bit, then XOR'ing in the new
1246 	 * key/value/element's hash value.
1247 	 */
1248 	*hash = (*hash << 1) | (*hash >> 31);
1249 	*hash ^= tmp;
1250 }
1251 
1252 /*
1253  * Hash a value to a 64-bit value, with a seed. Otherwise, similar to
1254  * JsonbHashScalarValue.
1255  */
1256 void
JsonbHashScalarValueExtended(const JsonbValue * scalarVal,uint64 * hash,uint64 seed)1257 JsonbHashScalarValueExtended(const JsonbValue *scalarVal, uint64 *hash,
1258 							 uint64 seed)
1259 {
1260 	uint64		tmp;
1261 
1262 	switch (scalarVal->type)
1263 	{
1264 		case jbvNull:
1265 			tmp = seed + 0x01;
1266 			break;
1267 		case jbvString:
1268 			tmp = DatumGetUInt64(hash_any_extended((const unsigned char *) scalarVal->val.string.val,
1269 												   scalarVal->val.string.len,
1270 												   seed));
1271 			break;
1272 		case jbvNumeric:
1273 			tmp = DatumGetUInt64(DirectFunctionCall2(hash_numeric_extended,
1274 													 NumericGetDatum(scalarVal->val.numeric),
1275 													 UInt64GetDatum(seed)));
1276 			break;
1277 		case jbvBool:
1278 			if (seed)
1279 				tmp = DatumGetUInt64(DirectFunctionCall2(hashcharextended,
1280 														 BoolGetDatum(scalarVal->val.boolean),
1281 														 UInt64GetDatum(seed)));
1282 			else
1283 				tmp = scalarVal->val.boolean ? 0x02 : 0x04;
1284 
1285 			break;
1286 		default:
1287 			elog(ERROR, "invalid jsonb scalar type");
1288 			break;
1289 	}
1290 
1291 	*hash = ROTATE_HIGH_AND_LOW_32BITS(*hash);
1292 	*hash ^= tmp;
1293 }
1294 
1295 /*
1296  * Are two scalar JsonbValues of the same type a and b equal?
1297  */
1298 static bool
equalsJsonbScalarValue(JsonbValue * aScalar,JsonbValue * bScalar)1299 equalsJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar)
1300 {
1301 	if (aScalar->type == bScalar->type)
1302 	{
1303 		switch (aScalar->type)
1304 		{
1305 			case jbvNull:
1306 				return true;
1307 			case jbvString:
1308 				return lengthCompareJsonbStringValue(aScalar, bScalar) == 0;
1309 			case jbvNumeric:
1310 				return DatumGetBool(DirectFunctionCall2(numeric_eq,
1311 														PointerGetDatum(aScalar->val.numeric),
1312 														PointerGetDatum(bScalar->val.numeric)));
1313 			case jbvBool:
1314 				return aScalar->val.boolean == bScalar->val.boolean;
1315 
1316 			default:
1317 				elog(ERROR, "invalid jsonb scalar type");
1318 		}
1319 	}
1320 	elog(ERROR, "jsonb scalar type mismatch");
1321 	return -1;
1322 }
1323 
1324 /*
1325  * Compare two scalar JsonbValues, returning -1, 0, or 1.
1326  *
1327  * Strings are compared using the default collation.  Used by B-tree
1328  * operators, where a lexical sort order is generally expected.
1329  */
1330 static int
compareJsonbScalarValue(JsonbValue * aScalar,JsonbValue * bScalar)1331 compareJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar)
1332 {
1333 	if (aScalar->type == bScalar->type)
1334 	{
1335 		switch (aScalar->type)
1336 		{
1337 			case jbvNull:
1338 				return 0;
1339 			case jbvString:
1340 				return varstr_cmp(aScalar->val.string.val,
1341 								  aScalar->val.string.len,
1342 								  bScalar->val.string.val,
1343 								  bScalar->val.string.len,
1344 								  DEFAULT_COLLATION_OID);
1345 			case jbvNumeric:
1346 				return DatumGetInt32(DirectFunctionCall2(numeric_cmp,
1347 														 PointerGetDatum(aScalar->val.numeric),
1348 														 PointerGetDatum(bScalar->val.numeric)));
1349 			case jbvBool:
1350 				if (aScalar->val.boolean == bScalar->val.boolean)
1351 					return 0;
1352 				else if (aScalar->val.boolean > bScalar->val.boolean)
1353 					return 1;
1354 				else
1355 					return -1;
1356 			default:
1357 				elog(ERROR, "invalid jsonb scalar type");
1358 		}
1359 	}
1360 	elog(ERROR, "jsonb scalar type mismatch");
1361 	return -1;
1362 }
1363 
1364 
1365 /*
1366  * Functions for manipulating the resizeable buffer used by convertJsonb and
1367  * its subroutines.
1368  */
1369 
1370 /*
1371  * Reserve 'len' bytes, at the end of the buffer, enlarging it if necessary.
1372  * Returns the offset to the reserved area. The caller is expected to fill
1373  * the reserved area later with copyToBuffer().
1374  */
1375 static int
reserveFromBuffer(StringInfo buffer,int len)1376 reserveFromBuffer(StringInfo buffer, int len)
1377 {
1378 	int			offset;
1379 
1380 	/* Make more room if needed */
1381 	enlargeStringInfo(buffer, len);
1382 
1383 	/* remember current offset */
1384 	offset = buffer->len;
1385 
1386 	/* reserve the space */
1387 	buffer->len += len;
1388 
1389 	/*
1390 	 * Keep a trailing null in place, even though it's not useful for us; it
1391 	 * seems best to preserve the invariants of StringInfos.
1392 	 */
1393 	buffer->data[buffer->len] = '\0';
1394 
1395 	return offset;
1396 }
1397 
1398 /*
1399  * Copy 'len' bytes to a previously reserved area in buffer.
1400  */
1401 static void
copyToBuffer(StringInfo buffer,int offset,const char * data,int len)1402 copyToBuffer(StringInfo buffer, int offset, const char *data, int len)
1403 {
1404 	memcpy(buffer->data + offset, data, len);
1405 }
1406 
1407 /*
1408  * A shorthand for reserveFromBuffer + copyToBuffer.
1409  */
1410 static void
appendToBuffer(StringInfo buffer,const char * data,int len)1411 appendToBuffer(StringInfo buffer, const char *data, int len)
1412 {
1413 	int			offset;
1414 
1415 	offset = reserveFromBuffer(buffer, len);
1416 	copyToBuffer(buffer, offset, data, len);
1417 }
1418 
1419 
1420 /*
1421  * Append padding, so that the length of the StringInfo is int-aligned.
1422  * Returns the number of padding bytes appended.
1423  */
1424 static short
padBufferToInt(StringInfo buffer)1425 padBufferToInt(StringInfo buffer)
1426 {
1427 	int			padlen,
1428 				p,
1429 				offset;
1430 
1431 	padlen = INTALIGN(buffer->len) - buffer->len;
1432 
1433 	offset = reserveFromBuffer(buffer, padlen);
1434 
1435 	/* padlen must be small, so this is probably faster than a memset */
1436 	for (p = 0; p < padlen; p++)
1437 		buffer->data[offset + p] = '\0';
1438 
1439 	return padlen;
1440 }
1441 
1442 /*
1443  * Given a JsonbValue, convert to Jsonb. The result is palloc'd.
1444  */
1445 static Jsonb *
convertToJsonb(JsonbValue * val)1446 convertToJsonb(JsonbValue *val)
1447 {
1448 	StringInfoData buffer;
1449 	JEntry		jentry;
1450 	Jsonb	   *res;
1451 
1452 	/* Should not already have binary representation */
1453 	Assert(val->type != jbvBinary);
1454 
1455 	/* Allocate an output buffer. It will be enlarged as needed */
1456 	initStringInfo(&buffer);
1457 
1458 	/* Make room for the varlena header */
1459 	reserveFromBuffer(&buffer, VARHDRSZ);
1460 
1461 	convertJsonbValue(&buffer, &jentry, val, 0);
1462 
1463 	/*
1464 	 * Note: the JEntry of the root is discarded. Therefore the root
1465 	 * JsonbContainer struct must contain enough information to tell what kind
1466 	 * of value it is.
1467 	 */
1468 
1469 	res = (Jsonb *) buffer.data;
1470 
1471 	SET_VARSIZE(res, buffer.len);
1472 
1473 	return res;
1474 }
1475 
1476 /*
1477  * Subroutine of convertJsonb: serialize a single JsonbValue into buffer.
1478  *
1479  * The JEntry header for this node is returned in *header.  It is filled in
1480  * with the length of this value and appropriate type bits.  If we wish to
1481  * store an end offset rather than a length, it is the caller's responsibility
1482  * to adjust for that.
1483  *
1484  * If the value is an array or an object, this recurses. 'level' is only used
1485  * for debugging purposes.
1486  */
1487 static void
convertJsonbValue(StringInfo buffer,JEntry * header,JsonbValue * val,int level)1488 convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level)
1489 {
1490 	check_stack_depth();
1491 
1492 	if (!val)
1493 		return;
1494 
1495 	/*
1496 	 * A JsonbValue passed as val should never have a type of jbvBinary, and
1497 	 * neither should any of its sub-components. Those values will be produced
1498 	 * by convertJsonbArray and convertJsonbObject, the results of which will
1499 	 * not be passed back to this function as an argument.
1500 	 */
1501 
1502 	if (IsAJsonbScalar(val))
1503 		convertJsonbScalar(buffer, header, val);
1504 	else if (val->type == jbvArray)
1505 		convertJsonbArray(buffer, header, val, level);
1506 	else if (val->type == jbvObject)
1507 		convertJsonbObject(buffer, header, val, level);
1508 	else
1509 		elog(ERROR, "unknown type of jsonb container to convert");
1510 }
1511 
1512 static void
convertJsonbArray(StringInfo buffer,JEntry * pheader,JsonbValue * val,int level)1513 convertJsonbArray(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
1514 {
1515 	int			base_offset;
1516 	int			jentry_offset;
1517 	int			i;
1518 	int			totallen;
1519 	uint32		header;
1520 	int			nElems = val->val.array.nElems;
1521 
1522 	/* Remember where in the buffer this array starts. */
1523 	base_offset = buffer->len;
1524 
1525 	/* Align to 4-byte boundary (any padding counts as part of my data) */
1526 	padBufferToInt(buffer);
1527 
1528 	/*
1529 	 * Construct the header Jentry and store it in the beginning of the
1530 	 * variable-length payload.
1531 	 */
1532 	header = nElems | JB_FARRAY;
1533 	if (val->val.array.rawScalar)
1534 	{
1535 		Assert(nElems == 1);
1536 		Assert(level == 0);
1537 		header |= JB_FSCALAR;
1538 	}
1539 
1540 	appendToBuffer(buffer, (char *) &header, sizeof(uint32));
1541 
1542 	/* Reserve space for the JEntries of the elements. */
1543 	jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nElems);
1544 
1545 	totallen = 0;
1546 	for (i = 0; i < nElems; i++)
1547 	{
1548 		JsonbValue *elem = &val->val.array.elems[i];
1549 		int			len;
1550 		JEntry		meta;
1551 
1552 		/*
1553 		 * Convert element, producing a JEntry and appending its
1554 		 * variable-length data to buffer
1555 		 */
1556 		convertJsonbValue(buffer, &meta, elem, level + 1);
1557 
1558 		len = JBE_OFFLENFLD(meta);
1559 		totallen += len;
1560 
1561 		/*
1562 		 * Bail out if total variable-length data exceeds what will fit in a
1563 		 * JEntry length field.  We check this in each iteration, not just
1564 		 * once at the end, to forestall possible integer overflow.
1565 		 */
1566 		if (totallen > JENTRY_OFFLENMASK)
1567 			ereport(ERROR,
1568 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1569 					 errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
1570 							JENTRY_OFFLENMASK)));
1571 
1572 		/*
1573 		 * Convert each JB_OFFSET_STRIDE'th length to an offset.
1574 		 */
1575 		if ((i % JB_OFFSET_STRIDE) == 0)
1576 			meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
1577 
1578 		copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1579 		jentry_offset += sizeof(JEntry);
1580 	}
1581 
1582 	/* Total data size is everything we've appended to buffer */
1583 	totallen = buffer->len - base_offset;
1584 
1585 	/* Check length again, since we didn't include the metadata above */
1586 	if (totallen > JENTRY_OFFLENMASK)
1587 		ereport(ERROR,
1588 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1589 				 errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
1590 						JENTRY_OFFLENMASK)));
1591 
1592 	/* Initialize the header of this node in the container's JEntry array */
1593 	*pheader = JENTRY_ISCONTAINER | totallen;
1594 }
1595 
1596 static void
convertJsonbObject(StringInfo buffer,JEntry * pheader,JsonbValue * val,int level)1597 convertJsonbObject(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
1598 {
1599 	int			base_offset;
1600 	int			jentry_offset;
1601 	int			i;
1602 	int			totallen;
1603 	uint32		header;
1604 	int			nPairs = val->val.object.nPairs;
1605 
1606 	/* Remember where in the buffer this object starts. */
1607 	base_offset = buffer->len;
1608 
1609 	/* Align to 4-byte boundary (any padding counts as part of my data) */
1610 	padBufferToInt(buffer);
1611 
1612 	/*
1613 	 * Construct the header Jentry and store it in the beginning of the
1614 	 * variable-length payload.
1615 	 */
1616 	header = nPairs | JB_FOBJECT;
1617 	appendToBuffer(buffer, (char *) &header, sizeof(uint32));
1618 
1619 	/* Reserve space for the JEntries of the keys and values. */
1620 	jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nPairs * 2);
1621 
1622 	/*
1623 	 * Iterate over the keys, then over the values, since that is the ordering
1624 	 * we want in the on-disk representation.
1625 	 */
1626 	totallen = 0;
1627 	for (i = 0; i < nPairs; i++)
1628 	{
1629 		JsonbPair  *pair = &val->val.object.pairs[i];
1630 		int			len;
1631 		JEntry		meta;
1632 
1633 		/*
1634 		 * Convert key, producing a JEntry and appending its variable-length
1635 		 * data to buffer
1636 		 */
1637 		convertJsonbScalar(buffer, &meta, &pair->key);
1638 
1639 		len = JBE_OFFLENFLD(meta);
1640 		totallen += len;
1641 
1642 		/*
1643 		 * Bail out if total variable-length data exceeds what will fit in a
1644 		 * JEntry length field.  We check this in each iteration, not just
1645 		 * once at the end, to forestall possible integer overflow.
1646 		 */
1647 		if (totallen > JENTRY_OFFLENMASK)
1648 			ereport(ERROR,
1649 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1650 					 errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1651 							JENTRY_OFFLENMASK)));
1652 
1653 		/*
1654 		 * Convert each JB_OFFSET_STRIDE'th length to an offset.
1655 		 */
1656 		if ((i % JB_OFFSET_STRIDE) == 0)
1657 			meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
1658 
1659 		copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1660 		jentry_offset += sizeof(JEntry);
1661 	}
1662 	for (i = 0; i < nPairs; i++)
1663 	{
1664 		JsonbPair  *pair = &val->val.object.pairs[i];
1665 		int			len;
1666 		JEntry		meta;
1667 
1668 		/*
1669 		 * Convert value, producing a JEntry and appending its variable-length
1670 		 * data to buffer
1671 		 */
1672 		convertJsonbValue(buffer, &meta, &pair->value, level + 1);
1673 
1674 		len = JBE_OFFLENFLD(meta);
1675 		totallen += len;
1676 
1677 		/*
1678 		 * Bail out if total variable-length data exceeds what will fit in a
1679 		 * JEntry length field.  We check this in each iteration, not just
1680 		 * once at the end, to forestall possible integer overflow.
1681 		 */
1682 		if (totallen > JENTRY_OFFLENMASK)
1683 			ereport(ERROR,
1684 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1685 					 errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1686 							JENTRY_OFFLENMASK)));
1687 
1688 		/*
1689 		 * Convert each JB_OFFSET_STRIDE'th length to an offset.
1690 		 */
1691 		if (((i + nPairs) % JB_OFFSET_STRIDE) == 0)
1692 			meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
1693 
1694 		copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1695 		jentry_offset += sizeof(JEntry);
1696 	}
1697 
1698 	/* Total data size is everything we've appended to buffer */
1699 	totallen = buffer->len - base_offset;
1700 
1701 	/* Check length again, since we didn't include the metadata above */
1702 	if (totallen > JENTRY_OFFLENMASK)
1703 		ereport(ERROR,
1704 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1705 				 errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1706 						JENTRY_OFFLENMASK)));
1707 
1708 	/* Initialize the header of this node in the container's JEntry array */
1709 	*pheader = JENTRY_ISCONTAINER | totallen;
1710 }
1711 
1712 static void
convertJsonbScalar(StringInfo buffer,JEntry * jentry,JsonbValue * scalarVal)1713 convertJsonbScalar(StringInfo buffer, JEntry *jentry, JsonbValue *scalarVal)
1714 {
1715 	int			numlen;
1716 	short		padlen;
1717 
1718 	switch (scalarVal->type)
1719 	{
1720 		case jbvNull:
1721 			*jentry = JENTRY_ISNULL;
1722 			break;
1723 
1724 		case jbvString:
1725 			appendToBuffer(buffer, scalarVal->val.string.val, scalarVal->val.string.len);
1726 
1727 			*jentry = scalarVal->val.string.len;
1728 			break;
1729 
1730 		case jbvNumeric:
1731 			numlen = VARSIZE_ANY(scalarVal->val.numeric);
1732 			padlen = padBufferToInt(buffer);
1733 
1734 			appendToBuffer(buffer, (char *) scalarVal->val.numeric, numlen);
1735 
1736 			*jentry = JENTRY_ISNUMERIC | (padlen + numlen);
1737 			break;
1738 
1739 		case jbvBool:
1740 			*jentry = (scalarVal->val.boolean) ?
1741 				JENTRY_ISBOOL_TRUE : JENTRY_ISBOOL_FALSE;
1742 			break;
1743 
1744 		default:
1745 			elog(ERROR, "invalid jsonb scalar type");
1746 	}
1747 }
1748 
1749 /*
1750  * Compare two jbvString JsonbValue values, a and b.
1751  *
1752  * This is a special qsort() comparator used to sort strings in certain
1753  * internal contexts where it is sufficient to have a well-defined sort order.
1754  * In particular, object pair keys are sorted according to this criteria to
1755  * facilitate cheap binary searches where we don't care about lexical sort
1756  * order.
1757  *
1758  * a and b are first sorted based on their length.  If a tie-breaker is
1759  * required, only then do we consider string binary equality.
1760  */
1761 static int
lengthCompareJsonbStringValue(const void * a,const void * b)1762 lengthCompareJsonbStringValue(const void *a, const void *b)
1763 {
1764 	const JsonbValue *va = (const JsonbValue *) a;
1765 	const JsonbValue *vb = (const JsonbValue *) b;
1766 	int			res;
1767 
1768 	Assert(va->type == jbvString);
1769 	Assert(vb->type == jbvString);
1770 
1771 	if (va->val.string.len == vb->val.string.len)
1772 	{
1773 		res = memcmp(va->val.string.val, vb->val.string.val, va->val.string.len);
1774 	}
1775 	else
1776 	{
1777 		res = (va->val.string.len > vb->val.string.len) ? 1 : -1;
1778 	}
1779 
1780 	return res;
1781 }
1782 
1783 /*
1784  * qsort_arg() comparator to compare JsonbPair values.
1785  *
1786  * Third argument 'binequal' may point to a bool. If it's set, *binequal is set
1787  * to true iff a and b have full binary equality, since some callers have an
1788  * interest in whether the two values are equal or merely equivalent.
1789  *
1790  * N.B: String comparisons here are "length-wise"
1791  *
1792  * Pairs with equals keys are ordered such that the order field is respected.
1793  */
1794 static int
lengthCompareJsonbPair(const void * a,const void * b,void * binequal)1795 lengthCompareJsonbPair(const void *a, const void *b, void *binequal)
1796 {
1797 	const JsonbPair *pa = (const JsonbPair *) a;
1798 	const JsonbPair *pb = (const JsonbPair *) b;
1799 	int			res;
1800 
1801 	res = lengthCompareJsonbStringValue(&pa->key, &pb->key);
1802 	if (res == 0 && binequal)
1803 		*((bool *) binequal) = true;
1804 
1805 	/*
1806 	 * Guarantee keeping order of equal pair.  Unique algorithm will prefer
1807 	 * first element as value.
1808 	 */
1809 	if (res == 0)
1810 		res = (pa->order > pb->order) ? -1 : 1;
1811 
1812 	return res;
1813 }
1814 
1815 /*
1816  * Sort and unique-ify pairs in JsonbValue object
1817  */
1818 static void
uniqueifyJsonbObject(JsonbValue * object)1819 uniqueifyJsonbObject(JsonbValue *object)
1820 {
1821 	bool		hasNonUniq = false;
1822 
1823 	Assert(object->type == jbvObject);
1824 
1825 	if (object->val.object.nPairs > 1)
1826 		qsort_arg(object->val.object.pairs, object->val.object.nPairs, sizeof(JsonbPair),
1827 				  lengthCompareJsonbPair, &hasNonUniq);
1828 
1829 	if (hasNonUniq)
1830 	{
1831 		JsonbPair  *ptr = object->val.object.pairs + 1,
1832 				   *res = object->val.object.pairs;
1833 
1834 		while (ptr - object->val.object.pairs < object->val.object.nPairs)
1835 		{
1836 			/* Avoid copying over duplicate */
1837 			if (lengthCompareJsonbStringValue(ptr, res) != 0)
1838 			{
1839 				res++;
1840 				if (ptr != res)
1841 					memcpy(res, ptr, sizeof(JsonbPair));
1842 			}
1843 			ptr++;
1844 		}
1845 
1846 		object->val.object.nPairs = res + 1 - object->val.object.pairs;
1847 	}
1848 }
1849