1 /*-------------------------------------------------------------------------
2  *
3  * jsonb_util.c
4  *	  converting between Jsonb and JsonbValues, and iterating.
5  *
6  * Copyright (c) 2014-2020, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *	  src/backend/utils/adt/jsonb_util.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "catalog/pg_collation.h"
17 #include "catalog/pg_type.h"
18 #include "common/hashfn.h"
19 #include "common/jsonapi.h"
20 #include "miscadmin.h"
21 #include "utils/builtins.h"
22 #include "utils/datetime.h"
23 #include "utils/json.h"
24 #include "utils/jsonb.h"
25 #include "utils/memutils.h"
26 #include "utils/varlena.h"
27 
28 /*
29  * Maximum number of elements in an array (or key/value pairs in an object).
30  * This is limited by two things: the size of the JEntry array must fit
31  * in MaxAllocSize, and the number of elements (or pairs) must fit in the bits
32  * reserved for that in the JsonbContainer.header field.
33  *
34  * (The total size of an array's or object's elements is also limited by
35  * JENTRY_OFFLENMASK, but we're not concerned about that here.)
36  */
37 #define JSONB_MAX_ELEMS (Min(MaxAllocSize / sizeof(JsonbValue), JB_CMASK))
38 #define JSONB_MAX_PAIRS (Min(MaxAllocSize / sizeof(JsonbPair), JB_CMASK))
39 
40 static void fillJsonbValue(JsonbContainer *container, int index,
41 						   char *base_addr, uint32 offset,
42 						   JsonbValue *result);
43 static bool equalsJsonbScalarValue(JsonbValue *a, JsonbValue *b);
44 static int	compareJsonbScalarValue(JsonbValue *a, JsonbValue *b);
45 static Jsonb *convertToJsonb(JsonbValue *val);
46 static void convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
47 static void convertJsonbArray(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
48 static void convertJsonbObject(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
49 static void convertJsonbScalar(StringInfo buffer, JEntry *header, JsonbValue *scalarVal);
50 
51 static int	reserveFromBuffer(StringInfo buffer, int len);
52 static void appendToBuffer(StringInfo buffer, const char *data, int len);
53 static void copyToBuffer(StringInfo buffer, int offset, const char *data, int len);
54 static short padBufferToInt(StringInfo buffer);
55 
56 static JsonbIterator *iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent);
57 static JsonbIterator *freeAndGetParent(JsonbIterator *it);
58 static JsonbParseState *pushState(JsonbParseState **pstate);
59 static void appendKey(JsonbParseState *pstate, JsonbValue *scalarVal);
60 static void appendValue(JsonbParseState *pstate, JsonbValue *scalarVal);
61 static void appendElement(JsonbParseState *pstate, JsonbValue *scalarVal);
62 static int	lengthCompareJsonbStringValue(const void *a, const void *b);
63 static int	lengthCompareJsonbString(const char *val1, int len1,
64 									 const char *val2, int len2);
65 static int	lengthCompareJsonbPair(const void *a, const void *b, void *arg);
66 static void uniqueifyJsonbObject(JsonbValue *object);
67 static JsonbValue *pushJsonbValueScalar(JsonbParseState **pstate,
68 										JsonbIteratorToken seq,
69 										JsonbValue *scalarVal);
70 
71 /*
72  * Turn an in-memory JsonbValue into a Jsonb for on-disk storage.
73  *
74  * There isn't a JsonbToJsonbValue(), because generally we find it more
75  * convenient to directly iterate through the Jsonb representation and only
76  * really convert nested scalar values.  JsonbIteratorNext() does this, so that
77  * clients of the iteration code don't have to directly deal with the binary
78  * representation (JsonbDeepContains() is a notable exception, although all
79  * exceptions are internal to this module).  In general, functions that accept
80  * a JsonbValue argument are concerned with the manipulation of scalar values,
81  * or simple containers of scalar values, where it would be inconvenient to
82  * deal with a great amount of other state.
83  */
84 Jsonb *
JsonbValueToJsonb(JsonbValue * val)85 JsonbValueToJsonb(JsonbValue *val)
86 {
87 	Jsonb	   *out;
88 
89 	if (IsAJsonbScalar(val))
90 	{
91 		/* Scalar value */
92 		JsonbParseState *pstate = NULL;
93 		JsonbValue *res;
94 		JsonbValue	scalarArray;
95 
96 		scalarArray.type = jbvArray;
97 		scalarArray.val.array.rawScalar = true;
98 		scalarArray.val.array.nElems = 1;
99 
100 		pushJsonbValue(&pstate, WJB_BEGIN_ARRAY, &scalarArray);
101 		pushJsonbValue(&pstate, WJB_ELEM, val);
102 		res = pushJsonbValue(&pstate, WJB_END_ARRAY, NULL);
103 
104 		out = convertToJsonb(res);
105 	}
106 	else if (val->type == jbvObject || val->type == jbvArray)
107 	{
108 		out = convertToJsonb(val);
109 	}
110 	else
111 	{
112 		Assert(val->type == jbvBinary);
113 		out = palloc(VARHDRSZ + val->val.binary.len);
114 		SET_VARSIZE(out, VARHDRSZ + val->val.binary.len);
115 		memcpy(VARDATA(out), val->val.binary.data, val->val.binary.len);
116 	}
117 
118 	return out;
119 }
120 
121 /*
122  * Get the offset of the variable-length portion of a Jsonb node within
123  * the variable-length-data part of its container.  The node is identified
124  * by index within the container's JEntry array.
125  */
126 uint32
getJsonbOffset(const JsonbContainer * jc,int index)127 getJsonbOffset(const JsonbContainer *jc, int index)
128 {
129 	uint32		offset = 0;
130 	int			i;
131 
132 	/*
133 	 * Start offset of this entry is equal to the end offset of the previous
134 	 * entry.  Walk backwards to the most recent entry stored as an end
135 	 * offset, returning that offset plus any lengths in between.
136 	 */
137 	for (i = index - 1; i >= 0; i--)
138 	{
139 		offset += JBE_OFFLENFLD(jc->children[i]);
140 		if (JBE_HAS_OFF(jc->children[i]))
141 			break;
142 	}
143 
144 	return offset;
145 }
146 
147 /*
148  * Get the length of the variable-length portion of a Jsonb node.
149  * The node is identified by index within the container's JEntry array.
150  */
151 uint32
getJsonbLength(const JsonbContainer * jc,int index)152 getJsonbLength(const JsonbContainer *jc, int index)
153 {
154 	uint32		off;
155 	uint32		len;
156 
157 	/*
158 	 * If the length is stored directly in the JEntry, just return it.
159 	 * Otherwise, get the begin offset of the entry, and subtract that from
160 	 * the stored end+1 offset.
161 	 */
162 	if (JBE_HAS_OFF(jc->children[index]))
163 	{
164 		off = getJsonbOffset(jc, index);
165 		len = JBE_OFFLENFLD(jc->children[index]) - off;
166 	}
167 	else
168 		len = JBE_OFFLENFLD(jc->children[index]);
169 
170 	return len;
171 }
172 
173 /*
174  * BT comparator worker function.  Returns an integer less than, equal to, or
175  * greater than zero, indicating whether a is less than, equal to, or greater
176  * than b.  Consistent with the requirements for a B-Tree operator class
177  *
178  * Strings are compared lexically, in contrast with other places where we use a
179  * much simpler comparator logic for searching through Strings.  Since this is
180  * called from B-Tree support function 1, we're careful about not leaking
181  * memory here.
182  */
183 int
compareJsonbContainers(JsonbContainer * a,JsonbContainer * b)184 compareJsonbContainers(JsonbContainer *a, JsonbContainer *b)
185 {
186 	JsonbIterator *ita,
187 			   *itb;
188 	int			res = 0;
189 
190 	ita = JsonbIteratorInit(a);
191 	itb = JsonbIteratorInit(b);
192 
193 	do
194 	{
195 		JsonbValue	va,
196 					vb;
197 		JsonbIteratorToken ra,
198 					rb;
199 
200 		ra = JsonbIteratorNext(&ita, &va, false);
201 		rb = JsonbIteratorNext(&itb, &vb, false);
202 
203 		if (ra == rb)
204 		{
205 			if (ra == WJB_DONE)
206 			{
207 				/* Decisively equal */
208 				break;
209 			}
210 
211 			if (ra == WJB_END_ARRAY || ra == WJB_END_OBJECT)
212 			{
213 				/*
214 				 * There is no array or object to compare at this stage of
215 				 * processing.  jbvArray/jbvObject values are compared
216 				 * initially, at the WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT
217 				 * tokens.
218 				 */
219 				continue;
220 			}
221 
222 			if (va.type == vb.type)
223 			{
224 				switch (va.type)
225 				{
226 					case jbvString:
227 					case jbvNull:
228 					case jbvNumeric:
229 					case jbvBool:
230 						res = compareJsonbScalarValue(&va, &vb);
231 						break;
232 					case jbvArray:
233 
234 						/*
235 						 * This could be a "raw scalar" pseudo array.  That's
236 						 * a special case here though, since we still want the
237 						 * general type-based comparisons to apply, and as far
238 						 * as we're concerned a pseudo array is just a scalar.
239 						 */
240 						if (va.val.array.rawScalar != vb.val.array.rawScalar)
241 							res = (va.val.array.rawScalar) ? -1 : 1;
242 						if (va.val.array.nElems != vb.val.array.nElems)
243 							res = (va.val.array.nElems > vb.val.array.nElems) ? 1 : -1;
244 						break;
245 					case jbvObject:
246 						if (va.val.object.nPairs != vb.val.object.nPairs)
247 							res = (va.val.object.nPairs > vb.val.object.nPairs) ? 1 : -1;
248 						break;
249 					case jbvBinary:
250 						elog(ERROR, "unexpected jbvBinary value");
251 						break;
252 					case jbvDatetime:
253 						elog(ERROR, "unexpected jbvDatetime value");
254 						break;
255 				}
256 			}
257 			else
258 			{
259 				/* Type-defined order */
260 				res = (va.type > vb.type) ? 1 : -1;
261 			}
262 		}
263 		else
264 		{
265 			/*
266 			 * It's safe to assume that the types differed, and that the va
267 			 * and vb values passed were set.
268 			 *
269 			 * If the two values were of the same container type, then there'd
270 			 * have been a chance to observe the variation in the number of
271 			 * elements/pairs (when processing WJB_BEGIN_OBJECT, say). They're
272 			 * either two heterogeneously-typed containers, or a container and
273 			 * some scalar type.
274 			 *
275 			 * We don't have to consider the WJB_END_ARRAY and WJB_END_OBJECT
276 			 * cases here, because we would have seen the corresponding
277 			 * WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT tokens first, and
278 			 * concluded that they don't match.
279 			 */
280 			Assert(ra != WJB_END_ARRAY && ra != WJB_END_OBJECT);
281 			Assert(rb != WJB_END_ARRAY && rb != WJB_END_OBJECT);
282 
283 			Assert(va.type != vb.type);
284 			Assert(va.type != jbvBinary);
285 			Assert(vb.type != jbvBinary);
286 			/* Type-defined order */
287 			res = (va.type > vb.type) ? 1 : -1;
288 		}
289 	}
290 	while (res == 0);
291 
292 	while (ita != NULL)
293 	{
294 		JsonbIterator *i = ita->parent;
295 
296 		pfree(ita);
297 		ita = i;
298 	}
299 	while (itb != NULL)
300 	{
301 		JsonbIterator *i = itb->parent;
302 
303 		pfree(itb);
304 		itb = i;
305 	}
306 
307 	return res;
308 }
309 
310 /*
311  * Find value in object (i.e. the "value" part of some key/value pair in an
312  * object), or find a matching element if we're looking through an array.  Do
313  * so on the basis of equality of the object keys only, or alternatively
314  * element values only, with a caller-supplied value "key".  The "flags"
315  * argument allows the caller to specify which container types are of interest.
316  *
317  * This exported utility function exists to facilitate various cases concerned
318  * with "containment".  If asked to look through an object, the caller had
319  * better pass a Jsonb String, because their keys can only be strings.
320  * Otherwise, for an array, any type of JsonbValue will do.
321  *
322  * In order to proceed with the search, it is necessary for callers to have
323  * both specified an interest in exactly one particular container type with an
324  * appropriate flag, as well as having the pointed-to Jsonb container be of
325  * one of those same container types at the top level. (Actually, we just do
326  * whichever makes sense to save callers the trouble of figuring it out - at
327  * most one can make sense, because the container either points to an array
328  * (possibly a "raw scalar" pseudo array) or an object.)
329  *
330  * Note that we can return a jbvBinary JsonbValue if this is called on an
331  * object, but we never do so on an array.  If the caller asks to look through
332  * a container type that is not of the type pointed to by the container,
333  * immediately fall through and return NULL.  If we cannot find the value,
334  * return NULL.  Otherwise, return palloc()'d copy of value.
335  */
336 JsonbValue *
findJsonbValueFromContainer(JsonbContainer * container,uint32 flags,JsonbValue * key)337 findJsonbValueFromContainer(JsonbContainer *container, uint32 flags,
338 							JsonbValue *key)
339 {
340 	JEntry	   *children = container->children;
341 	int			count = JsonContainerSize(container);
342 
343 	Assert((flags & ~(JB_FARRAY | JB_FOBJECT)) == 0);
344 
345 	/* Quick out without a palloc cycle if object/array is empty */
346 	if (count <= 0)
347 		return NULL;
348 
349 	if ((flags & JB_FARRAY) && JsonContainerIsArray(container))
350 	{
351 		JsonbValue *result = palloc(sizeof(JsonbValue));
352 		char	   *base_addr = (char *) (children + count);
353 		uint32		offset = 0;
354 		int			i;
355 
356 		for (i = 0; i < count; i++)
357 		{
358 			fillJsonbValue(container, i, base_addr, offset, result);
359 
360 			if (key->type == result->type)
361 			{
362 				if (equalsJsonbScalarValue(key, result))
363 					return result;
364 			}
365 
366 			JBE_ADVANCE_OFFSET(offset, children[i]);
367 		}
368 
369 		pfree(result);
370 	}
371 	else if ((flags & JB_FOBJECT) && JsonContainerIsObject(container))
372 	{
373 		/* Object key passed by caller must be a string */
374 		Assert(key->type == jbvString);
375 
376 		return getKeyJsonValueFromContainer(container, key->val.string.val,
377 											key->val.string.len, NULL);
378 	}
379 
380 	/* Not found */
381 	return NULL;
382 }
383 
384 /*
385  * Find value by key in Jsonb object and fetch it into 'res', which is also
386  * returned.
387  *
388  * 'res' can be passed in as NULL, in which case it's newly palloc'ed here.
389  */
390 JsonbValue *
getKeyJsonValueFromContainer(JsonbContainer * container,const char * keyVal,int keyLen,JsonbValue * res)391 getKeyJsonValueFromContainer(JsonbContainer *container,
392 							 const char *keyVal, int keyLen, JsonbValue *res)
393 {
394 	JEntry	   *children = container->children;
395 	int			count = JsonContainerSize(container);
396 	char	   *baseAddr;
397 	uint32		stopLow,
398 				stopHigh;
399 
400 	Assert(JsonContainerIsObject(container));
401 
402 	/* Quick out without a palloc cycle if object is empty */
403 	if (count <= 0)
404 		return NULL;
405 
406 	/*
407 	 * Binary search the container. Since we know this is an object, account
408 	 * for *Pairs* of Jentrys
409 	 */
410 	baseAddr = (char *) (children + count * 2);
411 	stopLow = 0;
412 	stopHigh = count;
413 	while (stopLow < stopHigh)
414 	{
415 		uint32		stopMiddle;
416 		int			difference;
417 		const char *candidateVal;
418 		int			candidateLen;
419 
420 		stopMiddle = stopLow + (stopHigh - stopLow) / 2;
421 
422 		candidateVal = baseAddr + getJsonbOffset(container, stopMiddle);
423 		candidateLen = getJsonbLength(container, stopMiddle);
424 
425 		difference = lengthCompareJsonbString(candidateVal, candidateLen,
426 											  keyVal, keyLen);
427 
428 		if (difference == 0)
429 		{
430 			/* Found our key, return corresponding value */
431 			int			index = stopMiddle + count;
432 
433 			if (!res)
434 				res = palloc(sizeof(JsonbValue));
435 
436 			fillJsonbValue(container, index, baseAddr,
437 						   getJsonbOffset(container, index),
438 						   res);
439 
440 			return res;
441 		}
442 		else
443 		{
444 			if (difference < 0)
445 				stopLow = stopMiddle + 1;
446 			else
447 				stopHigh = stopMiddle;
448 		}
449 	}
450 
451 	/* Not found */
452 	return NULL;
453 }
454 
455 /*
456  * Get i-th value of a Jsonb array.
457  *
458  * Returns palloc()'d copy of the value, or NULL if it does not exist.
459  */
460 JsonbValue *
getIthJsonbValueFromContainer(JsonbContainer * container,uint32 i)461 getIthJsonbValueFromContainer(JsonbContainer *container, uint32 i)
462 {
463 	JsonbValue *result;
464 	char	   *base_addr;
465 	uint32		nelements;
466 
467 	if (!JsonContainerIsArray(container))
468 		elog(ERROR, "not a jsonb array");
469 
470 	nelements = JsonContainerSize(container);
471 	base_addr = (char *) &container->children[nelements];
472 
473 	if (i >= nelements)
474 		return NULL;
475 
476 	result = palloc(sizeof(JsonbValue));
477 
478 	fillJsonbValue(container, i, base_addr,
479 				   getJsonbOffset(container, i),
480 				   result);
481 
482 	return result;
483 }
484 
485 /*
486  * A helper function to fill in a JsonbValue to represent an element of an
487  * array, or a key or value of an object.
488  *
489  * The node's JEntry is at container->children[index], and its variable-length
490  * data is at base_addr + offset.  We make the caller determine the offset
491  * since in many cases the caller can amortize that work across multiple
492  * children.  When it can't, it can just call getJsonbOffset().
493  *
494  * A nested array or object will be returned as jbvBinary, ie. it won't be
495  * expanded.
496  */
497 static void
fillJsonbValue(JsonbContainer * container,int index,char * base_addr,uint32 offset,JsonbValue * result)498 fillJsonbValue(JsonbContainer *container, int index,
499 			   char *base_addr, uint32 offset,
500 			   JsonbValue *result)
501 {
502 	JEntry		entry = container->children[index];
503 
504 	if (JBE_ISNULL(entry))
505 	{
506 		result->type = jbvNull;
507 	}
508 	else if (JBE_ISSTRING(entry))
509 	{
510 		result->type = jbvString;
511 		result->val.string.val = base_addr + offset;
512 		result->val.string.len = getJsonbLength(container, index);
513 		Assert(result->val.string.len >= 0);
514 	}
515 	else if (JBE_ISNUMERIC(entry))
516 	{
517 		result->type = jbvNumeric;
518 		result->val.numeric = (Numeric) (base_addr + INTALIGN(offset));
519 	}
520 	else if (JBE_ISBOOL_TRUE(entry))
521 	{
522 		result->type = jbvBool;
523 		result->val.boolean = true;
524 	}
525 	else if (JBE_ISBOOL_FALSE(entry))
526 	{
527 		result->type = jbvBool;
528 		result->val.boolean = false;
529 	}
530 	else
531 	{
532 		Assert(JBE_ISCONTAINER(entry));
533 		result->type = jbvBinary;
534 		/* Remove alignment padding from data pointer and length */
535 		result->val.binary.data = (JsonbContainer *) (base_addr + INTALIGN(offset));
536 		result->val.binary.len = getJsonbLength(container, index) -
537 			(INTALIGN(offset) - offset);
538 	}
539 }
540 
541 /*
542  * Push JsonbValue into JsonbParseState.
543  *
544  * Used when parsing JSON tokens to form Jsonb, or when converting an in-memory
545  * JsonbValue to a Jsonb.
546  *
547  * Initial state of *JsonbParseState is NULL, since it'll be allocated here
548  * originally (caller will get JsonbParseState back by reference).
549  *
550  * Only sequential tokens pertaining to non-container types should pass a
551  * JsonbValue.  There is one exception -- WJB_BEGIN_ARRAY callers may pass a
552  * "raw scalar" pseudo array to append it - the actual scalar should be passed
553  * next and it will be added as the only member of the array.
554  *
555  * Values of type jbvBinary, which are rolled up arrays and objects,
556  * are unpacked before being added to the result.
557  */
558 JsonbValue *
pushJsonbValue(JsonbParseState ** pstate,JsonbIteratorToken seq,JsonbValue * jbval)559 pushJsonbValue(JsonbParseState **pstate, JsonbIteratorToken seq,
560 			   JsonbValue *jbval)
561 {
562 	JsonbIterator *it;
563 	JsonbValue *res = NULL;
564 	JsonbValue	v;
565 	JsonbIteratorToken tok;
566 
567 	if (!jbval || (seq != WJB_ELEM && seq != WJB_VALUE) ||
568 		jbval->type != jbvBinary)
569 	{
570 		/* drop through */
571 		return pushJsonbValueScalar(pstate, seq, jbval);
572 	}
573 
574 	/* unpack the binary and add each piece to the pstate */
575 	it = JsonbIteratorInit(jbval->val.binary.data);
576 	while ((tok = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
577 		res = pushJsonbValueScalar(pstate, tok,
578 								   tok < WJB_BEGIN_ARRAY ? &v : NULL);
579 
580 	return res;
581 }
582 
583 /*
584  * Do the actual pushing, with only scalar or pseudo-scalar-array values
585  * accepted.
586  */
587 static JsonbValue *
pushJsonbValueScalar(JsonbParseState ** pstate,JsonbIteratorToken seq,JsonbValue * scalarVal)588 pushJsonbValueScalar(JsonbParseState **pstate, JsonbIteratorToken seq,
589 					 JsonbValue *scalarVal)
590 {
591 	JsonbValue *result = NULL;
592 
593 	switch (seq)
594 	{
595 		case WJB_BEGIN_ARRAY:
596 			Assert(!scalarVal || scalarVal->val.array.rawScalar);
597 			*pstate = pushState(pstate);
598 			result = &(*pstate)->contVal;
599 			(*pstate)->contVal.type = jbvArray;
600 			(*pstate)->contVal.val.array.nElems = 0;
601 			(*pstate)->contVal.val.array.rawScalar = (scalarVal &&
602 													  scalarVal->val.array.rawScalar);
603 			if (scalarVal && scalarVal->val.array.nElems > 0)
604 			{
605 				/* Assume that this array is still really a scalar */
606 				Assert(scalarVal->type == jbvArray);
607 				(*pstate)->size = scalarVal->val.array.nElems;
608 			}
609 			else
610 			{
611 				(*pstate)->size = 4;
612 			}
613 			(*pstate)->contVal.val.array.elems = palloc(sizeof(JsonbValue) *
614 														(*pstate)->size);
615 			break;
616 		case WJB_BEGIN_OBJECT:
617 			Assert(!scalarVal);
618 			*pstate = pushState(pstate);
619 			result = &(*pstate)->contVal;
620 			(*pstate)->contVal.type = jbvObject;
621 			(*pstate)->contVal.val.object.nPairs = 0;
622 			(*pstate)->size = 4;
623 			(*pstate)->contVal.val.object.pairs = palloc(sizeof(JsonbPair) *
624 														 (*pstate)->size);
625 			break;
626 		case WJB_KEY:
627 			Assert(scalarVal->type == jbvString);
628 			appendKey(*pstate, scalarVal);
629 			break;
630 		case WJB_VALUE:
631 			Assert(IsAJsonbScalar(scalarVal));
632 			appendValue(*pstate, scalarVal);
633 			break;
634 		case WJB_ELEM:
635 			Assert(IsAJsonbScalar(scalarVal));
636 			appendElement(*pstate, scalarVal);
637 			break;
638 		case WJB_END_OBJECT:
639 			uniqueifyJsonbObject(&(*pstate)->contVal);
640 			/* fall through! */
641 		case WJB_END_ARRAY:
642 			/* Steps here common to WJB_END_OBJECT case */
643 			Assert(!scalarVal);
644 			result = &(*pstate)->contVal;
645 
646 			/*
647 			 * Pop stack and push current array/object as value in parent
648 			 * array/object
649 			 */
650 			*pstate = (*pstate)->next;
651 			if (*pstate)
652 			{
653 				switch ((*pstate)->contVal.type)
654 				{
655 					case jbvArray:
656 						appendElement(*pstate, result);
657 						break;
658 					case jbvObject:
659 						appendValue(*pstate, result);
660 						break;
661 					default:
662 						elog(ERROR, "invalid jsonb container type");
663 				}
664 			}
665 			break;
666 		default:
667 			elog(ERROR, "unrecognized jsonb sequential processing token");
668 	}
669 
670 	return result;
671 }
672 
673 /*
674  * pushJsonbValue() worker:  Iteration-like forming of Jsonb
675  */
676 static JsonbParseState *
pushState(JsonbParseState ** pstate)677 pushState(JsonbParseState **pstate)
678 {
679 	JsonbParseState *ns = palloc(sizeof(JsonbParseState));
680 
681 	ns->next = *pstate;
682 	return ns;
683 }
684 
685 /*
686  * pushJsonbValue() worker:  Append a pair key to state when generating a Jsonb
687  */
688 static void
appendKey(JsonbParseState * pstate,JsonbValue * string)689 appendKey(JsonbParseState *pstate, JsonbValue *string)
690 {
691 	JsonbValue *object = &pstate->contVal;
692 
693 	Assert(object->type == jbvObject);
694 	Assert(string->type == jbvString);
695 
696 	if (object->val.object.nPairs >= JSONB_MAX_PAIRS)
697 		ereport(ERROR,
698 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
699 				 errmsg("number of jsonb object pairs exceeds the maximum allowed (%zu)",
700 						JSONB_MAX_PAIRS)));
701 
702 	if (object->val.object.nPairs >= pstate->size)
703 	{
704 		pstate->size *= 2;
705 		object->val.object.pairs = repalloc(object->val.object.pairs,
706 											sizeof(JsonbPair) * pstate->size);
707 	}
708 
709 	object->val.object.pairs[object->val.object.nPairs].key = *string;
710 	object->val.object.pairs[object->val.object.nPairs].order = object->val.object.nPairs;
711 }
712 
713 /*
714  * pushJsonbValue() worker:  Append a pair value to state when generating a
715  * Jsonb
716  */
717 static void
appendValue(JsonbParseState * pstate,JsonbValue * scalarVal)718 appendValue(JsonbParseState *pstate, JsonbValue *scalarVal)
719 {
720 	JsonbValue *object = &pstate->contVal;
721 
722 	Assert(object->type == jbvObject);
723 
724 	object->val.object.pairs[object->val.object.nPairs++].value = *scalarVal;
725 }
726 
727 /*
728  * pushJsonbValue() worker:  Append an element to state when generating a Jsonb
729  */
730 static void
appendElement(JsonbParseState * pstate,JsonbValue * scalarVal)731 appendElement(JsonbParseState *pstate, JsonbValue *scalarVal)
732 {
733 	JsonbValue *array = &pstate->contVal;
734 
735 	Assert(array->type == jbvArray);
736 
737 	if (array->val.array.nElems >= JSONB_MAX_ELEMS)
738 		ereport(ERROR,
739 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
740 				 errmsg("number of jsonb array elements exceeds the maximum allowed (%zu)",
741 						JSONB_MAX_ELEMS)));
742 
743 	if (array->val.array.nElems >= pstate->size)
744 	{
745 		pstate->size *= 2;
746 		array->val.array.elems = repalloc(array->val.array.elems,
747 										  sizeof(JsonbValue) * pstate->size);
748 	}
749 
750 	array->val.array.elems[array->val.array.nElems++] = *scalarVal;
751 }
752 
753 /*
754  * Given a JsonbContainer, expand to JsonbIterator to iterate over items
755  * fully expanded to in-memory representation for manipulation.
756  *
757  * See JsonbIteratorNext() for notes on memory management.
758  */
759 JsonbIterator *
JsonbIteratorInit(JsonbContainer * container)760 JsonbIteratorInit(JsonbContainer *container)
761 {
762 	return iteratorFromContainer(container, NULL);
763 }
764 
765 /*
766  * Get next JsonbValue while iterating
767  *
768  * Caller should initially pass their own, original iterator.  They may get
769  * back a child iterator palloc()'d here instead.  The function can be relied
770  * on to free those child iterators, lest the memory allocated for highly
771  * nested objects become unreasonable, but only if callers don't end iteration
772  * early (by breaking upon having found something in a search, for example).
773  *
774  * Callers in such a scenario, that are particularly sensitive to leaking
775  * memory in a long-lived context may walk the ancestral tree from the final
776  * iterator we left them with to its oldest ancestor, pfree()ing as they go.
777  * They do not have to free any other memory previously allocated for iterators
778  * but not accessible as direct ancestors of the iterator they're last passed
779  * back.
780  *
781  * Returns "Jsonb sequential processing" token value.  Iterator "state"
782  * reflects the current stage of the process in a less granular fashion, and is
783  * mostly used here to track things internally with respect to particular
784  * iterators.
785  *
786  * Clients of this function should not have to handle any jbvBinary values
787  * (since recursive calls will deal with this), provided skipNested is false.
788  * It is our job to expand the jbvBinary representation without bothering them
789  * with it.  However, clients should not take it upon themselves to touch array
790  * or Object element/pair buffers, since their element/pair pointers are
791  * garbage.  Also, *val will not be set when returning WJB_END_ARRAY or
792  * WJB_END_OBJECT, on the assumption that it's only useful to access values
793  * when recursing in.
794  */
795 JsonbIteratorToken
JsonbIteratorNext(JsonbIterator ** it,JsonbValue * val,bool skipNested)796 JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested)
797 {
798 	if (*it == NULL)
799 		return WJB_DONE;
800 
801 	/*
802 	 * When stepping into a nested container, we jump back here to start
803 	 * processing the child. We will not recurse further in one call, because
804 	 * processing the child will always begin in JBI_ARRAY_START or
805 	 * JBI_OBJECT_START state.
806 	 */
807 recurse:
808 	switch ((*it)->state)
809 	{
810 		case JBI_ARRAY_START:
811 			/* Set v to array on first array call */
812 			val->type = jbvArray;
813 			val->val.array.nElems = (*it)->nElems;
814 
815 			/*
816 			 * v->val.array.elems is not actually set, because we aren't doing
817 			 * a full conversion
818 			 */
819 			val->val.array.rawScalar = (*it)->isScalar;
820 			(*it)->curIndex = 0;
821 			(*it)->curDataOffset = 0;
822 			(*it)->curValueOffset = 0;	/* not actually used */
823 			/* Set state for next call */
824 			(*it)->state = JBI_ARRAY_ELEM;
825 			return WJB_BEGIN_ARRAY;
826 
827 		case JBI_ARRAY_ELEM:
828 			if ((*it)->curIndex >= (*it)->nElems)
829 			{
830 				/*
831 				 * All elements within array already processed.  Report this
832 				 * to caller, and give it back original parent iterator (which
833 				 * independently tracks iteration progress at its level of
834 				 * nesting).
835 				 */
836 				*it = freeAndGetParent(*it);
837 				return WJB_END_ARRAY;
838 			}
839 
840 			fillJsonbValue((*it)->container, (*it)->curIndex,
841 						   (*it)->dataProper, (*it)->curDataOffset,
842 						   val);
843 
844 			JBE_ADVANCE_OFFSET((*it)->curDataOffset,
845 							   (*it)->children[(*it)->curIndex]);
846 			(*it)->curIndex++;
847 
848 			if (!IsAJsonbScalar(val) && !skipNested)
849 			{
850 				/* Recurse into container. */
851 				*it = iteratorFromContainer(val->val.binary.data, *it);
852 				goto recurse;
853 			}
854 			else
855 			{
856 				/*
857 				 * Scalar item in array, or a container and caller didn't want
858 				 * us to recurse into it.
859 				 */
860 				return WJB_ELEM;
861 			}
862 
863 		case JBI_OBJECT_START:
864 			/* Set v to object on first object call */
865 			val->type = jbvObject;
866 			val->val.object.nPairs = (*it)->nElems;
867 
868 			/*
869 			 * v->val.object.pairs is not actually set, because we aren't
870 			 * doing a full conversion
871 			 */
872 			(*it)->curIndex = 0;
873 			(*it)->curDataOffset = 0;
874 			(*it)->curValueOffset = getJsonbOffset((*it)->container,
875 												   (*it)->nElems);
876 			/* Set state for next call */
877 			(*it)->state = JBI_OBJECT_KEY;
878 			return WJB_BEGIN_OBJECT;
879 
880 		case JBI_OBJECT_KEY:
881 			if ((*it)->curIndex >= (*it)->nElems)
882 			{
883 				/*
884 				 * All pairs within object already processed.  Report this to
885 				 * caller, and give it back original containing iterator
886 				 * (which independently tracks iteration progress at its level
887 				 * of nesting).
888 				 */
889 				*it = freeAndGetParent(*it);
890 				return WJB_END_OBJECT;
891 			}
892 			else
893 			{
894 				/* Return key of a key/value pair.  */
895 				fillJsonbValue((*it)->container, (*it)->curIndex,
896 							   (*it)->dataProper, (*it)->curDataOffset,
897 							   val);
898 				if (val->type != jbvString)
899 					elog(ERROR, "unexpected jsonb type as object key");
900 
901 				/* Set state for next call */
902 				(*it)->state = JBI_OBJECT_VALUE;
903 				return WJB_KEY;
904 			}
905 
906 		case JBI_OBJECT_VALUE:
907 			/* Set state for next call */
908 			(*it)->state = JBI_OBJECT_KEY;
909 
910 			fillJsonbValue((*it)->container, (*it)->curIndex + (*it)->nElems,
911 						   (*it)->dataProper, (*it)->curValueOffset,
912 						   val);
913 
914 			JBE_ADVANCE_OFFSET((*it)->curDataOffset,
915 							   (*it)->children[(*it)->curIndex]);
916 			JBE_ADVANCE_OFFSET((*it)->curValueOffset,
917 							   (*it)->children[(*it)->curIndex + (*it)->nElems]);
918 			(*it)->curIndex++;
919 
920 			/*
921 			 * Value may be a container, in which case we recurse with new,
922 			 * child iterator (unless the caller asked not to, by passing
923 			 * skipNested).
924 			 */
925 			if (!IsAJsonbScalar(val) && !skipNested)
926 			{
927 				*it = iteratorFromContainer(val->val.binary.data, *it);
928 				goto recurse;
929 			}
930 			else
931 				return WJB_VALUE;
932 	}
933 
934 	elog(ERROR, "invalid iterator state");
935 	return -1;
936 }
937 
938 /*
939  * Initialize an iterator for iterating all elements in a container.
940  */
941 static JsonbIterator *
iteratorFromContainer(JsonbContainer * container,JsonbIterator * parent)942 iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent)
943 {
944 	JsonbIterator *it;
945 
946 	it = palloc0(sizeof(JsonbIterator));
947 	it->container = container;
948 	it->parent = parent;
949 	it->nElems = JsonContainerSize(container);
950 
951 	/* Array starts just after header */
952 	it->children = container->children;
953 
954 	switch (container->header & (JB_FARRAY | JB_FOBJECT))
955 	{
956 		case JB_FARRAY:
957 			it->dataProper =
958 				(char *) it->children + it->nElems * sizeof(JEntry);
959 			it->isScalar = JsonContainerIsScalar(container);
960 			/* This is either a "raw scalar", or an array */
961 			Assert(!it->isScalar || it->nElems == 1);
962 
963 			it->state = JBI_ARRAY_START;
964 			break;
965 
966 		case JB_FOBJECT:
967 			it->dataProper =
968 				(char *) it->children + it->nElems * sizeof(JEntry) * 2;
969 			it->state = JBI_OBJECT_START;
970 			break;
971 
972 		default:
973 			elog(ERROR, "unknown type of jsonb container");
974 	}
975 
976 	return it;
977 }
978 
979 /*
980  * JsonbIteratorNext() worker:	Return parent, while freeing memory for current
981  * iterator
982  */
983 static JsonbIterator *
freeAndGetParent(JsonbIterator * it)984 freeAndGetParent(JsonbIterator *it)
985 {
986 	JsonbIterator *v = it->parent;
987 
988 	pfree(it);
989 	return v;
990 }
991 
992 /*
993  * Worker for "contains" operator's function
994  *
995  * Formally speaking, containment is top-down, unordered subtree isomorphism.
996  *
997  * Takes iterators that belong to some container type.  These iterators
998  * "belong" to those values in the sense that they've just been initialized in
999  * respect of them by the caller (perhaps in a nested fashion).
1000  *
1001  * "val" is lhs Jsonb, and mContained is rhs Jsonb when called from top level.
1002  * We determine if mContained is contained within val.
1003  */
1004 bool
JsonbDeepContains(JsonbIterator ** val,JsonbIterator ** mContained)1005 JsonbDeepContains(JsonbIterator **val, JsonbIterator **mContained)
1006 {
1007 	JsonbValue	vval,
1008 				vcontained;
1009 	JsonbIteratorToken rval,
1010 				rcont;
1011 
1012 	/*
1013 	 * Guard against stack overflow due to overly complex Jsonb.
1014 	 *
1015 	 * Functions called here independently take this precaution, but that
1016 	 * might not be sufficient since this is also a recursive function.
1017 	 */
1018 	check_stack_depth();
1019 
1020 	rval = JsonbIteratorNext(val, &vval, false);
1021 	rcont = JsonbIteratorNext(mContained, &vcontained, false);
1022 
1023 	if (rval != rcont)
1024 	{
1025 		/*
1026 		 * The differing return values can immediately be taken as indicating
1027 		 * two differing container types at this nesting level, which is
1028 		 * sufficient reason to give up entirely (but it should be the case
1029 		 * that they're both some container type).
1030 		 */
1031 		Assert(rval == WJB_BEGIN_OBJECT || rval == WJB_BEGIN_ARRAY);
1032 		Assert(rcont == WJB_BEGIN_OBJECT || rcont == WJB_BEGIN_ARRAY);
1033 		return false;
1034 	}
1035 	else if (rcont == WJB_BEGIN_OBJECT)
1036 	{
1037 		Assert(vval.type == jbvObject);
1038 		Assert(vcontained.type == jbvObject);
1039 
1040 		/*
1041 		 * If the lhs has fewer pairs than the rhs, it can't possibly contain
1042 		 * the rhs.  (This conclusion is safe only because we de-duplicate
1043 		 * keys in all Jsonb objects; thus there can be no corresponding
1044 		 * optimization in the array case.)  The case probably won't arise
1045 		 * often, but since it's such a cheap check we may as well make it.
1046 		 */
1047 		if (vval.val.object.nPairs < vcontained.val.object.nPairs)
1048 			return false;
1049 
1050 		/* Work through rhs "is it contained within?" object */
1051 		for (;;)
1052 		{
1053 			JsonbValue *lhsVal; /* lhsVal is from pair in lhs object */
1054 			JsonbValue	lhsValBuf;
1055 
1056 			rcont = JsonbIteratorNext(mContained, &vcontained, false);
1057 
1058 			/*
1059 			 * When we get through caller's rhs "is it contained within?"
1060 			 * object without failing to find one of its values, it's
1061 			 * contained.
1062 			 */
1063 			if (rcont == WJB_END_OBJECT)
1064 				return true;
1065 
1066 			Assert(rcont == WJB_KEY);
1067 			Assert(vcontained.type == jbvString);
1068 
1069 			/* First, find value by key... */
1070 			lhsVal =
1071 				getKeyJsonValueFromContainer((*val)->container,
1072 											 vcontained.val.string.val,
1073 											 vcontained.val.string.len,
1074 											 &lhsValBuf);
1075 			if (!lhsVal)
1076 				return false;
1077 
1078 			/*
1079 			 * ...at this stage it is apparent that there is at least a key
1080 			 * match for this rhs pair.
1081 			 */
1082 			rcont = JsonbIteratorNext(mContained, &vcontained, true);
1083 
1084 			Assert(rcont == WJB_VALUE);
1085 
1086 			/*
1087 			 * Compare rhs pair's value with lhs pair's value just found using
1088 			 * key
1089 			 */
1090 			if (lhsVal->type != vcontained.type)
1091 			{
1092 				return false;
1093 			}
1094 			else if (IsAJsonbScalar(lhsVal))
1095 			{
1096 				if (!equalsJsonbScalarValue(lhsVal, &vcontained))
1097 					return false;
1098 			}
1099 			else
1100 			{
1101 				/* Nested container value (object or array) */
1102 				JsonbIterator *nestval,
1103 						   *nestContained;
1104 
1105 				Assert(lhsVal->type == jbvBinary);
1106 				Assert(vcontained.type == jbvBinary);
1107 
1108 				nestval = JsonbIteratorInit(lhsVal->val.binary.data);
1109 				nestContained = JsonbIteratorInit(vcontained.val.binary.data);
1110 
1111 				/*
1112 				 * Match "value" side of rhs datum object's pair recursively.
1113 				 * It's a nested structure.
1114 				 *
1115 				 * Note that nesting still has to "match up" at the right
1116 				 * nesting sub-levels.  However, there need only be zero or
1117 				 * more matching pairs (or elements) at each nesting level
1118 				 * (provided the *rhs* pairs/elements *all* match on each
1119 				 * level), which enables searching nested structures for a
1120 				 * single String or other primitive type sub-datum quite
1121 				 * effectively (provided the user constructed the rhs nested
1122 				 * structure such that we "know where to look").
1123 				 *
1124 				 * In other words, the mapping of container nodes in the rhs
1125 				 * "vcontained" Jsonb to internal nodes on the lhs is
1126 				 * injective, and parent-child edges on the rhs must be mapped
1127 				 * to parent-child edges on the lhs to satisfy the condition
1128 				 * of containment (plus of course the mapped nodes must be
1129 				 * equal).
1130 				 */
1131 				if (!JsonbDeepContains(&nestval, &nestContained))
1132 					return false;
1133 			}
1134 		}
1135 	}
1136 	else if (rcont == WJB_BEGIN_ARRAY)
1137 	{
1138 		JsonbValue *lhsConts = NULL;
1139 		uint32		nLhsElems = vval.val.array.nElems;
1140 
1141 		Assert(vval.type == jbvArray);
1142 		Assert(vcontained.type == jbvArray);
1143 
1144 		/*
1145 		 * Handle distinction between "raw scalar" pseudo arrays, and real
1146 		 * arrays.
1147 		 *
1148 		 * A raw scalar may contain another raw scalar, and an array may
1149 		 * contain a raw scalar, but a raw scalar may not contain an array. We
1150 		 * don't do something like this for the object case, since objects can
1151 		 * only contain pairs, never raw scalars (a pair is represented by an
1152 		 * rhs object argument with a single contained pair).
1153 		 */
1154 		if (vval.val.array.rawScalar && !vcontained.val.array.rawScalar)
1155 			return false;
1156 
1157 		/* Work through rhs "is it contained within?" array */
1158 		for (;;)
1159 		{
1160 			rcont = JsonbIteratorNext(mContained, &vcontained, true);
1161 
1162 			/*
1163 			 * When we get through caller's rhs "is it contained within?"
1164 			 * array without failing to find one of its values, it's
1165 			 * contained.
1166 			 */
1167 			if (rcont == WJB_END_ARRAY)
1168 				return true;
1169 
1170 			Assert(rcont == WJB_ELEM);
1171 
1172 			if (IsAJsonbScalar(&vcontained))
1173 			{
1174 				if (!findJsonbValueFromContainer((*val)->container,
1175 												 JB_FARRAY,
1176 												 &vcontained))
1177 					return false;
1178 			}
1179 			else
1180 			{
1181 				uint32		i;
1182 
1183 				/*
1184 				 * If this is first container found in rhs array (at this
1185 				 * depth), initialize temp lhs array of containers
1186 				 */
1187 				if (lhsConts == NULL)
1188 				{
1189 					uint32		j = 0;
1190 
1191 					/* Make room for all possible values */
1192 					lhsConts = palloc(sizeof(JsonbValue) * nLhsElems);
1193 
1194 					for (i = 0; i < nLhsElems; i++)
1195 					{
1196 						/* Store all lhs elements in temp array */
1197 						rcont = JsonbIteratorNext(val, &vval, true);
1198 						Assert(rcont == WJB_ELEM);
1199 
1200 						if (vval.type == jbvBinary)
1201 							lhsConts[j++] = vval;
1202 					}
1203 
1204 					/* No container elements in temp array, so give up now */
1205 					if (j == 0)
1206 						return false;
1207 
1208 					/* We may have only partially filled array */
1209 					nLhsElems = j;
1210 				}
1211 
1212 				/* XXX: Nested array containment is O(N^2) */
1213 				for (i = 0; i < nLhsElems; i++)
1214 				{
1215 					/* Nested container value (object or array) */
1216 					JsonbIterator *nestval,
1217 							   *nestContained;
1218 					bool		contains;
1219 
1220 					nestval = JsonbIteratorInit(lhsConts[i].val.binary.data);
1221 					nestContained = JsonbIteratorInit(vcontained.val.binary.data);
1222 
1223 					contains = JsonbDeepContains(&nestval, &nestContained);
1224 
1225 					if (nestval)
1226 						pfree(nestval);
1227 					if (nestContained)
1228 						pfree(nestContained);
1229 					if (contains)
1230 						break;
1231 				}
1232 
1233 				/*
1234 				 * Report rhs container value is not contained if couldn't
1235 				 * match rhs container to *some* lhs cont
1236 				 */
1237 				if (i == nLhsElems)
1238 					return false;
1239 			}
1240 		}
1241 	}
1242 	else
1243 	{
1244 		elog(ERROR, "invalid jsonb container type");
1245 	}
1246 
1247 	elog(ERROR, "unexpectedly fell off end of jsonb container");
1248 	return false;
1249 }
1250 
1251 /*
1252  * Hash a JsonbValue scalar value, mixing the hash value into an existing
1253  * hash provided by the caller.
1254  *
1255  * Some callers may wish to independently XOR in JB_FOBJECT and JB_FARRAY
1256  * flags.
1257  */
1258 void
JsonbHashScalarValue(const JsonbValue * scalarVal,uint32 * hash)1259 JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash)
1260 {
1261 	uint32		tmp;
1262 
1263 	/* Compute hash value for scalarVal */
1264 	switch (scalarVal->type)
1265 	{
1266 		case jbvNull:
1267 			tmp = 0x01;
1268 			break;
1269 		case jbvString:
1270 			tmp = DatumGetUInt32(hash_any((const unsigned char *) scalarVal->val.string.val,
1271 										  scalarVal->val.string.len));
1272 			break;
1273 		case jbvNumeric:
1274 			/* Must hash equal numerics to equal hash codes */
1275 			tmp = DatumGetUInt32(DirectFunctionCall1(hash_numeric,
1276 													 NumericGetDatum(scalarVal->val.numeric)));
1277 			break;
1278 		case jbvBool:
1279 			tmp = scalarVal->val.boolean ? 0x02 : 0x04;
1280 
1281 			break;
1282 		default:
1283 			elog(ERROR, "invalid jsonb scalar type");
1284 			tmp = 0;			/* keep compiler quiet */
1285 			break;
1286 	}
1287 
1288 	/*
1289 	 * Combine hash values of successive keys, values and elements by rotating
1290 	 * the previous value left 1 bit, then XOR'ing in the new
1291 	 * key/value/element's hash value.
1292 	 */
1293 	*hash = (*hash << 1) | (*hash >> 31);
1294 	*hash ^= tmp;
1295 }
1296 
1297 /*
1298  * Hash a value to a 64-bit value, with a seed. Otherwise, similar to
1299  * JsonbHashScalarValue.
1300  */
1301 void
JsonbHashScalarValueExtended(const JsonbValue * scalarVal,uint64 * hash,uint64 seed)1302 JsonbHashScalarValueExtended(const JsonbValue *scalarVal, uint64 *hash,
1303 							 uint64 seed)
1304 {
1305 	uint64		tmp;
1306 
1307 	switch (scalarVal->type)
1308 	{
1309 		case jbvNull:
1310 			tmp = seed + 0x01;
1311 			break;
1312 		case jbvString:
1313 			tmp = DatumGetUInt64(hash_any_extended((const unsigned char *) scalarVal->val.string.val,
1314 												   scalarVal->val.string.len,
1315 												   seed));
1316 			break;
1317 		case jbvNumeric:
1318 			tmp = DatumGetUInt64(DirectFunctionCall2(hash_numeric_extended,
1319 													 NumericGetDatum(scalarVal->val.numeric),
1320 													 UInt64GetDatum(seed)));
1321 			break;
1322 		case jbvBool:
1323 			if (seed)
1324 				tmp = DatumGetUInt64(DirectFunctionCall2(hashcharextended,
1325 														 BoolGetDatum(scalarVal->val.boolean),
1326 														 UInt64GetDatum(seed)));
1327 			else
1328 				tmp = scalarVal->val.boolean ? 0x02 : 0x04;
1329 
1330 			break;
1331 		default:
1332 			elog(ERROR, "invalid jsonb scalar type");
1333 			break;
1334 	}
1335 
1336 	*hash = ROTATE_HIGH_AND_LOW_32BITS(*hash);
1337 	*hash ^= tmp;
1338 }
1339 
1340 /*
1341  * Are two scalar JsonbValues of the same type a and b equal?
1342  */
1343 static bool
equalsJsonbScalarValue(JsonbValue * aScalar,JsonbValue * bScalar)1344 equalsJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar)
1345 {
1346 	if (aScalar->type == bScalar->type)
1347 	{
1348 		switch (aScalar->type)
1349 		{
1350 			case jbvNull:
1351 				return true;
1352 			case jbvString:
1353 				return lengthCompareJsonbStringValue(aScalar, bScalar) == 0;
1354 			case jbvNumeric:
1355 				return DatumGetBool(DirectFunctionCall2(numeric_eq,
1356 														PointerGetDatum(aScalar->val.numeric),
1357 														PointerGetDatum(bScalar->val.numeric)));
1358 			case jbvBool:
1359 				return aScalar->val.boolean == bScalar->val.boolean;
1360 
1361 			default:
1362 				elog(ERROR, "invalid jsonb scalar type");
1363 		}
1364 	}
1365 	elog(ERROR, "jsonb scalar type mismatch");
1366 	return false;
1367 }
1368 
1369 /*
1370  * Compare two scalar JsonbValues, returning -1, 0, or 1.
1371  *
1372  * Strings are compared using the default collation.  Used by B-tree
1373  * operators, where a lexical sort order is generally expected.
1374  */
1375 static int
compareJsonbScalarValue(JsonbValue * aScalar,JsonbValue * bScalar)1376 compareJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar)
1377 {
1378 	if (aScalar->type == bScalar->type)
1379 	{
1380 		switch (aScalar->type)
1381 		{
1382 			case jbvNull:
1383 				return 0;
1384 			case jbvString:
1385 				return varstr_cmp(aScalar->val.string.val,
1386 								  aScalar->val.string.len,
1387 								  bScalar->val.string.val,
1388 								  bScalar->val.string.len,
1389 								  DEFAULT_COLLATION_OID);
1390 			case jbvNumeric:
1391 				return DatumGetInt32(DirectFunctionCall2(numeric_cmp,
1392 														 PointerGetDatum(aScalar->val.numeric),
1393 														 PointerGetDatum(bScalar->val.numeric)));
1394 			case jbvBool:
1395 				if (aScalar->val.boolean == bScalar->val.boolean)
1396 					return 0;
1397 				else if (aScalar->val.boolean > bScalar->val.boolean)
1398 					return 1;
1399 				else
1400 					return -1;
1401 			default:
1402 				elog(ERROR, "invalid jsonb scalar type");
1403 		}
1404 	}
1405 	elog(ERROR, "jsonb scalar type mismatch");
1406 	return -1;
1407 }
1408 
1409 
1410 /*
1411  * Functions for manipulating the resizable buffer used by convertJsonb and
1412  * its subroutines.
1413  */
1414 
1415 /*
1416  * Reserve 'len' bytes, at the end of the buffer, enlarging it if necessary.
1417  * Returns the offset to the reserved area. The caller is expected to fill
1418  * the reserved area later with copyToBuffer().
1419  */
1420 static int
reserveFromBuffer(StringInfo buffer,int len)1421 reserveFromBuffer(StringInfo buffer, int len)
1422 {
1423 	int			offset;
1424 
1425 	/* Make more room if needed */
1426 	enlargeStringInfo(buffer, len);
1427 
1428 	/* remember current offset */
1429 	offset = buffer->len;
1430 
1431 	/* reserve the space */
1432 	buffer->len += len;
1433 
1434 	/*
1435 	 * Keep a trailing null in place, even though it's not useful for us; it
1436 	 * seems best to preserve the invariants of StringInfos.
1437 	 */
1438 	buffer->data[buffer->len] = '\0';
1439 
1440 	return offset;
1441 }
1442 
1443 /*
1444  * Copy 'len' bytes to a previously reserved area in buffer.
1445  */
1446 static void
copyToBuffer(StringInfo buffer,int offset,const char * data,int len)1447 copyToBuffer(StringInfo buffer, int offset, const char *data, int len)
1448 {
1449 	memcpy(buffer->data + offset, data, len);
1450 }
1451 
1452 /*
1453  * A shorthand for reserveFromBuffer + copyToBuffer.
1454  */
1455 static void
appendToBuffer(StringInfo buffer,const char * data,int len)1456 appendToBuffer(StringInfo buffer, const char *data, int len)
1457 {
1458 	int			offset;
1459 
1460 	offset = reserveFromBuffer(buffer, len);
1461 	copyToBuffer(buffer, offset, data, len);
1462 }
1463 
1464 
1465 /*
1466  * Append padding, so that the length of the StringInfo is int-aligned.
1467  * Returns the number of padding bytes appended.
1468  */
1469 static short
padBufferToInt(StringInfo buffer)1470 padBufferToInt(StringInfo buffer)
1471 {
1472 	int			padlen,
1473 				p,
1474 				offset;
1475 
1476 	padlen = INTALIGN(buffer->len) - buffer->len;
1477 
1478 	offset = reserveFromBuffer(buffer, padlen);
1479 
1480 	/* padlen must be small, so this is probably faster than a memset */
1481 	for (p = 0; p < padlen; p++)
1482 		buffer->data[offset + p] = '\0';
1483 
1484 	return padlen;
1485 }
1486 
1487 /*
1488  * Given a JsonbValue, convert to Jsonb. The result is palloc'd.
1489  */
1490 static Jsonb *
convertToJsonb(JsonbValue * val)1491 convertToJsonb(JsonbValue *val)
1492 {
1493 	StringInfoData buffer;
1494 	JEntry		jentry;
1495 	Jsonb	   *res;
1496 
1497 	/* Should not already have binary representation */
1498 	Assert(val->type != jbvBinary);
1499 
1500 	/* Allocate an output buffer. It will be enlarged as needed */
1501 	initStringInfo(&buffer);
1502 
1503 	/* Make room for the varlena header */
1504 	reserveFromBuffer(&buffer, VARHDRSZ);
1505 
1506 	convertJsonbValue(&buffer, &jentry, val, 0);
1507 
1508 	/*
1509 	 * Note: the JEntry of the root is discarded. Therefore the root
1510 	 * JsonbContainer struct must contain enough information to tell what kind
1511 	 * of value it is.
1512 	 */
1513 
1514 	res = (Jsonb *) buffer.data;
1515 
1516 	SET_VARSIZE(res, buffer.len);
1517 
1518 	return res;
1519 }
1520 
1521 /*
1522  * Subroutine of convertJsonb: serialize a single JsonbValue into buffer.
1523  *
1524  * The JEntry header for this node is returned in *header.  It is filled in
1525  * with the length of this value and appropriate type bits.  If we wish to
1526  * store an end offset rather than a length, it is the caller's responsibility
1527  * to adjust for that.
1528  *
1529  * If the value is an array or an object, this recurses. 'level' is only used
1530  * for debugging purposes.
1531  */
1532 static void
convertJsonbValue(StringInfo buffer,JEntry * header,JsonbValue * val,int level)1533 convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level)
1534 {
1535 	check_stack_depth();
1536 
1537 	if (!val)
1538 		return;
1539 
1540 	/*
1541 	 * A JsonbValue passed as val should never have a type of jbvBinary, and
1542 	 * neither should any of its sub-components. Those values will be produced
1543 	 * by convertJsonbArray and convertJsonbObject, the results of which will
1544 	 * not be passed back to this function as an argument.
1545 	 */
1546 
1547 	if (IsAJsonbScalar(val))
1548 		convertJsonbScalar(buffer, header, val);
1549 	else if (val->type == jbvArray)
1550 		convertJsonbArray(buffer, header, val, level);
1551 	else if (val->type == jbvObject)
1552 		convertJsonbObject(buffer, header, val, level);
1553 	else
1554 		elog(ERROR, "unknown type of jsonb container to convert");
1555 }
1556 
1557 static void
convertJsonbArray(StringInfo buffer,JEntry * pheader,JsonbValue * val,int level)1558 convertJsonbArray(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
1559 {
1560 	int			base_offset;
1561 	int			jentry_offset;
1562 	int			i;
1563 	int			totallen;
1564 	uint32		header;
1565 	int			nElems = val->val.array.nElems;
1566 
1567 	/* Remember where in the buffer this array starts. */
1568 	base_offset = buffer->len;
1569 
1570 	/* Align to 4-byte boundary (any padding counts as part of my data) */
1571 	padBufferToInt(buffer);
1572 
1573 	/*
1574 	 * Construct the header Jentry and store it in the beginning of the
1575 	 * variable-length payload.
1576 	 */
1577 	header = nElems | JB_FARRAY;
1578 	if (val->val.array.rawScalar)
1579 	{
1580 		Assert(nElems == 1);
1581 		Assert(level == 0);
1582 		header |= JB_FSCALAR;
1583 	}
1584 
1585 	appendToBuffer(buffer, (char *) &header, sizeof(uint32));
1586 
1587 	/* Reserve space for the JEntries of the elements. */
1588 	jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nElems);
1589 
1590 	totallen = 0;
1591 	for (i = 0; i < nElems; i++)
1592 	{
1593 		JsonbValue *elem = &val->val.array.elems[i];
1594 		int			len;
1595 		JEntry		meta;
1596 
1597 		/*
1598 		 * Convert element, producing a JEntry and appending its
1599 		 * variable-length data to buffer
1600 		 */
1601 		convertJsonbValue(buffer, &meta, elem, level + 1);
1602 
1603 		len = JBE_OFFLENFLD(meta);
1604 		totallen += len;
1605 
1606 		/*
1607 		 * Bail out if total variable-length data exceeds what will fit in a
1608 		 * JEntry length field.  We check this in each iteration, not just
1609 		 * once at the end, to forestall possible integer overflow.
1610 		 */
1611 		if (totallen > JENTRY_OFFLENMASK)
1612 			ereport(ERROR,
1613 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1614 					 errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
1615 							JENTRY_OFFLENMASK)));
1616 
1617 		/*
1618 		 * Convert each JB_OFFSET_STRIDE'th length to an offset.
1619 		 */
1620 		if ((i % JB_OFFSET_STRIDE) == 0)
1621 			meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
1622 
1623 		copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1624 		jentry_offset += sizeof(JEntry);
1625 	}
1626 
1627 	/* Total data size is everything we've appended to buffer */
1628 	totallen = buffer->len - base_offset;
1629 
1630 	/* Check length again, since we didn't include the metadata above */
1631 	if (totallen > JENTRY_OFFLENMASK)
1632 		ereport(ERROR,
1633 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1634 				 errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
1635 						JENTRY_OFFLENMASK)));
1636 
1637 	/* Initialize the header of this node in the container's JEntry array */
1638 	*pheader = JENTRY_ISCONTAINER | totallen;
1639 }
1640 
1641 static void
convertJsonbObject(StringInfo buffer,JEntry * pheader,JsonbValue * val,int level)1642 convertJsonbObject(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
1643 {
1644 	int			base_offset;
1645 	int			jentry_offset;
1646 	int			i;
1647 	int			totallen;
1648 	uint32		header;
1649 	int			nPairs = val->val.object.nPairs;
1650 
1651 	/* Remember where in the buffer this object starts. */
1652 	base_offset = buffer->len;
1653 
1654 	/* Align to 4-byte boundary (any padding counts as part of my data) */
1655 	padBufferToInt(buffer);
1656 
1657 	/*
1658 	 * Construct the header Jentry and store it in the beginning of the
1659 	 * variable-length payload.
1660 	 */
1661 	header = nPairs | JB_FOBJECT;
1662 	appendToBuffer(buffer, (char *) &header, sizeof(uint32));
1663 
1664 	/* Reserve space for the JEntries of the keys and values. */
1665 	jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nPairs * 2);
1666 
1667 	/*
1668 	 * Iterate over the keys, then over the values, since that is the ordering
1669 	 * we want in the on-disk representation.
1670 	 */
1671 	totallen = 0;
1672 	for (i = 0; i < nPairs; i++)
1673 	{
1674 		JsonbPair  *pair = &val->val.object.pairs[i];
1675 		int			len;
1676 		JEntry		meta;
1677 
1678 		/*
1679 		 * Convert key, producing a JEntry and appending its variable-length
1680 		 * data to buffer
1681 		 */
1682 		convertJsonbScalar(buffer, &meta, &pair->key);
1683 
1684 		len = JBE_OFFLENFLD(meta);
1685 		totallen += len;
1686 
1687 		/*
1688 		 * Bail out if total variable-length data exceeds what will fit in a
1689 		 * JEntry length field.  We check this in each iteration, not just
1690 		 * once at the end, to forestall possible integer overflow.
1691 		 */
1692 		if (totallen > JENTRY_OFFLENMASK)
1693 			ereport(ERROR,
1694 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1695 					 errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1696 							JENTRY_OFFLENMASK)));
1697 
1698 		/*
1699 		 * Convert each JB_OFFSET_STRIDE'th length to an offset.
1700 		 */
1701 		if ((i % JB_OFFSET_STRIDE) == 0)
1702 			meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
1703 
1704 		copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1705 		jentry_offset += sizeof(JEntry);
1706 	}
1707 	for (i = 0; i < nPairs; i++)
1708 	{
1709 		JsonbPair  *pair = &val->val.object.pairs[i];
1710 		int			len;
1711 		JEntry		meta;
1712 
1713 		/*
1714 		 * Convert value, producing a JEntry and appending its variable-length
1715 		 * data to buffer
1716 		 */
1717 		convertJsonbValue(buffer, &meta, &pair->value, level + 1);
1718 
1719 		len = JBE_OFFLENFLD(meta);
1720 		totallen += len;
1721 
1722 		/*
1723 		 * Bail out if total variable-length data exceeds what will fit in a
1724 		 * JEntry length field.  We check this in each iteration, not just
1725 		 * once at the end, to forestall possible integer overflow.
1726 		 */
1727 		if (totallen > JENTRY_OFFLENMASK)
1728 			ereport(ERROR,
1729 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1730 					 errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1731 							JENTRY_OFFLENMASK)));
1732 
1733 		/*
1734 		 * Convert each JB_OFFSET_STRIDE'th length to an offset.
1735 		 */
1736 		if (((i + nPairs) % JB_OFFSET_STRIDE) == 0)
1737 			meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
1738 
1739 		copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1740 		jentry_offset += sizeof(JEntry);
1741 	}
1742 
1743 	/* Total data size is everything we've appended to buffer */
1744 	totallen = buffer->len - base_offset;
1745 
1746 	/* Check length again, since we didn't include the metadata above */
1747 	if (totallen > JENTRY_OFFLENMASK)
1748 		ereport(ERROR,
1749 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1750 				 errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1751 						JENTRY_OFFLENMASK)));
1752 
1753 	/* Initialize the header of this node in the container's JEntry array */
1754 	*pheader = JENTRY_ISCONTAINER | totallen;
1755 }
1756 
1757 static void
convertJsonbScalar(StringInfo buffer,JEntry * jentry,JsonbValue * scalarVal)1758 convertJsonbScalar(StringInfo buffer, JEntry *jentry, JsonbValue *scalarVal)
1759 {
1760 	int			numlen;
1761 	short		padlen;
1762 
1763 	switch (scalarVal->type)
1764 	{
1765 		case jbvNull:
1766 			*jentry = JENTRY_ISNULL;
1767 			break;
1768 
1769 		case jbvString:
1770 			appendToBuffer(buffer, scalarVal->val.string.val, scalarVal->val.string.len);
1771 
1772 			*jentry = scalarVal->val.string.len;
1773 			break;
1774 
1775 		case jbvNumeric:
1776 			numlen = VARSIZE_ANY(scalarVal->val.numeric);
1777 			padlen = padBufferToInt(buffer);
1778 
1779 			appendToBuffer(buffer, (char *) scalarVal->val.numeric, numlen);
1780 
1781 			*jentry = JENTRY_ISNUMERIC | (padlen + numlen);
1782 			break;
1783 
1784 		case jbvBool:
1785 			*jentry = (scalarVal->val.boolean) ?
1786 				JENTRY_ISBOOL_TRUE : JENTRY_ISBOOL_FALSE;
1787 			break;
1788 
1789 		case jbvDatetime:
1790 			{
1791 				char		buf[MAXDATELEN + 1];
1792 				size_t		len;
1793 
1794 				JsonEncodeDateTime(buf,
1795 								   scalarVal->val.datetime.value,
1796 								   scalarVal->val.datetime.typid,
1797 								   &scalarVal->val.datetime.tz);
1798 				len = strlen(buf);
1799 				appendToBuffer(buffer, buf, len);
1800 
1801 				*jentry = len;
1802 			}
1803 			break;
1804 
1805 		default:
1806 			elog(ERROR, "invalid jsonb scalar type");
1807 	}
1808 }
1809 
1810 /*
1811  * Compare two jbvString JsonbValue values, a and b.
1812  *
1813  * This is a special qsort() comparator used to sort strings in certain
1814  * internal contexts where it is sufficient to have a well-defined sort order.
1815  * In particular, object pair keys are sorted according to this criteria to
1816  * facilitate cheap binary searches where we don't care about lexical sort
1817  * order.
1818  *
1819  * a and b are first sorted based on their length.  If a tie-breaker is
1820  * required, only then do we consider string binary equality.
1821  */
1822 static int
lengthCompareJsonbStringValue(const void * a,const void * b)1823 lengthCompareJsonbStringValue(const void *a, const void *b)
1824 {
1825 	const JsonbValue *va = (const JsonbValue *) a;
1826 	const JsonbValue *vb = (const JsonbValue *) b;
1827 
1828 	Assert(va->type == jbvString);
1829 	Assert(vb->type == jbvString);
1830 
1831 	return lengthCompareJsonbString(va->val.string.val, va->val.string.len,
1832 									vb->val.string.val, vb->val.string.len);
1833 }
1834 
1835 /*
1836  * Subroutine for lengthCompareJsonbStringValue
1837  *
1838  * This is also useful separately to implement binary search on
1839  * JsonbContainers.
1840  */
1841 static int
lengthCompareJsonbString(const char * val1,int len1,const char * val2,int len2)1842 lengthCompareJsonbString(const char *val1, int len1, const char *val2, int len2)
1843 {
1844 	if (len1 == len2)
1845 		return memcmp(val1, val2, len1);
1846 	else
1847 		return len1 > len2 ? 1 : -1;
1848 }
1849 
1850 /*
1851  * qsort_arg() comparator to compare JsonbPair values.
1852  *
1853  * Third argument 'binequal' may point to a bool. If it's set, *binequal is set
1854  * to true iff a and b have full binary equality, since some callers have an
1855  * interest in whether the two values are equal or merely equivalent.
1856  *
1857  * N.B: String comparisons here are "length-wise"
1858  *
1859  * Pairs with equals keys are ordered such that the order field is respected.
1860  */
1861 static int
lengthCompareJsonbPair(const void * a,const void * b,void * binequal)1862 lengthCompareJsonbPair(const void *a, const void *b, void *binequal)
1863 {
1864 	const JsonbPair *pa = (const JsonbPair *) a;
1865 	const JsonbPair *pb = (const JsonbPair *) b;
1866 	int			res;
1867 
1868 	res = lengthCompareJsonbStringValue(&pa->key, &pb->key);
1869 	if (res == 0 && binequal)
1870 		*((bool *) binequal) = true;
1871 
1872 	/*
1873 	 * Guarantee keeping order of equal pair.  Unique algorithm will prefer
1874 	 * first element as value.
1875 	 */
1876 	if (res == 0)
1877 		res = (pa->order > pb->order) ? -1 : 1;
1878 
1879 	return res;
1880 }
1881 
1882 /*
1883  * Sort and unique-ify pairs in JsonbValue object
1884  */
1885 static void
uniqueifyJsonbObject(JsonbValue * object)1886 uniqueifyJsonbObject(JsonbValue *object)
1887 {
1888 	bool		hasNonUniq = false;
1889 
1890 	Assert(object->type == jbvObject);
1891 
1892 	if (object->val.object.nPairs > 1)
1893 		qsort_arg(object->val.object.pairs, object->val.object.nPairs, sizeof(JsonbPair),
1894 				  lengthCompareJsonbPair, &hasNonUniq);
1895 
1896 	if (hasNonUniq)
1897 	{
1898 		JsonbPair  *ptr = object->val.object.pairs + 1,
1899 				   *res = object->val.object.pairs;
1900 
1901 		while (ptr - object->val.object.pairs < object->val.object.nPairs)
1902 		{
1903 			/* Avoid copying over duplicate */
1904 			if (lengthCompareJsonbStringValue(ptr, res) != 0)
1905 			{
1906 				res++;
1907 				if (ptr != res)
1908 					memcpy(res, ptr, sizeof(JsonbPair));
1909 			}
1910 			ptr++;
1911 		}
1912 
1913 		object->val.object.nPairs = res + 1 - object->val.object.pairs;
1914 	}
1915 }
1916