1 /*-------------------------------------------------------------------------
2  *
3  * jsonb_gin.c
4  *	 GIN support functions for jsonb
5  *
6  * Copyright (c) 2014-2020, PostgreSQL Global Development Group
7  *
8  * We provide two opclasses for jsonb indexing: jsonb_ops and jsonb_path_ops.
9  * For their description see json.sgml and comments in jsonb.h.
10  *
11  * The operators support, among the others, "jsonb @? jsonpath" and
12  * "jsonb @@ jsonpath".  Expressions containing these operators are easily
13  * expressed through each other.
14  *
15  *	jb @? 'path' <=> jb @@ 'EXISTS(path)'
16  *	jb @@ 'expr' <=> jb @? '$ ? (expr)'
17  *
 * Thus, we're going to consider only the @@ operator; everything said about
 * it applies to the @? operator as well, via jb @@ 'EXISTS(path)'.
20  *
 * Result of jsonpath query extraction is a tree, whose leaf nodes are index
 * entries and non-leaf nodes are AND/OR logical expressions.  Basically we
 * extract the following statements out of jsonpath:
24  *
25  *	1) "accessors_chain = const",
26  *	2) "EXISTS(accessors_chain)".
27  *
28  * Accessors chain may consist of .key, [*] and [index] accessors.  jsonb_ops
29  * additionally supports .* and .**.
30  *
 * For now, both jsonb_ops and jsonb_path_ops support only statements of
 * the 1st kind.  jsonb_ops might also support statements of the 2nd kind,
 * but given that we have no statistics, keys extracted from accessors chain
 * are likely non-selective.  Therefore, we choose to not confuse optimizer
 * and skip statements of the 2nd kind altogether.  In future versions that
 * might be changed.
37  *
38  * In jsonb_ops statement of the 1st kind is split into expression of AND'ed
39  * keys and const.  Sometimes const might be interpreted as both value or key
40  * in jsonb_ops.  Then statement of 1st kind is decomposed into the expression
41  * below.
42  *
43  *	key1 AND key2 AND ... AND keyN AND (const_as_value OR const_as_key)
44  *
45  * jsonb_path_ops transforms each statement of the 1st kind into single hash
46  * entry below.
47  *
48  *	HASH(key1, key2, ... , keyN, const)
49  *
 * Although statements of the 2nd kind are supported by neither jsonb_ops nor
 * jsonb_path_ops, EXISTS(path) expressions might still be supported
 * when statements of the 1st kind can be extracted out of their filters.
53  *
54  * IDENTIFICATION
55  *	  src/backend/utils/adt/jsonb_gin.c
56  *
57  *-------------------------------------------------------------------------
58  */
59 
60 #include "postgres.h"
61 
62 #include "access/gin.h"
63 #include "access/stratnum.h"
64 #include "catalog/pg_collation.h"
65 #include "catalog/pg_type.h"
66 #include "common/hashfn.h"
67 #include "miscadmin.h"
68 #include "utils/builtins.h"
69 #include "utils/jsonb.h"
70 #include "utils/jsonpath.h"
71 #include "utils/varlena.h"
72 
/*
 * Element of the stack of partial path hashes that gin_extract_jsonb_path
 * keeps while walking a document, one element per parent key level.
 */
typedef struct PathHashStack
{
	uint32		hash;			/* partial hash accumulated at this level */
	struct PathHashStack *parent;	/* enclosing level, NULL for the bottom
									 * element */
} PathHashStack;
78 
/*
 * Buffer for GIN entries.  It is set up by init_gin_entries() and grown on
 * demand by add_gin_entry().
 */
typedef struct GinEntries
{
	Datum	   *buf;			/* entry array, NULL while nothing is
								 * allocated */
	int			count;			/* number of entries stored in buf */
	int			allocated;		/* number of entries buf has room for */
} GinEntries;
86 
/* Kinds of nodes in the expression tree extracted from a jsonpath query */
typedef enum JsonPathGinNodeType
{
	JSP_GIN_OR,					/* logical OR of the argument nodes */
	JSP_GIN_AND,				/* logical AND of the argument nodes */
	JSP_GIN_ENTRY				/* leaf node standing for one GIN entry */
} JsonPathGinNodeType;
93 
typedef struct JsonPathGinNode JsonPathGinNode;

/*
 * Node in jsonpath expression tree.
 *
 * Which member of 'val' is meaningful depends on 'type' and, for ENTRY
 * nodes, on whether emit_jsp_gin_entries() has already replaced the entry
 * datum with its index.  ENTRY nodes are allocated without any room for
 * 'args' (see make_jsp_entry_node()), so 'args' must only be touched in OR
 * and AND nodes.
 */
struct JsonPathGinNode
{
	JsonPathGinNodeType type;
	union
	{
		int			nargs;		/* valid for OR and AND nodes */
		int			entryIndex; /* index in GinEntries array, valid for ENTRY
								 * nodes after entries output */
		Datum		entryDatum; /* path hash or key name/scalar, valid for
								 * ENTRY nodes before entries output */
	}			val;
	JsonPathGinNode *args[FLEXIBLE_ARRAY_MEMBER];	/* valid for OR and AND
													 * nodes */
};
111 
/*
 * jsonb_ops entry extracted from jsonpath item.  Corresponding path item
 * may be: '.key', '.*', '.**', '[index]' or '[*]'.
 * Entry type is stored in 'type' field.
 *
 * Items are chained through 'parent', so the list starts at the most
 * recently added (innermost) accessor and ends with a NULL parent.
 */
typedef struct JsonPathGinPathItem
{
	struct JsonPathGinPathItem *parent; /* previously added item, or NULL */
	Datum		keyName;		/* key name (for '.key' path item) or NULL */
	JsonPathItemType type;		/* type of jsonpath item */
} JsonPathGinPathItem;
123 
/*
 * GIN representation of the extracted json path.  Only one member is in use
 * at a time, depending on which opclass the extraction is done for.
 */
typedef union JsonPathGinPath
{
	JsonPathGinPathItem *items; /* list of path items (jsonb_ops) */
	uint32		hash;			/* hash of the path (jsonb_path_ops) */
} JsonPathGinPath;
130 
typedef struct JsonPathGinContext JsonPathGinContext;

/*
 * Callback, which stores information about path item into JsonPathGinPath.
 * Returns false when the path item is not supported by the opclass.
 */
typedef bool (*JsonPathGinAddPathItemFunc) (JsonPathGinPath *path,
											JsonPathItem *jsp);

/*
 * Callback, which extracts set of nodes from statement of 1st kind
 * (scalar != NULL) or statement of 2nd kind (scalar == NULL).
 * The extracted nodes are appended to 'nodes' and the resulting list is
 * returned.
 */
typedef List *(*JsonPathGinExtractNodesFunc) (JsonPathGinContext *cxt,
											  JsonPathGinPath path,
											  JsonbValue *scalar,
											  List *nodes);

/* Context for jsonpath entries extraction */
struct JsonPathGinContext
{
	JsonPathGinAddPathItemFunc add_path_item;	/* opclass-specific path
												 * builder */
	JsonPathGinExtractNodesFunc extract_nodes;	/* opclass-specific node
												 * extractor */
	bool		lax;			/* true if the jsonpath has the JSONPATH_LAX
								 * header flag set */
};
153 
/*
 * Forward declarations of static helpers that are used before the point
 * where they are defined.
 */
static Datum make_text_key(char flag, const char *str, int len);
static Datum make_scalar_key(const JsonbValue *scalarVal, bool is_key);

/* extract_jsp_bool_expr() is mutually recursive with the path extractors */
static JsonPathGinNode *extract_jsp_bool_expr(JsonPathGinContext *cxt,
											  JsonPathGinPath path, JsonPathItem *jsp, bool not);
159 
160 
161 /* Initialize GinEntries struct */
162 static void
init_gin_entries(GinEntries * entries,int preallocated)163 init_gin_entries(GinEntries *entries, int preallocated)
164 {
165 	entries->allocated = preallocated;
166 	entries->buf = preallocated ? palloc(sizeof(Datum) * preallocated) : NULL;
167 	entries->count = 0;
168 }
169 
170 /* Add new entry to GinEntries */
171 static int
add_gin_entry(GinEntries * entries,Datum entry)172 add_gin_entry(GinEntries *entries, Datum entry)
173 {
174 	int			id = entries->count;
175 
176 	if (entries->count >= entries->allocated)
177 	{
178 		if (entries->allocated)
179 		{
180 			entries->allocated *= 2;
181 			entries->buf = repalloc(entries->buf,
182 									sizeof(Datum) * entries->allocated);
183 		}
184 		else
185 		{
186 			entries->allocated = 8;
187 			entries->buf = palloc(sizeof(Datum) * entries->allocated);
188 		}
189 	}
190 
191 	entries->buf[entries->count++] = entry;
192 
193 	return id;
194 }
195 
196 /*
197  *
198  * jsonb_ops GIN opclass support functions
199  *
200  */
201 
202 Datum
gin_compare_jsonb(PG_FUNCTION_ARGS)203 gin_compare_jsonb(PG_FUNCTION_ARGS)
204 {
205 	text	   *arg1 = PG_GETARG_TEXT_PP(0);
206 	text	   *arg2 = PG_GETARG_TEXT_PP(1);
207 	int32		result;
208 	char	   *a1p,
209 			   *a2p;
210 	int			len1,
211 				len2;
212 
213 	a1p = VARDATA_ANY(arg1);
214 	a2p = VARDATA_ANY(arg2);
215 
216 	len1 = VARSIZE_ANY_EXHDR(arg1);
217 	len2 = VARSIZE_ANY_EXHDR(arg2);
218 
219 	/* Compare text as bttextcmp does, but always using C collation */
220 	result = varstr_cmp(a1p, len1, a2p, len2, C_COLLATION_OID);
221 
222 	PG_FREE_IF_COPY(arg1, 0);
223 	PG_FREE_IF_COPY(arg2, 1);
224 
225 	PG_RETURN_INT32(result);
226 }
227 
228 Datum
gin_extract_jsonb(PG_FUNCTION_ARGS)229 gin_extract_jsonb(PG_FUNCTION_ARGS)
230 {
231 	Jsonb	   *jb = (Jsonb *) PG_GETARG_JSONB_P(0);
232 	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
233 	int			total = JB_ROOT_COUNT(jb);
234 	JsonbIterator *it;
235 	JsonbValue	v;
236 	JsonbIteratorToken r;
237 	GinEntries	entries;
238 
239 	/* If the root level is empty, we certainly have no keys */
240 	if (total == 0)
241 	{
242 		*nentries = 0;
243 		PG_RETURN_POINTER(NULL);
244 	}
245 
246 	/* Otherwise, use 2 * root count as initial estimate of result size */
247 	init_gin_entries(&entries, 2 * total);
248 
249 	it = JsonbIteratorInit(&jb->root);
250 
251 	while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
252 	{
253 		switch (r)
254 		{
255 			case WJB_KEY:
256 				add_gin_entry(&entries, make_scalar_key(&v, true));
257 				break;
258 			case WJB_ELEM:
259 				/* Pretend string array elements are keys, see jsonb.h */
260 				add_gin_entry(&entries, make_scalar_key(&v, v.type == jbvString));
261 				break;
262 			case WJB_VALUE:
263 				add_gin_entry(&entries, make_scalar_key(&v, false));
264 				break;
265 			default:
266 				/* we can ignore structural items */
267 				break;
268 		}
269 	}
270 
271 	*nentries = entries.count;
272 
273 	PG_RETURN_POINTER(entries.buf);
274 }
275 
276 /* Append JsonPathGinPathItem to JsonPathGinPath (jsonb_ops) */
277 static bool
jsonb_ops__add_path_item(JsonPathGinPath * path,JsonPathItem * jsp)278 jsonb_ops__add_path_item(JsonPathGinPath *path, JsonPathItem *jsp)
279 {
280 	JsonPathGinPathItem *pentry;
281 	Datum		keyName;
282 
283 	switch (jsp->type)
284 	{
285 		case jpiRoot:
286 			path->items = NULL; /* reset path */
287 			return true;
288 
289 		case jpiKey:
290 			{
291 				int			len;
292 				char	   *key = jspGetString(jsp, &len);
293 
294 				keyName = make_text_key(JGINFLAG_KEY, key, len);
295 				break;
296 			}
297 
298 		case jpiAny:
299 		case jpiAnyKey:
300 		case jpiAnyArray:
301 		case jpiIndexArray:
302 			keyName = PointerGetDatum(NULL);
303 			break;
304 
305 		default:
306 			/* other path items like item methods are not supported */
307 			return false;
308 	}
309 
310 	pentry = palloc(sizeof(*pentry));
311 
312 	pentry->type = jsp->type;
313 	pentry->keyName = keyName;
314 	pentry->parent = path->items;
315 
316 	path->items = pentry;
317 
318 	return true;
319 }
320 
321 /* Combine existing path hash with next key hash (jsonb_path_ops) */
322 static bool
jsonb_path_ops__add_path_item(JsonPathGinPath * path,JsonPathItem * jsp)323 jsonb_path_ops__add_path_item(JsonPathGinPath *path, JsonPathItem *jsp)
324 {
325 	switch (jsp->type)
326 	{
327 		case jpiRoot:
328 			path->hash = 0;		/* reset path hash */
329 			return true;
330 
331 		case jpiKey:
332 			{
333 				JsonbValue	jbv;
334 
335 				jbv.type = jbvString;
336 				jbv.val.string.val = jspGetString(jsp, &jbv.val.string.len);
337 
338 				JsonbHashScalarValue(&jbv, &path->hash);
339 				return true;
340 			}
341 
342 		case jpiIndexArray:
343 		case jpiAnyArray:
344 			return true;		/* path hash is unchanged */
345 
346 		default:
347 			/* other items (wildcard paths, item methods) are not supported */
348 			return false;
349 	}
350 }
351 
352 static JsonPathGinNode *
make_jsp_entry_node(Datum entry)353 make_jsp_entry_node(Datum entry)
354 {
355 	JsonPathGinNode *node = palloc(offsetof(JsonPathGinNode, args));
356 
357 	node->type = JSP_GIN_ENTRY;
358 	node->val.entryDatum = entry;
359 
360 	return node;
361 }
362 
363 static JsonPathGinNode *
make_jsp_entry_node_scalar(JsonbValue * scalar,bool iskey)364 make_jsp_entry_node_scalar(JsonbValue *scalar, bool iskey)
365 {
366 	return make_jsp_entry_node(make_scalar_key(scalar, iskey));
367 }
368 
369 static JsonPathGinNode *
make_jsp_expr_node(JsonPathGinNodeType type,int nargs)370 make_jsp_expr_node(JsonPathGinNodeType type, int nargs)
371 {
372 	JsonPathGinNode *node = palloc(offsetof(JsonPathGinNode, args) +
373 								   sizeof(node->args[0]) * nargs);
374 
375 	node->type = type;
376 	node->val.nargs = nargs;
377 
378 	return node;
379 }
380 
381 static JsonPathGinNode *
make_jsp_expr_node_args(JsonPathGinNodeType type,List * args)382 make_jsp_expr_node_args(JsonPathGinNodeType type, List *args)
383 {
384 	JsonPathGinNode *node = make_jsp_expr_node(type, list_length(args));
385 	ListCell   *lc;
386 	int			i = 0;
387 
388 	foreach(lc, args)
389 		node->args[i++] = lfirst(lc);
390 
391 	return node;
392 }
393 
394 static JsonPathGinNode *
make_jsp_expr_node_binary(JsonPathGinNodeType type,JsonPathGinNode * arg1,JsonPathGinNode * arg2)395 make_jsp_expr_node_binary(JsonPathGinNodeType type,
396 						  JsonPathGinNode *arg1, JsonPathGinNode *arg2)
397 {
398 	JsonPathGinNode *node = make_jsp_expr_node(type, 2);
399 
400 	node->args[0] = arg1;
401 	node->args[1] = arg2;
402 
403 	return node;
404 }
405 
406 /* Append a list of nodes from the jsonpath (jsonb_ops). */
407 static List *
jsonb_ops__extract_nodes(JsonPathGinContext * cxt,JsonPathGinPath path,JsonbValue * scalar,List * nodes)408 jsonb_ops__extract_nodes(JsonPathGinContext *cxt, JsonPathGinPath path,
409 						 JsonbValue *scalar, List *nodes)
410 {
411 	JsonPathGinPathItem *pentry;
412 
413 	if (scalar)
414 	{
415 		JsonPathGinNode *node;
416 
417 		/*
418 		 * Append path entry nodes only if scalar is provided.  See header
419 		 * comment for details.
420 		 */
421 		for (pentry = path.items; pentry; pentry = pentry->parent)
422 		{
423 			if (pentry->type == jpiKey) /* only keys are indexed */
424 				nodes = lappend(nodes, make_jsp_entry_node(pentry->keyName));
425 		}
426 
427 		/* Append scalar node for equality queries. */
428 		if (scalar->type == jbvString)
429 		{
430 			JsonPathGinPathItem *last = path.items;
431 			GinTernaryValue key_entry;
432 
433 			/*
434 			 * Assuming that jsonb_ops interprets string array elements as
435 			 * keys, we may extract key or non-key entry or even both.  In the
436 			 * latter case we create OR-node.  It is possible in lax mode
437 			 * where arrays are automatically unwrapped, or in strict mode for
438 			 * jpiAny items.
439 			 */
440 
441 			if (cxt->lax)
442 				key_entry = GIN_MAYBE;
443 			else if (!last)		/* root ($) */
444 				key_entry = GIN_FALSE;
445 			else if (last->type == jpiAnyArray || last->type == jpiIndexArray)
446 				key_entry = GIN_TRUE;
447 			else if (last->type == jpiAny)
448 				key_entry = GIN_MAYBE;
449 			else
450 				key_entry = GIN_FALSE;
451 
452 			if (key_entry == GIN_MAYBE)
453 			{
454 				JsonPathGinNode *n1 = make_jsp_entry_node_scalar(scalar, true);
455 				JsonPathGinNode *n2 = make_jsp_entry_node_scalar(scalar, false);
456 
457 				node = make_jsp_expr_node_binary(JSP_GIN_OR, n1, n2);
458 			}
459 			else
460 			{
461 				node = make_jsp_entry_node_scalar(scalar,
462 												  key_entry == GIN_TRUE);
463 			}
464 		}
465 		else
466 		{
467 			node = make_jsp_entry_node_scalar(scalar, false);
468 		}
469 
470 		nodes = lappend(nodes, node);
471 	}
472 
473 	return nodes;
474 }
475 
476 /* Append a list of nodes from the jsonpath (jsonb_path_ops). */
477 static List *
jsonb_path_ops__extract_nodes(JsonPathGinContext * cxt,JsonPathGinPath path,JsonbValue * scalar,List * nodes)478 jsonb_path_ops__extract_nodes(JsonPathGinContext *cxt, JsonPathGinPath path,
479 							  JsonbValue *scalar, List *nodes)
480 {
481 	if (scalar)
482 	{
483 		/* append path hash node for equality queries */
484 		uint32		hash = path.hash;
485 
486 		JsonbHashScalarValue(scalar, &hash);
487 
488 		return lappend(nodes,
489 					   make_jsp_entry_node(UInt32GetDatum(hash)));
490 	}
491 	else
492 	{
493 		/* jsonb_path_ops doesn't support EXISTS queries => nothing to append */
494 		return nodes;
495 	}
496 }
497 
498 /*
499  * Extract a list of expression nodes that need to be AND-ed by the caller.
500  * Extracted expression is 'path == scalar' if 'scalar' is non-NULL, and
501  * 'EXISTS(path)' otherwise.
502  */
503 static List *
extract_jsp_path_expr_nodes(JsonPathGinContext * cxt,JsonPathGinPath path,JsonPathItem * jsp,JsonbValue * scalar)504 extract_jsp_path_expr_nodes(JsonPathGinContext *cxt, JsonPathGinPath path,
505 							JsonPathItem *jsp, JsonbValue *scalar)
506 {
507 	JsonPathItem next;
508 	List	   *nodes = NIL;
509 
510 	for (;;)
511 	{
512 		switch (jsp->type)
513 		{
514 			case jpiCurrent:
515 				break;
516 
517 			case jpiFilter:
518 				{
519 					JsonPathItem arg;
520 					JsonPathGinNode *filter;
521 
522 					jspGetArg(jsp, &arg);
523 
524 					filter = extract_jsp_bool_expr(cxt, path, &arg, false);
525 
526 					if (filter)
527 						nodes = lappend(nodes, filter);
528 
529 					break;
530 				}
531 
532 			default:
533 				if (!cxt->add_path_item(&path, jsp))
534 
535 					/*
536 					 * Path is not supported by the index opclass, return only
537 					 * the extracted filter nodes.
538 					 */
539 					return nodes;
540 				break;
541 		}
542 
543 		if (!jspGetNext(jsp, &next))
544 			break;
545 
546 		jsp = &next;
547 	}
548 
549 	/*
550 	 * Append nodes from the path expression itself to the already extracted
551 	 * list of filter nodes.
552 	 */
553 	return cxt->extract_nodes(cxt, path, scalar, nodes);
554 }
555 
556 /*
557  * Extract an expression node from one of following jsonpath path expressions:
558  *   EXISTS(jsp)    (when 'scalar' is NULL)
559  *   jsp == scalar  (when 'scalar' is not NULL).
560  *
561  * The current path (@) is passed in 'path'.
562  */
563 static JsonPathGinNode *
extract_jsp_path_expr(JsonPathGinContext * cxt,JsonPathGinPath path,JsonPathItem * jsp,JsonbValue * scalar)564 extract_jsp_path_expr(JsonPathGinContext *cxt, JsonPathGinPath path,
565 					  JsonPathItem *jsp, JsonbValue *scalar)
566 {
567 	/* extract a list of nodes to be AND-ed */
568 	List	   *nodes = extract_jsp_path_expr_nodes(cxt, path, jsp, scalar);
569 
570 	if (list_length(nodes) <= 0)
571 		/* no nodes were extracted => full scan is needed for this path */
572 		return NULL;
573 
574 	if (list_length(nodes) == 1)
575 		return linitial(nodes); /* avoid extra AND-node */
576 
577 	/* construct AND-node for path with filters */
578 	return make_jsp_expr_node_args(JSP_GIN_AND, nodes);
579 }
580 
581 /* Recursively extract nodes from the boolean jsonpath expression. */
582 static JsonPathGinNode *
extract_jsp_bool_expr(JsonPathGinContext * cxt,JsonPathGinPath path,JsonPathItem * jsp,bool not)583 extract_jsp_bool_expr(JsonPathGinContext *cxt, JsonPathGinPath path,
584 					  JsonPathItem *jsp, bool not)
585 {
586 	check_stack_depth();
587 
588 	switch (jsp->type)
589 	{
590 		case jpiAnd:			/* expr && expr */
591 		case jpiOr:				/* expr || expr */
592 			{
593 				JsonPathItem arg;
594 				JsonPathGinNode *larg;
595 				JsonPathGinNode *rarg;
596 				JsonPathGinNodeType type;
597 
598 				jspGetLeftArg(jsp, &arg);
599 				larg = extract_jsp_bool_expr(cxt, path, &arg, not);
600 
601 				jspGetRightArg(jsp, &arg);
602 				rarg = extract_jsp_bool_expr(cxt, path, &arg, not);
603 
604 				if (!larg || !rarg)
605 				{
606 					if (jsp->type == jpiOr)
607 						return NULL;
608 
609 					return larg ? larg : rarg;
610 				}
611 
612 				type = not ^ (jsp->type == jpiAnd) ? JSP_GIN_AND : JSP_GIN_OR;
613 
614 				return make_jsp_expr_node_binary(type, larg, rarg);
615 			}
616 
617 		case jpiNot:			/* !expr  */
618 			{
619 				JsonPathItem arg;
620 
621 				jspGetArg(jsp, &arg);
622 
623 				/* extract child expression inverting 'not' flag */
624 				return extract_jsp_bool_expr(cxt, path, &arg, !not);
625 			}
626 
627 		case jpiExists:			/* EXISTS(path) */
628 			{
629 				JsonPathItem arg;
630 
631 				if (not)
632 					return NULL;	/* NOT EXISTS is not supported */
633 
634 				jspGetArg(jsp, &arg);
635 
636 				return extract_jsp_path_expr(cxt, path, &arg, NULL);
637 			}
638 
639 		case jpiNotEqual:
640 
641 			/*
642 			 * 'not' == true case is not supported here because '!(path !=
643 			 * scalar)' is not equivalent to 'path == scalar' in the general
644 			 * case because of sequence comparison semantics: 'path == scalar'
645 			 * === 'EXISTS (path, @ == scalar)', '!(path != scalar)' ===
646 			 * 'FOR_ALL(path, @ == scalar)'. So, we should translate '!(path
647 			 * != scalar)' into GIN query 'path == scalar || EMPTY(path)', but
648 			 * 'EMPTY(path)' queries are not supported by the both jsonb
649 			 * opclasses.  However in strict mode we could omit 'EMPTY(path)'
650 			 * part if the path can return exactly one item (it does not
651 			 * contain wildcard accessors or item methods like .keyvalue()
652 			 * etc.).
653 			 */
654 			return NULL;
655 
656 		case jpiEqual:			/* path == scalar */
657 			{
658 				JsonPathItem left_item;
659 				JsonPathItem right_item;
660 				JsonPathItem *path_item;
661 				JsonPathItem *scalar_item;
662 				JsonbValue	scalar;
663 
664 				if (not)
665 					return NULL;
666 
667 				jspGetLeftArg(jsp, &left_item);
668 				jspGetRightArg(jsp, &right_item);
669 
670 				if (jspIsScalar(left_item.type))
671 				{
672 					scalar_item = &left_item;
673 					path_item = &right_item;
674 				}
675 				else if (jspIsScalar(right_item.type))
676 				{
677 					scalar_item = &right_item;
678 					path_item = &left_item;
679 				}
680 				else
681 					return NULL;	/* at least one operand should be a scalar */
682 
683 				switch (scalar_item->type)
684 				{
685 					case jpiNull:
686 						scalar.type = jbvNull;
687 						break;
688 					case jpiBool:
689 						scalar.type = jbvBool;
690 						scalar.val.boolean = !!*scalar_item->content.value.data;
691 						break;
692 					case jpiNumeric:
693 						scalar.type = jbvNumeric;
694 						scalar.val.numeric =
695 							(Numeric) scalar_item->content.value.data;
696 						break;
697 					case jpiString:
698 						scalar.type = jbvString;
699 						scalar.val.string.val = scalar_item->content.value.data;
700 						scalar.val.string.len =
701 							scalar_item->content.value.datalen;
702 						break;
703 					default:
704 						elog(ERROR, "invalid scalar jsonpath item type: %d",
705 							 scalar_item->type);
706 						return NULL;
707 				}
708 
709 				return extract_jsp_path_expr(cxt, path, path_item, &scalar);
710 			}
711 
712 		default:
713 			return NULL;		/* not a boolean expression */
714 	}
715 }
716 
717 /* Recursively emit all GIN entries found in the node tree */
718 static void
emit_jsp_gin_entries(JsonPathGinNode * node,GinEntries * entries)719 emit_jsp_gin_entries(JsonPathGinNode *node, GinEntries *entries)
720 {
721 	check_stack_depth();
722 
723 	switch (node->type)
724 	{
725 		case JSP_GIN_ENTRY:
726 			/* replace datum with its index in the array */
727 			node->val.entryIndex = add_gin_entry(entries, node->val.entryDatum);
728 			break;
729 
730 		case JSP_GIN_OR:
731 		case JSP_GIN_AND:
732 			{
733 				int			i;
734 
735 				for (i = 0; i < node->val.nargs; i++)
736 					emit_jsp_gin_entries(node->args[i], entries);
737 
738 				break;
739 			}
740 	}
741 }
742 
743 /*
744  * Recursively extract GIN entries from jsonpath query.
745  * Root expression node is put into (*extra_data)[0].
746  */
747 static Datum *
extract_jsp_query(JsonPath * jp,StrategyNumber strat,bool pathOps,int32 * nentries,Pointer ** extra_data)748 extract_jsp_query(JsonPath *jp, StrategyNumber strat, bool pathOps,
749 				  int32 *nentries, Pointer **extra_data)
750 {
751 	JsonPathGinContext cxt;
752 	JsonPathItem root;
753 	JsonPathGinNode *node;
754 	JsonPathGinPath path = {0};
755 	GinEntries	entries = {0};
756 
757 	cxt.lax = (jp->header & JSONPATH_LAX) != 0;
758 
759 	if (pathOps)
760 	{
761 		cxt.add_path_item = jsonb_path_ops__add_path_item;
762 		cxt.extract_nodes = jsonb_path_ops__extract_nodes;
763 	}
764 	else
765 	{
766 		cxt.add_path_item = jsonb_ops__add_path_item;
767 		cxt.extract_nodes = jsonb_ops__extract_nodes;
768 	}
769 
770 	jspInit(&root, jp);
771 
772 	node = strat == JsonbJsonpathExistsStrategyNumber
773 		? extract_jsp_path_expr(&cxt, path, &root, NULL)
774 		: extract_jsp_bool_expr(&cxt, path, &root, false);
775 
776 	if (!node)
777 	{
778 		*nentries = 0;
779 		return NULL;
780 	}
781 
782 	emit_jsp_gin_entries(node, &entries);
783 
784 	*nentries = entries.count;
785 	if (!*nentries)
786 		return NULL;
787 
788 	*extra_data = palloc0(sizeof(**extra_data) * entries.count);
789 	**extra_data = (Pointer) node;
790 
791 	return entries.buf;
792 }
793 
794 /*
795  * Recursively execute jsonpath expression.
796  * 'check' is a bool[] or a GinTernaryValue[] depending on 'ternary' flag.
797  */
798 static GinTernaryValue
execute_jsp_gin_node(JsonPathGinNode * node,void * check,bool ternary)799 execute_jsp_gin_node(JsonPathGinNode *node, void *check, bool ternary)
800 {
801 	GinTernaryValue res;
802 	GinTernaryValue v;
803 	int			i;
804 
805 	switch (node->type)
806 	{
807 		case JSP_GIN_AND:
808 			res = GIN_TRUE;
809 			for (i = 0; i < node->val.nargs; i++)
810 			{
811 				v = execute_jsp_gin_node(node->args[i], check, ternary);
812 				if (v == GIN_FALSE)
813 					return GIN_FALSE;
814 				else if (v == GIN_MAYBE)
815 					res = GIN_MAYBE;
816 			}
817 			return res;
818 
819 		case JSP_GIN_OR:
820 			res = GIN_FALSE;
821 			for (i = 0; i < node->val.nargs; i++)
822 			{
823 				v = execute_jsp_gin_node(node->args[i], check, ternary);
824 				if (v == GIN_TRUE)
825 					return GIN_TRUE;
826 				else if (v == GIN_MAYBE)
827 					res = GIN_MAYBE;
828 			}
829 			return res;
830 
831 		case JSP_GIN_ENTRY:
832 			{
833 				int			index = node->val.entryIndex;
834 
835 				if (ternary)
836 					return ((GinTernaryValue *) check)[index];
837 				else
838 					return ((bool *) check)[index] ? GIN_TRUE : GIN_FALSE;
839 			}
840 
841 		default:
842 			elog(ERROR, "invalid jsonpath gin node type: %d", node->type);
843 			return GIN_FALSE;	/* keep compiler quiet */
844 	}
845 }
846 
847 Datum
gin_extract_jsonb_query(PG_FUNCTION_ARGS)848 gin_extract_jsonb_query(PG_FUNCTION_ARGS)
849 {
850 	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
851 	StrategyNumber strategy = PG_GETARG_UINT16(2);
852 	int32	   *searchMode = (int32 *) PG_GETARG_POINTER(6);
853 	Datum	   *entries;
854 
855 	if (strategy == JsonbContainsStrategyNumber)
856 	{
857 		/* Query is a jsonb, so just apply gin_extract_jsonb... */
858 		entries = (Datum *)
859 			DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb,
860 												PG_GETARG_DATUM(0),
861 												PointerGetDatum(nentries)));
862 		/* ...although "contains {}" requires a full index scan */
863 		if (*nentries == 0)
864 			*searchMode = GIN_SEARCH_MODE_ALL;
865 	}
866 	else if (strategy == JsonbExistsStrategyNumber)
867 	{
868 		/* Query is a text string, which we treat as a key */
869 		text	   *query = PG_GETARG_TEXT_PP(0);
870 
871 		*nentries = 1;
872 		entries = (Datum *) palloc(sizeof(Datum));
873 		entries[0] = make_text_key(JGINFLAG_KEY,
874 								   VARDATA_ANY(query),
875 								   VARSIZE_ANY_EXHDR(query));
876 	}
877 	else if (strategy == JsonbExistsAnyStrategyNumber ||
878 			 strategy == JsonbExistsAllStrategyNumber)
879 	{
880 		/* Query is a text array; each element is treated as a key */
881 		ArrayType  *query = PG_GETARG_ARRAYTYPE_P(0);
882 		Datum	   *key_datums;
883 		bool	   *key_nulls;
884 		int			key_count;
885 		int			i,
886 					j;
887 
888 		deconstruct_array(query,
889 						  TEXTOID, -1, false, TYPALIGN_INT,
890 						  &key_datums, &key_nulls, &key_count);
891 
892 		entries = (Datum *) palloc(sizeof(Datum) * key_count);
893 
894 		for (i = 0, j = 0; i < key_count; i++)
895 		{
896 			/* Nulls in the array are ignored */
897 			if (key_nulls[i])
898 				continue;
899 			entries[j++] = make_text_key(JGINFLAG_KEY,
900 										 VARDATA(key_datums[i]),
901 										 VARSIZE(key_datums[i]) - VARHDRSZ);
902 		}
903 
904 		*nentries = j;
905 		/* ExistsAll with no keys should match everything */
906 		if (j == 0 && strategy == JsonbExistsAllStrategyNumber)
907 			*searchMode = GIN_SEARCH_MODE_ALL;
908 	}
909 	else if (strategy == JsonbJsonpathPredicateStrategyNumber ||
910 			 strategy == JsonbJsonpathExistsStrategyNumber)
911 	{
912 		JsonPath   *jp = PG_GETARG_JSONPATH_P(0);
913 		Pointer   **extra_data = (Pointer **) PG_GETARG_POINTER(4);
914 
915 		entries = extract_jsp_query(jp, strategy, false, nentries, extra_data);
916 
917 		if (!entries)
918 			*searchMode = GIN_SEARCH_MODE_ALL;
919 	}
920 	else
921 	{
922 		elog(ERROR, "unrecognized strategy number: %d", strategy);
923 		entries = NULL;			/* keep compiler quiet */
924 	}
925 
926 	PG_RETURN_POINTER(entries);
927 }
928 
929 Datum
gin_consistent_jsonb(PG_FUNCTION_ARGS)930 gin_consistent_jsonb(PG_FUNCTION_ARGS)
931 {
932 	bool	   *check = (bool *) PG_GETARG_POINTER(0);
933 	StrategyNumber strategy = PG_GETARG_UINT16(1);
934 
935 	/* Jsonb	   *query = PG_GETARG_JSONB_P(2); */
936 	int32		nkeys = PG_GETARG_INT32(3);
937 
938 	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
939 	bool	   *recheck = (bool *) PG_GETARG_POINTER(5);
940 	bool		res = true;
941 	int32		i;
942 
943 	if (strategy == JsonbContainsStrategyNumber)
944 	{
945 		/*
946 		 * We must always recheck, since we can't tell from the index whether
947 		 * the positions of the matched items match the structure of the query
948 		 * object.  (Even if we could, we'd also have to worry about hashed
949 		 * keys and the index's failure to distinguish keys from string array
950 		 * elements.)  However, the tuple certainly doesn't match unless it
951 		 * contains all the query keys.
952 		 */
953 		*recheck = true;
954 		for (i = 0; i < nkeys; i++)
955 		{
956 			if (!check[i])
957 			{
958 				res = false;
959 				break;
960 			}
961 		}
962 	}
963 	else if (strategy == JsonbExistsStrategyNumber)
964 	{
965 		/*
966 		 * Although the key is certainly present in the index, we must recheck
967 		 * because (1) the key might be hashed, and (2) the index match might
968 		 * be for a key that's not at top level of the JSON object.  For (1),
969 		 * we could look at the query key to see if it's hashed and not
970 		 * recheck if not, but the index lacks enough info to tell about (2).
971 		 */
972 		*recheck = true;
973 		res = true;
974 	}
975 	else if (strategy == JsonbExistsAnyStrategyNumber)
976 	{
977 		/* As for plain exists, we must recheck */
978 		*recheck = true;
979 		res = true;
980 	}
981 	else if (strategy == JsonbExistsAllStrategyNumber)
982 	{
983 		/* As for plain exists, we must recheck */
984 		*recheck = true;
985 		/* ... but unless all the keys are present, we can say "false" */
986 		for (i = 0; i < nkeys; i++)
987 		{
988 			if (!check[i])
989 			{
990 				res = false;
991 				break;
992 			}
993 		}
994 	}
995 	else if (strategy == JsonbJsonpathPredicateStrategyNumber ||
996 			 strategy == JsonbJsonpathExistsStrategyNumber)
997 	{
998 		*recheck = true;
999 
1000 		if (nkeys > 0)
1001 		{
1002 			Assert(extra_data && extra_data[0]);
1003 			res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check,
1004 									   false) != GIN_FALSE;
1005 		}
1006 	}
1007 	else
1008 		elog(ERROR, "unrecognized strategy number: %d", strategy);
1009 
1010 	PG_RETURN_BOOL(res);
1011 }
1012 
1013 Datum
gin_triconsistent_jsonb(PG_FUNCTION_ARGS)1014 gin_triconsistent_jsonb(PG_FUNCTION_ARGS)
1015 {
1016 	GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
1017 	StrategyNumber strategy = PG_GETARG_UINT16(1);
1018 
1019 	/* Jsonb	   *query = PG_GETARG_JSONB_P(2); */
1020 	int32		nkeys = PG_GETARG_INT32(3);
1021 	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
1022 	GinTernaryValue res = GIN_MAYBE;
1023 	int32		i;
1024 
1025 	/*
1026 	 * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; this
1027 	 * corresponds to always forcing recheck in the regular consistent
1028 	 * function, for the reasons listed there.
1029 	 */
1030 	if (strategy == JsonbContainsStrategyNumber ||
1031 		strategy == JsonbExistsAllStrategyNumber)
1032 	{
1033 		/* All extracted keys must be present */
1034 		for (i = 0; i < nkeys; i++)
1035 		{
1036 			if (check[i] == GIN_FALSE)
1037 			{
1038 				res = GIN_FALSE;
1039 				break;
1040 			}
1041 		}
1042 	}
1043 	else if (strategy == JsonbExistsStrategyNumber ||
1044 			 strategy == JsonbExistsAnyStrategyNumber)
1045 	{
1046 		/* At least one extracted key must be present */
1047 		res = GIN_FALSE;
1048 		for (i = 0; i < nkeys; i++)
1049 		{
1050 			if (check[i] == GIN_TRUE ||
1051 				check[i] == GIN_MAYBE)
1052 			{
1053 				res = GIN_MAYBE;
1054 				break;
1055 			}
1056 		}
1057 	}
1058 	else if (strategy == JsonbJsonpathPredicateStrategyNumber ||
1059 			 strategy == JsonbJsonpathExistsStrategyNumber)
1060 	{
1061 		if (nkeys > 0)
1062 		{
1063 			Assert(extra_data && extra_data[0]);
1064 			res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check,
1065 									   true);
1066 
1067 			/* Should always recheck the result */
1068 			if (res == GIN_TRUE)
1069 				res = GIN_MAYBE;
1070 		}
1071 	}
1072 	else
1073 		elog(ERROR, "unrecognized strategy number: %d", strategy);
1074 
1075 	PG_RETURN_GIN_TERNARY_VALUE(res);
1076 }
1077 
1078 /*
1079  *
1080  * jsonb_path_ops GIN opclass support functions
1081  *
1082  * In a jsonb_path_ops index, the GIN keys are uint32 hashes, one per JSON
1083  * value; but the JSON key(s) leading to each value are also included in its
1084  * hash computation.  This means we can only support containment queries,
1085  * but the index can distinguish, for example, {"foo": 42} from {"bar": 42}
1086  * since different hashes will be generated.
1087  *
1088  */
1089 
1090 Datum
gin_extract_jsonb_path(PG_FUNCTION_ARGS)1091 gin_extract_jsonb_path(PG_FUNCTION_ARGS)
1092 {
1093 	Jsonb	   *jb = PG_GETARG_JSONB_P(0);
1094 	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
1095 	int			total = JB_ROOT_COUNT(jb);
1096 	JsonbIterator *it;
1097 	JsonbValue	v;
1098 	JsonbIteratorToken r;
1099 	PathHashStack tail;
1100 	PathHashStack *stack;
1101 	GinEntries	entries;
1102 
1103 	/* If the root level is empty, we certainly have no keys */
1104 	if (total == 0)
1105 	{
1106 		*nentries = 0;
1107 		PG_RETURN_POINTER(NULL);
1108 	}
1109 
1110 	/* Otherwise, use 2 * root count as initial estimate of result size */
1111 	init_gin_entries(&entries, 2 * total);
1112 
1113 	/* We keep a stack of partial hashes corresponding to parent key levels */
1114 	tail.parent = NULL;
1115 	tail.hash = 0;
1116 	stack = &tail;
1117 
1118 	it = JsonbIteratorInit(&jb->root);
1119 
1120 	while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
1121 	{
1122 		PathHashStack *parent;
1123 
1124 		switch (r)
1125 		{
1126 			case WJB_BEGIN_ARRAY:
1127 			case WJB_BEGIN_OBJECT:
1128 				/* Push a stack level for this object */
1129 				parent = stack;
1130 				stack = (PathHashStack *) palloc(sizeof(PathHashStack));
1131 
1132 				/*
1133 				 * We pass forward hashes from outer nesting levels so that
1134 				 * the hashes for nested values will include outer keys as
1135 				 * well as their own keys.
1136 				 *
1137 				 * Nesting an array within another array will not alter
1138 				 * innermost scalar element hash values, but that seems
1139 				 * inconsequential.
1140 				 */
1141 				stack->hash = parent->hash;
1142 				stack->parent = parent;
1143 				break;
1144 			case WJB_KEY:
1145 				/* mix this key into the current outer hash */
1146 				JsonbHashScalarValue(&v, &stack->hash);
1147 				/* hash is now ready to incorporate the value */
1148 				break;
1149 			case WJB_ELEM:
1150 			case WJB_VALUE:
1151 				/* mix the element or value's hash into the prepared hash */
1152 				JsonbHashScalarValue(&v, &stack->hash);
1153 				/* and emit an index entry */
1154 				add_gin_entry(&entries, UInt32GetDatum(stack->hash));
1155 				/* reset hash for next key, value, or sub-object */
1156 				stack->hash = stack->parent->hash;
1157 				break;
1158 			case WJB_END_ARRAY:
1159 			case WJB_END_OBJECT:
1160 				/* Pop the stack */
1161 				parent = stack->parent;
1162 				pfree(stack);
1163 				stack = parent;
1164 				/* reset hash for next key, value, or sub-object */
1165 				if (stack->parent)
1166 					stack->hash = stack->parent->hash;
1167 				else
1168 					stack->hash = 0;
1169 				break;
1170 			default:
1171 				elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r);
1172 		}
1173 	}
1174 
1175 	*nentries = entries.count;
1176 
1177 	PG_RETURN_POINTER(entries.buf);
1178 }
1179 
1180 Datum
gin_extract_jsonb_query_path(PG_FUNCTION_ARGS)1181 gin_extract_jsonb_query_path(PG_FUNCTION_ARGS)
1182 {
1183 	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
1184 	StrategyNumber strategy = PG_GETARG_UINT16(2);
1185 	int32	   *searchMode = (int32 *) PG_GETARG_POINTER(6);
1186 	Datum	   *entries;
1187 
1188 	if (strategy == JsonbContainsStrategyNumber)
1189 	{
1190 		/* Query is a jsonb, so just apply gin_extract_jsonb_path ... */
1191 		entries = (Datum *)
1192 			DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb_path,
1193 												PG_GETARG_DATUM(0),
1194 												PointerGetDatum(nentries)));
1195 
1196 		/* ... although "contains {}" requires a full index scan */
1197 		if (*nentries == 0)
1198 			*searchMode = GIN_SEARCH_MODE_ALL;
1199 	}
1200 	else if (strategy == JsonbJsonpathPredicateStrategyNumber ||
1201 			 strategy == JsonbJsonpathExistsStrategyNumber)
1202 	{
1203 		JsonPath   *jp = PG_GETARG_JSONPATH_P(0);
1204 		Pointer   **extra_data = (Pointer **) PG_GETARG_POINTER(4);
1205 
1206 		entries = extract_jsp_query(jp, strategy, true, nentries, extra_data);
1207 
1208 		if (!entries)
1209 			*searchMode = GIN_SEARCH_MODE_ALL;
1210 	}
1211 	else
1212 	{
1213 		elog(ERROR, "unrecognized strategy number: %d", strategy);
1214 		entries = NULL;
1215 	}
1216 
1217 	PG_RETURN_POINTER(entries);
1218 }
1219 
1220 Datum
gin_consistent_jsonb_path(PG_FUNCTION_ARGS)1221 gin_consistent_jsonb_path(PG_FUNCTION_ARGS)
1222 {
1223 	bool	   *check = (bool *) PG_GETARG_POINTER(0);
1224 	StrategyNumber strategy = PG_GETARG_UINT16(1);
1225 
1226 	/* Jsonb	   *query = PG_GETARG_JSONB_P(2); */
1227 	int32		nkeys = PG_GETARG_INT32(3);
1228 	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
1229 	bool	   *recheck = (bool *) PG_GETARG_POINTER(5);
1230 	bool		res = true;
1231 	int32		i;
1232 
1233 	if (strategy == JsonbContainsStrategyNumber)
1234 	{
1235 		/*
1236 		 * jsonb_path_ops is necessarily lossy, not only because of hash
1237 		 * collisions but also because it doesn't preserve complete
1238 		 * information about the structure of the JSON object.  Besides, there
1239 		 * are some special rules around the containment of raw scalars in
1240 		 * arrays that are not handled here.  So we must always recheck a
1241 		 * match.  However, if not all of the keys are present, the tuple
1242 		 * certainly doesn't match.
1243 		 */
1244 		*recheck = true;
1245 		for (i = 0; i < nkeys; i++)
1246 		{
1247 			if (!check[i])
1248 			{
1249 				res = false;
1250 				break;
1251 			}
1252 		}
1253 	}
1254 	else if (strategy == JsonbJsonpathPredicateStrategyNumber ||
1255 			 strategy == JsonbJsonpathExistsStrategyNumber)
1256 	{
1257 		*recheck = true;
1258 
1259 		if (nkeys > 0)
1260 		{
1261 			Assert(extra_data && extra_data[0]);
1262 			res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check,
1263 									   false) != GIN_FALSE;
1264 		}
1265 	}
1266 	else
1267 		elog(ERROR, "unrecognized strategy number: %d", strategy);
1268 
1269 	PG_RETURN_BOOL(res);
1270 }
1271 
1272 Datum
gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS)1273 gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS)
1274 {
1275 	GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
1276 	StrategyNumber strategy = PG_GETARG_UINT16(1);
1277 
1278 	/* Jsonb	   *query = PG_GETARG_JSONB_P(2); */
1279 	int32		nkeys = PG_GETARG_INT32(3);
1280 	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
1281 	GinTernaryValue res = GIN_MAYBE;
1282 	int32		i;
1283 
1284 	if (strategy == JsonbContainsStrategyNumber)
1285 	{
1286 		/*
1287 		 * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE;
1288 		 * this corresponds to always forcing recheck in the regular
1289 		 * consistent function, for the reasons listed there.
1290 		 */
1291 		for (i = 0; i < nkeys; i++)
1292 		{
1293 			if (check[i] == GIN_FALSE)
1294 			{
1295 				res = GIN_FALSE;
1296 				break;
1297 			}
1298 		}
1299 	}
1300 	else if (strategy == JsonbJsonpathPredicateStrategyNumber ||
1301 			 strategy == JsonbJsonpathExistsStrategyNumber)
1302 	{
1303 		if (nkeys > 0)
1304 		{
1305 			Assert(extra_data && extra_data[0]);
1306 			res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check,
1307 									   true);
1308 
1309 			/* Should always recheck the result */
1310 			if (res == GIN_TRUE)
1311 				res = GIN_MAYBE;
1312 		}
1313 	}
1314 	else
1315 		elog(ERROR, "unrecognized strategy number: %d", strategy);
1316 
1317 	PG_RETURN_GIN_TERNARY_VALUE(res);
1318 }
1319 
1320 /*
1321  * Construct a jsonb_ops GIN key from a flag byte and a textual representation
1322  * (which need not be null-terminated).  This function is responsible
1323  * for hashing overlength text representations; it will add the
1324  * JGINFLAG_HASHED bit to the flag value if it does that.
1325  */
1326 static Datum
make_text_key(char flag,const char * str,int len)1327 make_text_key(char flag, const char *str, int len)
1328 {
1329 	text	   *item;
1330 	char		hashbuf[10];
1331 
1332 	if (len > JGIN_MAXLENGTH)
1333 	{
1334 		uint32		hashval;
1335 
1336 		hashval = DatumGetUInt32(hash_any((const unsigned char *) str, len));
1337 		snprintf(hashbuf, sizeof(hashbuf), "%08x", hashval);
1338 		str = hashbuf;
1339 		len = 8;
1340 		flag |= JGINFLAG_HASHED;
1341 	}
1342 
1343 	/*
1344 	 * Now build the text Datum.  For simplicity we build a 4-byte-header
1345 	 * varlena text Datum here, but we expect it will get converted to short
1346 	 * header format when stored in the index.
1347 	 */
1348 	item = (text *) palloc(VARHDRSZ + len + 1);
1349 	SET_VARSIZE(item, VARHDRSZ + len + 1);
1350 
1351 	*VARDATA(item) = flag;
1352 
1353 	memcpy(VARDATA(item) + 1, str, len);
1354 
1355 	return PointerGetDatum(item);
1356 }
1357 
1358 /*
1359  * Create a textual representation of a JsonbValue that will serve as a GIN
1360  * key in a jsonb_ops index.  is_key is true if the JsonbValue is a key,
1361  * or if it is a string array element (since we pretend those are keys,
1362  * see jsonb.h).
1363  */
1364 static Datum
make_scalar_key(const JsonbValue * scalarVal,bool is_key)1365 make_scalar_key(const JsonbValue *scalarVal, bool is_key)
1366 {
1367 	Datum		item;
1368 	char	   *cstr;
1369 
1370 	switch (scalarVal->type)
1371 	{
1372 		case jbvNull:
1373 			Assert(!is_key);
1374 			item = make_text_key(JGINFLAG_NULL, "", 0);
1375 			break;
1376 		case jbvBool:
1377 			Assert(!is_key);
1378 			item = make_text_key(JGINFLAG_BOOL,
1379 								 scalarVal->val.boolean ? "t" : "f", 1);
1380 			break;
1381 		case jbvNumeric:
1382 			Assert(!is_key);
1383 
1384 			/*
1385 			 * A normalized textual representation, free of trailing zeroes,
1386 			 * is required so that numerically equal values will produce equal
1387 			 * strings.
1388 			 *
1389 			 * It isn't ideal that numerics are stored in a relatively bulky
1390 			 * textual format.  However, it's a notationally convenient way of
1391 			 * storing a "union" type in the GIN B-Tree, and indexing Jsonb
1392 			 * strings takes precedence.
1393 			 */
1394 			cstr = numeric_normalize(scalarVal->val.numeric);
1395 			item = make_text_key(JGINFLAG_NUM, cstr, strlen(cstr));
1396 			pfree(cstr);
1397 			break;
1398 		case jbvString:
1399 			item = make_text_key(is_key ? JGINFLAG_KEY : JGINFLAG_STR,
1400 								 scalarVal->val.string.val,
1401 								 scalarVal->val.string.len);
1402 			break;
1403 		default:
1404 			elog(ERROR, "unrecognized jsonb scalar type: %d", scalarVal->type);
1405 			item = 0;			/* keep compiler quiet */
1406 			break;
1407 	}
1408 
1409 	return item;
1410 }
1411