1 /*-------------------------------------------------------------------------
2  *
3  * jsonfuncs.c
4  *		Functions to process JSON data types.
5  *
6  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  *	  src/backend/utils/adt/jsonfuncs.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include <limits.h>
18 
19 #include "access/htup_details.h"
20 #include "catalog/pg_type.h"
21 #include "fmgr.h"
22 #include "funcapi.h"
23 #include "lib/stringinfo.h"
24 #include "mb/pg_wchar.h"
25 #include "miscadmin.h"
26 #include "utils/array.h"
27 #include "utils/builtins.h"
28 #include "utils/hsearch.h"
29 #include "utils/json.h"
30 #include "utils/jsonapi.h"
31 #include "utils/jsonb.h"
32 #include "utils/lsyscache.h"
33 #include "utils/memutils.h"
34 #include "utils/syscache.h"
35 #include "utils/typcache.h"
36 
/*
 * Operation flags accepted by setPath.  Each basic operation occupies its
 * own bit so that several of them can be OR'ed into one mask.
 */
#define JB_PATH_CREATE					(1 << 0)
#define JB_PATH_DELETE					(1 << 1)
#define JB_PATH_REPLACE					(1 << 2)
#define JB_PATH_INSERT_BEFORE			(1 << 3)
#define JB_PATH_INSERT_AFTER			(1 << 4)

/* mask covering every operation that may add a new element */
#define JB_PATH_CREATE_OR_INSERT \
	(JB_PATH_CREATE | JB_PATH_INSERT_BEFORE | JB_PATH_INSERT_AFTER)
45 
46 /* state for json_object_keys */
47 typedef struct OkeysState
48 {
49 	JsonLexContext *lex;
50 	char	  **result;
51 	int			result_size;
52 	int			result_count;
53 	int			sent_count;
54 } OkeysState;
55 
56 /* state for iterate_json_string_values function */
57 typedef struct IterateJsonStringValuesState
58 {
59 	JsonLexContext *lex;
60 	JsonIterateStringValuesAction action;	/* an action that will be applied
61 											 * to each json value */
62 	void	   *action_state;	/* any necessary context for iteration */
63 	uint32		flags;			/* what kind of elements from a json we want
64 								 * to iterate */
65 } IterateJsonStringValuesState;
66 
67 /* state for transform_json_string_values function */
68 typedef struct TransformJsonStringValuesState
69 {
70 	JsonLexContext *lex;
71 	StringInfo	strval;			/* resulting json */
72 	JsonTransformStringValuesAction action; /* an action that will be applied
73 											 * to each json value */
74 	void	   *action_state;	/* any necessary context for transformation */
75 } TransformJsonStringValuesState;
76 
77 /* state for json_get* functions */
78 typedef struct GetState
79 {
80 	JsonLexContext *lex;
81 	text	   *tresult;
82 	char	   *result_start;
83 	bool		normalize_results;
84 	bool		next_scalar;
85 	int			npath;			/* length of each path-related array */
86 	char	  **path_names;		/* field name(s) being sought */
87 	int		   *path_indexes;	/* array index(es) being sought */
88 	bool	   *pathok;			/* is path matched to current depth? */
89 	int		   *array_cur_index;	/* current element index at each path
90 									 * level */
91 } GetState;
92 
93 /* state for json_array_length */
94 typedef struct AlenState
95 {
96 	JsonLexContext *lex;
97 	int			count;
98 } AlenState;
99 
100 /* state for json_each */
101 typedef struct EachState
102 {
103 	JsonLexContext *lex;
104 	Tuplestorestate *tuple_store;
105 	TupleDesc	ret_tdesc;
106 	MemoryContext tmp_cxt;
107 	char	   *result_start;
108 	bool		normalize_results;
109 	bool		next_scalar;
110 	char	   *normalized_scalar;
111 } EachState;
112 
113 /* state for json_array_elements */
114 typedef struct ElementsState
115 {
116 	JsonLexContext *lex;
117 	const char *function_name;
118 	Tuplestorestate *tuple_store;
119 	TupleDesc	ret_tdesc;
120 	MemoryContext tmp_cxt;
121 	char	   *result_start;
122 	bool		normalize_results;
123 	bool		next_scalar;
124 	char	   *normalized_scalar;
125 } ElementsState;
126 
127 /* state for get_json_object_as_hash */
128 typedef struct JHashState
129 {
130 	JsonLexContext *lex;
131 	const char *function_name;
132 	HTAB	   *hash;
133 	char	   *saved_scalar;
134 	char	   *save_json_start;
135 	JsonTokenType saved_token_type;
136 } JHashState;
137 
138 /* hashtable element */
139 typedef struct JsonHashEntry
140 {
141 	char		fname[NAMEDATALEN]; /* hash key (MUST BE FIRST) */
142 	char	   *val;
143 	JsonTokenType type;
144 } JsonHashEntry;
145 
146 /* structure to cache type I/O metadata needed for populate_scalar() */
147 typedef struct ScalarIOData
148 {
149 	Oid			typioparam;
150 	FmgrInfo	typiofunc;
151 } ScalarIOData;
152 
/*
 * Forward declarations: ColumnIOData and RecordIOData refer to each other
 * recursively, so both names must exist before either is defined.
 */
typedef struct ColumnIOData ColumnIOData;
typedef struct RecordIOData RecordIOData;
156 
157 /* structure to cache metadata needed for populate_array() */
158 typedef struct ArrayIOData
159 {
160 	ColumnIOData *element_info; /* metadata cache */
161 	Oid			element_type;	/* array element type id */
162 	int32		element_typmod; /* array element type modifier */
163 } ArrayIOData;
164 
165 /* structure to cache metadata needed for populate_composite() */
166 typedef struct CompositeIOData
167 {
168 	/*
169 	 * We use pointer to a RecordIOData here because variable-length struct
170 	 * RecordIOData can't be used directly in ColumnIOData.io union
171 	 */
172 	RecordIOData *record_io;	/* metadata cache for populate_record() */
173 	TupleDesc	tupdesc;		/* cached tuple descriptor */
174 	/* these fields differ from target type only if domain over composite: */
175 	Oid			base_typid;		/* base type id */
176 	int32		base_typmod;	/* base type modifier */
177 	/* this field is used only if target type is domain over composite: */
178 	void	   *domain_info;	/* opaque cache for domain checks */
179 } CompositeIOData;
180 
181 /* structure to cache metadata needed for populate_domain() */
182 typedef struct DomainIOData
183 {
184 	ColumnIOData *base_io;		/* metadata cache */
185 	Oid			base_typid;		/* base type id */
186 	int32		base_typmod;	/* base type modifier */
187 	void	   *domain_info;	/* opaque cache for domain checks */
188 } DomainIOData;
189 
/*
 * Type categories.  Each category is identified by a one-character code.
 */
typedef enum TypeCat
{
	TYPECAT_SCALAR = 's',			/* scalar */
	TYPECAT_ARRAY = 'a',			/* array */
	TYPECAT_COMPOSITE = 'c',		/* composite */
	TYPECAT_COMPOSITE_DOMAIN = 'C', /* domain over composite */
	TYPECAT_DOMAIN = 'd'			/* domain */
} TypeCat;
199 
200 /* these two are stolen from hstore / record_out, used in populate_record* */
201 
202 /* structure to cache record metadata needed for populate_record_field() */
203 struct ColumnIOData
204 {
205 	Oid			typid;			/* column type id */
206 	int32		typmod;			/* column type modifier */
207 	TypeCat		typcat;			/* column type category */
208 	ScalarIOData scalar_io;		/* metadata cache for directi conversion
209 								 * through input function */
210 	union
211 	{
212 		ArrayIOData array;
213 		CompositeIOData composite;
214 		DomainIOData domain;
215 	}			io;				/* metadata cache for various column type
216 								 * categories */
217 };
218 
219 /* structure to cache record metadata needed for populate_record() */
220 struct RecordIOData
221 {
222 	Oid			record_type;
223 	int32		record_typmod;
224 	int			ncolumns;
225 	ColumnIOData columns[FLEXIBLE_ARRAY_MEMBER];
226 };
227 
228 /* per-query cache for populate_record_worker and populate_recordset_worker */
229 typedef struct PopulateRecordCache
230 {
231 	Oid			argtype;		/* declared type of the record argument */
232 	ColumnIOData c;				/* metadata cache for populate_composite() */
233 	MemoryContext fn_mcxt;		/* where this is stored */
234 } PopulateRecordCache;
235 
236 /* per-call state for populate_recordset */
237 typedef struct PopulateRecordsetState
238 {
239 	JsonLexContext *lex;
240 	const char *function_name;
241 	HTAB	   *json_hash;
242 	char	   *saved_scalar;
243 	char	   *save_json_start;
244 	JsonTokenType saved_token_type;
245 	Tuplestorestate *tuple_store;
246 	HeapTupleHeader rec;
247 	PopulateRecordCache *cache;
248 } PopulateRecordsetState;
249 
250 /* common data for populate_array_json() and populate_array_dim_jsonb() */
251 typedef struct PopulateArrayContext
252 {
253 	ArrayBuildState *astate;	/* array build state */
254 	ArrayIOData *aio;			/* metadata cache */
255 	MemoryContext acxt;			/* array build memory context */
256 	MemoryContext mcxt;			/* cache memory context */
257 	const char *colname;		/* for diagnostics only */
258 	int		   *dims;			/* dimensions */
259 	int		   *sizes;			/* current dimension counters */
260 	int			ndims;			/* number of dimensions */
261 } PopulateArrayContext;
262 
263 /* state for populate_array_json() */
264 typedef struct PopulateArrayState
265 {
266 	JsonLexContext *lex;		/* json lexer */
267 	PopulateArrayContext *ctx;	/* context */
268 	char	   *element_start;	/* start of the current array element */
269 	char	   *element_scalar; /* current array element token if it is a
270 								 * scalar */
271 	JsonTokenType element_type; /* current array element type */
272 } PopulateArrayState;
273 
274 /* state for json_strip_nulls */
275 typedef struct StripnullState
276 {
277 	JsonLexContext *lex;
278 	StringInfo	strval;
279 	bool		skip_next_null;
280 } StripnullState;
281 
282 /* structure for generalized json/jsonb value passing */
283 typedef struct JsValue
284 {
285 	bool		is_json;		/* json/jsonb */
286 	union
287 	{
288 		struct
289 		{
290 			char	   *str;	/* json string */
291 			int			len;	/* json string length or -1 if null-terminated */
292 			JsonTokenType type; /* json type */
293 		}			json;		/* json value */
294 
295 		JsonbValue *jsonb;		/* jsonb value */
296 	}			val;
297 } JsValue;
298 
299 typedef struct JsObject
300 {
301 	bool		is_json;		/* json/jsonb */
302 	union
303 	{
304 		HTAB	   *json_hash;
305 		JsonbContainer *jsonb_cont;
306 	}			val;
307 } JsObject;
308 
/*
 * Convenience macros for inspecting JsValue and JsObject.  Each one
 * dispatches on is_json to the matching union member.
 */

/* true if the value is missing or is a json/jsonb null */
#define JsValueIsNull(jsv) \
	((jsv)->is_json \
		? ((jsv)->val.json.str == NULL || \
		   (jsv)->val.json.type == JSON_TOKEN_NULL) \
		: ((jsv)->val.jsonb == NULL || \
		   (jsv)->val.jsonb->type == jbvNull))

/* true if the value is a json/jsonb string */
#define JsValueIsString(jsv) \
	((jsv)->is_json \
		? (jsv)->val.json.type == JSON_TOKEN_STRING \
		: ((jsv)->val.jsonb != NULL && \
		   (jsv)->val.jsonb->type == jbvString))

/* true if the object has no entries (or, for jsonb, no container) */
#define JsObjectIsEmpty(jso) \
	((jso)->is_json \
		? hash_get_num_entries((jso)->val.json_hash) == 0 \
		: ((jso)->val.jsonb_cont == NULL || \
		   JsonContainerSize((jso)->val.jsonb_cont) == 0))

/* destroy the hash table of a json object; does nothing for jsonb */
#define JsObjectFree(jso) \
	do { \
		if ((jso)->is_json) \
		{ \
			hash_destroy((jso)->val.json_hash); \
		} \
	} while (0)
330 
331 /* semantic action functions for json_object_keys */
332 static void okeys_object_field_start(void *state, char *fname, bool isnull);
333 static void okeys_array_start(void *state);
334 static void okeys_scalar(void *state, char *token, JsonTokenType tokentype);
335 
336 /* semantic action functions for json_get* functions */
337 static void get_object_start(void *state);
338 static void get_object_end(void *state);
339 static void get_object_field_start(void *state, char *fname, bool isnull);
340 static void get_object_field_end(void *state, char *fname, bool isnull);
341 static void get_array_start(void *state);
342 static void get_array_end(void *state);
343 static void get_array_element_start(void *state, bool isnull);
344 static void get_array_element_end(void *state, bool isnull);
345 static void get_scalar(void *state, char *token, JsonTokenType tokentype);
346 
347 /* common worker function for json getter functions */
348 static Datum get_path_all(FunctionCallInfo fcinfo, bool as_text);
349 static text *get_worker(text *json, char **tpath, int *ipath, int npath,
350 		   bool normalize_results);
351 static Datum get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text);
352 
353 /* semantic action functions for json_array_length */
354 static void alen_object_start(void *state);
355 static void alen_scalar(void *state, char *token, JsonTokenType tokentype);
356 static void alen_array_element_start(void *state, bool isnull);
357 
358 /* common workers for json{b}_each* functions */
359 static Datum each_worker(FunctionCallInfo fcinfo, bool as_text);
360 static Datum each_worker_jsonb(FunctionCallInfo fcinfo, const char *funcname,
361 				  bool as_text);
362 
363 /* semantic action functions for json_each */
364 static void each_object_field_start(void *state, char *fname, bool isnull);
365 static void each_object_field_end(void *state, char *fname, bool isnull);
366 static void each_array_start(void *state);
367 static void each_scalar(void *state, char *token, JsonTokenType tokentype);
368 
369 /* common workers for json{b}_array_elements_* functions */
370 static Datum elements_worker(FunctionCallInfo fcinfo, const char *funcname,
371 				bool as_text);
372 static Datum elements_worker_jsonb(FunctionCallInfo fcinfo, const char *funcname,
373 					  bool as_text);
374 
375 /* semantic action functions for json_array_elements */
376 static void elements_object_start(void *state);
377 static void elements_array_element_start(void *state, bool isnull);
378 static void elements_array_element_end(void *state, bool isnull);
379 static void elements_scalar(void *state, char *token, JsonTokenType tokentype);
380 
381 /* turn a json object into a hash table */
382 static HTAB *get_json_object_as_hash(char *json, int len, const char *funcname);
383 
384 /* semantic actions for populate_array_json */
385 static void populate_array_object_start(void *_state);
386 static void populate_array_array_end(void *_state);
387 static void populate_array_element_start(void *_state, bool isnull);
388 static void populate_array_element_end(void *_state, bool isnull);
389 static void populate_array_scalar(void *_state, char *token, JsonTokenType tokentype);
390 
391 /* semantic action functions for get_json_object_as_hash */
392 static void hash_object_field_start(void *state, char *fname, bool isnull);
393 static void hash_object_field_end(void *state, char *fname, bool isnull);
394 static void hash_array_start(void *state);
395 static void hash_scalar(void *state, char *token, JsonTokenType tokentype);
396 
397 /* semantic action functions for populate_recordset */
398 static void populate_recordset_object_field_start(void *state, char *fname, bool isnull);
399 static void populate_recordset_object_field_end(void *state, char *fname, bool isnull);
400 static void populate_recordset_scalar(void *state, char *token, JsonTokenType tokentype);
401 static void populate_recordset_object_start(void *state);
402 static void populate_recordset_object_end(void *state);
403 static void populate_recordset_array_start(void *state);
404 static void populate_recordset_array_element_start(void *state, bool isnull);
405 
406 /* semantic action functions for json_strip_nulls */
407 static void sn_object_start(void *state);
408 static void sn_object_end(void *state);
409 static void sn_array_start(void *state);
410 static void sn_array_end(void *state);
411 static void sn_object_field_start(void *state, char *fname, bool isnull);
412 static void sn_array_element_start(void *state, bool isnull);
413 static void sn_scalar(void *state, char *token, JsonTokenType tokentype);
414 
415 /* worker functions for populate_record, to_record, populate_recordset and to_recordset */
416 static Datum populate_recordset_worker(FunctionCallInfo fcinfo, const char *funcname,
417 						  bool is_json, bool have_record_arg);
418 static Datum populate_record_worker(FunctionCallInfo fcinfo, const char *funcname,
419 					   bool is_json, bool have_record_arg);
420 
421 /* helper functions for populate_record[set] */
422 static HeapTupleHeader populate_record(TupleDesc tupdesc, RecordIOData **record_p,
423 				HeapTupleHeader defaultval, MemoryContext mcxt,
424 				JsObject *obj);
425 static void get_record_type_from_argument(FunctionCallInfo fcinfo,
426 										  const char *funcname,
427 										  PopulateRecordCache *cache);
428 static void get_record_type_from_query(FunctionCallInfo fcinfo,
429 									   const char *funcname,
430 									   PopulateRecordCache *cache);
431 static void JsValueToJsObject(JsValue *jsv, JsObject *jso);
432 static Datum populate_composite(CompositeIOData *io, Oid typid,
433 				   const char *colname, MemoryContext mcxt,
434 				   HeapTupleHeader defaultval, JsValue *jsv, bool isnull);
435 static Datum populate_scalar(ScalarIOData *io, Oid typid, int32 typmod, JsValue *jsv);
436 static void prepare_column_cache(ColumnIOData *column, Oid typid, int32 typmod,
437 					 MemoryContext mcxt, bool need_scalar);
438 static Datum populate_record_field(ColumnIOData *col, Oid typid, int32 typmod,
439 					  const char *colname, MemoryContext mcxt, Datum defaultval,
440 					  JsValue *jsv, bool *isnull);
441 static RecordIOData *allocate_record_info(MemoryContext mcxt, int ncolumns);
442 static bool JsObjectGetField(JsObject *obj, char *field, JsValue *jsv);
443 static void populate_recordset_record(PopulateRecordsetState *state, JsObject *obj);
444 static void populate_array_json(PopulateArrayContext *ctx, char *json, int len);
445 static void populate_array_dim_jsonb(PopulateArrayContext *ctx, JsonbValue *jbv,
446 						 int ndim);
447 static void populate_array_report_expected_array(PopulateArrayContext *ctx, int ndim);
448 static void populate_array_assign_ndims(PopulateArrayContext *ctx, int ndims);
449 static void populate_array_check_dimension(PopulateArrayContext *ctx, int ndim);
450 static void populate_array_element(PopulateArrayContext *ctx, int ndim, JsValue *jsv);
451 static Datum populate_array(ArrayIOData *aio, const char *colname,
452 			   MemoryContext mcxt, JsValue *jsv);
453 static Datum populate_domain(DomainIOData *io, Oid typid, const char *colname,
454 				MemoryContext mcxt, JsValue *jsv, bool isnull);
455 
456 /* Worker that takes care of common setup for us */
457 static JsonbValue *findJsonbValueFromContainerLen(JsonbContainer *container,
458 							   uint32 flags,
459 							   char *key,
460 							   uint32 keylen);
461 
462 /* functions supporting jsonb_delete, jsonb_set and jsonb_concat */
463 static JsonbValue *IteratorConcat(JsonbIterator **it1, JsonbIterator **it2,
464 			   JsonbParseState **state);
465 static JsonbValue *setPath(JsonbIterator **it, Datum *path_elems,
466 		bool *path_nulls, int path_len,
467 		JsonbParseState **st, int level, Jsonb *newval,
468 		int op_type);
469 static void setPathObject(JsonbIterator **it, Datum *path_elems,
470 			  bool *path_nulls, int path_len, JsonbParseState **st,
471 			  int level,
472 			  Jsonb *newval, uint32 npairs, int op_type);
473 static void setPathArray(JsonbIterator **it, Datum *path_elems,
474 			 bool *path_nulls, int path_len, JsonbParseState **st,
475 			 int level, Jsonb *newval, uint32 nelems, int op_type);
476 static void addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb);
477 
478 /* function supporting iterate_json_values */
479 static void iterate_values_scalar(void *state, char *token, JsonTokenType tokentype);
480 static void iterate_values_object_field_start(void *state, char *fname, bool isnull);
481 
482 /* functions supporting transform_json_string_values */
483 static void transform_string_values_object_start(void *state);
484 static void transform_string_values_object_end(void *state);
485 static void transform_string_values_array_start(void *state);
486 static void transform_string_values_array_end(void *state);
487 static void transform_string_values_object_field_start(void *state, char *fname, bool isnull);
488 static void transform_string_values_array_element_start(void *state, bool isnull);
489 static void transform_string_values_scalar(void *state, char *token, JsonTokenType tokentype);
490 
491 /*
492  * SQL function json_object_keys
493  *
494  * Returns the set of keys for the object argument.
495  *
496  * This SRF operates in value-per-call mode. It processes the
497  * object during the first call, and the keys are simply stashed
498  * in an array, whose size is expanded as necessary. This is probably
499  * safe enough for a list of keys of a single object, since they are
500  * limited in size to NAMEDATALEN and the number of keys is unlikely to
501  * be so huge that it has major memory implications.
502  */
503 Datum
jsonb_object_keys(PG_FUNCTION_ARGS)504 jsonb_object_keys(PG_FUNCTION_ARGS)
505 {
506 	FuncCallContext *funcctx;
507 	OkeysState *state;
508 	int			i;
509 
510 	if (SRF_IS_FIRSTCALL())
511 	{
512 		MemoryContext oldcontext;
513 		Jsonb	   *jb = PG_GETARG_JSONB_P(0);
514 		bool		skipNested = false;
515 		JsonbIterator *it;
516 		JsonbValue	v;
517 		JsonbIteratorToken r;
518 
519 		if (JB_ROOT_IS_SCALAR(jb))
520 			ereport(ERROR,
521 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
522 					 errmsg("cannot call %s on a scalar",
523 							"jsonb_object_keys")));
524 		else if (JB_ROOT_IS_ARRAY(jb))
525 			ereport(ERROR,
526 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
527 					 errmsg("cannot call %s on an array",
528 							"jsonb_object_keys")));
529 
530 		funcctx = SRF_FIRSTCALL_INIT();
531 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
532 
533 		state = palloc(sizeof(OkeysState));
534 
535 		state->result_size = JB_ROOT_COUNT(jb);
536 		state->result_count = 0;
537 		state->sent_count = 0;
538 		state->result = palloc(state->result_size * sizeof(char *));
539 
540 		it = JsonbIteratorInit(&jb->root);
541 
542 		while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE)
543 		{
544 			skipNested = true;
545 
546 			if (r == WJB_KEY)
547 			{
548 				char	   *cstr;
549 
550 				cstr = palloc(v.val.string.len + 1 * sizeof(char));
551 				memcpy(cstr, v.val.string.val, v.val.string.len);
552 				cstr[v.val.string.len] = '\0';
553 				state->result[state->result_count++] = cstr;
554 			}
555 		}
556 
557 		MemoryContextSwitchTo(oldcontext);
558 		funcctx->user_fctx = (void *) state;
559 	}
560 
561 	funcctx = SRF_PERCALL_SETUP();
562 	state = (OkeysState *) funcctx->user_fctx;
563 
564 	if (state->sent_count < state->result_count)
565 	{
566 		char	   *nxt = state->result[state->sent_count++];
567 
568 		SRF_RETURN_NEXT(funcctx, CStringGetTextDatum(nxt));
569 	}
570 
571 	/* cleanup to reduce or eliminate memory leaks */
572 	for (i = 0; i < state->result_count; i++)
573 		pfree(state->result[i]);
574 	pfree(state->result);
575 	pfree(state);
576 
577 	SRF_RETURN_DONE(funcctx);
578 }
579 
580 
581 Datum
json_object_keys(PG_FUNCTION_ARGS)582 json_object_keys(PG_FUNCTION_ARGS)
583 {
584 	FuncCallContext *funcctx;
585 	OkeysState *state;
586 	int			i;
587 
588 	if (SRF_IS_FIRSTCALL())
589 	{
590 		text	   *json = PG_GETARG_TEXT_PP(0);
591 		JsonLexContext *lex = makeJsonLexContext(json, true);
592 		JsonSemAction *sem;
593 		MemoryContext oldcontext;
594 
595 		funcctx = SRF_FIRSTCALL_INIT();
596 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
597 
598 		state = palloc(sizeof(OkeysState));
599 		sem = palloc0(sizeof(JsonSemAction));
600 
601 		state->lex = lex;
602 		state->result_size = 256;
603 		state->result_count = 0;
604 		state->sent_count = 0;
605 		state->result = palloc(256 * sizeof(char *));
606 
607 		sem->semstate = (void *) state;
608 		sem->array_start = okeys_array_start;
609 		sem->scalar = okeys_scalar;
610 		sem->object_field_start = okeys_object_field_start;
611 		/* remainder are all NULL, courtesy of palloc0 above */
612 
613 		pg_parse_json(lex, sem);
614 		/* keys are now in state->result */
615 
616 		pfree(lex->strval->data);
617 		pfree(lex->strval);
618 		pfree(lex);
619 		pfree(sem);
620 
621 		MemoryContextSwitchTo(oldcontext);
622 		funcctx->user_fctx = (void *) state;
623 	}
624 
625 	funcctx = SRF_PERCALL_SETUP();
626 	state = (OkeysState *) funcctx->user_fctx;
627 
628 	if (state->sent_count < state->result_count)
629 	{
630 		char	   *nxt = state->result[state->sent_count++];
631 
632 		SRF_RETURN_NEXT(funcctx, CStringGetTextDatum(nxt));
633 	}
634 
635 	/* cleanup to reduce or eliminate memory leaks */
636 	for (i = 0; i < state->result_count; i++)
637 		pfree(state->result[i]);
638 	pfree(state->result);
639 	pfree(state);
640 
641 	SRF_RETURN_DONE(funcctx);
642 }
643 
644 static void
okeys_object_field_start(void * state,char * fname,bool isnull)645 okeys_object_field_start(void *state, char *fname, bool isnull)
646 {
647 	OkeysState *_state = (OkeysState *) state;
648 
649 	/* only collecting keys for the top level object */
650 	if (_state->lex->lex_level != 1)
651 		return;
652 
653 	/* enlarge result array if necessary */
654 	if (_state->result_count >= _state->result_size)
655 	{
656 		_state->result_size *= 2;
657 		_state->result = (char **)
658 			repalloc(_state->result, sizeof(char *) * _state->result_size);
659 	}
660 
661 	/* save a copy of the field name */
662 	_state->result[_state->result_count++] = pstrdup(fname);
663 }
664 
665 static void
okeys_array_start(void * state)666 okeys_array_start(void *state)
667 {
668 	OkeysState *_state = (OkeysState *) state;
669 
670 	/* top level must be a json object */
671 	if (_state->lex->lex_level == 0)
672 		ereport(ERROR,
673 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
674 				 errmsg("cannot call %s on an array",
675 						"json_object_keys")));
676 }
677 
678 static void
okeys_scalar(void * state,char * token,JsonTokenType tokentype)679 okeys_scalar(void *state, char *token, JsonTokenType tokentype)
680 {
681 	OkeysState *_state = (OkeysState *) state;
682 
683 	/* top level must be a json object */
684 	if (_state->lex->lex_level == 0)
685 		ereport(ERROR,
686 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
687 				 errmsg("cannot call %s on a scalar",
688 						"json_object_keys")));
689 }
690 
691 /*
692  * json and jsonb getter functions
693  * these implement the -> ->> #> and #>> operators
694  * and the json{b?}_extract_path*(json, text, ...) functions
695  */
696 
697 
698 Datum
json_object_field(PG_FUNCTION_ARGS)699 json_object_field(PG_FUNCTION_ARGS)
700 {
701 	text	   *json = PG_GETARG_TEXT_PP(0);
702 	text	   *fname = PG_GETARG_TEXT_PP(1);
703 	char	   *fnamestr = text_to_cstring(fname);
704 	text	   *result;
705 
706 	result = get_worker(json, &fnamestr, NULL, 1, false);
707 
708 	if (result != NULL)
709 		PG_RETURN_TEXT_P(result);
710 	else
711 		PG_RETURN_NULL();
712 }
713 
714 Datum
jsonb_object_field(PG_FUNCTION_ARGS)715 jsonb_object_field(PG_FUNCTION_ARGS)
716 {
717 	Jsonb	   *jb = PG_GETARG_JSONB_P(0);
718 	text	   *key = PG_GETARG_TEXT_PP(1);
719 	JsonbValue *v;
720 
721 	if (!JB_ROOT_IS_OBJECT(jb))
722 		PG_RETURN_NULL();
723 
724 	v = findJsonbValueFromContainerLen(&jb->root, JB_FOBJECT,
725 									   VARDATA_ANY(key),
726 									   VARSIZE_ANY_EXHDR(key));
727 
728 	if (v != NULL)
729 		PG_RETURN_JSONB_P(JsonbValueToJsonb(v));
730 
731 	PG_RETURN_NULL();
732 }
733 
734 Datum
json_object_field_text(PG_FUNCTION_ARGS)735 json_object_field_text(PG_FUNCTION_ARGS)
736 {
737 	text	   *json = PG_GETARG_TEXT_PP(0);
738 	text	   *fname = PG_GETARG_TEXT_PP(1);
739 	char	   *fnamestr = text_to_cstring(fname);
740 	text	   *result;
741 
742 	result = get_worker(json, &fnamestr, NULL, 1, true);
743 
744 	if (result != NULL)
745 		PG_RETURN_TEXT_P(result);
746 	else
747 		PG_RETURN_NULL();
748 }
749 
750 Datum
jsonb_object_field_text(PG_FUNCTION_ARGS)751 jsonb_object_field_text(PG_FUNCTION_ARGS)
752 {
753 	Jsonb	   *jb = PG_GETARG_JSONB_P(0);
754 	text	   *key = PG_GETARG_TEXT_PP(1);
755 	JsonbValue *v;
756 
757 	if (!JB_ROOT_IS_OBJECT(jb))
758 		PG_RETURN_NULL();
759 
760 	v = findJsonbValueFromContainerLen(&jb->root, JB_FOBJECT,
761 									   VARDATA_ANY(key),
762 									   VARSIZE_ANY_EXHDR(key));
763 
764 	if (v != NULL)
765 	{
766 		text	   *result = NULL;
767 
768 		switch (v->type)
769 		{
770 			case jbvNull:
771 				break;
772 			case jbvBool:
773 				result = cstring_to_text(v->val.boolean ? "true" : "false");
774 				break;
775 			case jbvString:
776 				result = cstring_to_text_with_len(v->val.string.val, v->val.string.len);
777 				break;
778 			case jbvNumeric:
779 				result = cstring_to_text(DatumGetCString(DirectFunctionCall1(numeric_out,
780 																			 PointerGetDatum(v->val.numeric))));
781 				break;
782 			case jbvBinary:
783 				{
784 					StringInfo	jtext = makeStringInfo();
785 
786 					(void) JsonbToCString(jtext, v->val.binary.data, -1);
787 					result = cstring_to_text_with_len(jtext->data, jtext->len);
788 				}
789 				break;
790 			default:
791 				elog(ERROR, "unrecognized jsonb type: %d", (int) v->type);
792 		}
793 
794 		if (result)
795 			PG_RETURN_TEXT_P(result);
796 	}
797 
798 	PG_RETURN_NULL();
799 }
800 
801 Datum
json_array_element(PG_FUNCTION_ARGS)802 json_array_element(PG_FUNCTION_ARGS)
803 {
804 	text	   *json = PG_GETARG_TEXT_PP(0);
805 	int			element = PG_GETARG_INT32(1);
806 	text	   *result;
807 
808 	result = get_worker(json, NULL, &element, 1, false);
809 
810 	if (result != NULL)
811 		PG_RETURN_TEXT_P(result);
812 	else
813 		PG_RETURN_NULL();
814 }
815 
816 Datum
jsonb_array_element(PG_FUNCTION_ARGS)817 jsonb_array_element(PG_FUNCTION_ARGS)
818 {
819 	Jsonb	   *jb = PG_GETARG_JSONB_P(0);
820 	int			element = PG_GETARG_INT32(1);
821 	JsonbValue *v;
822 
823 	if (!JB_ROOT_IS_ARRAY(jb))
824 		PG_RETURN_NULL();
825 
826 	/* Handle negative subscript */
827 	if (element < 0)
828 	{
829 		uint32		nelements = JB_ROOT_COUNT(jb);
830 
831 		if (-element > nelements)
832 			PG_RETURN_NULL();
833 		else
834 			element += nelements;
835 	}
836 
837 	v = getIthJsonbValueFromContainer(&jb->root, element);
838 	if (v != NULL)
839 		PG_RETURN_JSONB_P(JsonbValueToJsonb(v));
840 
841 	PG_RETURN_NULL();
842 }
843 
844 Datum
json_array_element_text(PG_FUNCTION_ARGS)845 json_array_element_text(PG_FUNCTION_ARGS)
846 {
847 	text	   *json = PG_GETARG_TEXT_PP(0);
848 	int			element = PG_GETARG_INT32(1);
849 	text	   *result;
850 
851 	result = get_worker(json, NULL, &element, 1, true);
852 
853 	if (result != NULL)
854 		PG_RETURN_TEXT_P(result);
855 	else
856 		PG_RETURN_NULL();
857 }
858 
859 Datum
jsonb_array_element_text(PG_FUNCTION_ARGS)860 jsonb_array_element_text(PG_FUNCTION_ARGS)
861 {
862 	Jsonb	   *jb = PG_GETARG_JSONB_P(0);
863 	int			element = PG_GETARG_INT32(1);
864 	JsonbValue *v;
865 
866 	if (!JB_ROOT_IS_ARRAY(jb))
867 		PG_RETURN_NULL();
868 
869 	/* Handle negative subscript */
870 	if (element < 0)
871 	{
872 		uint32		nelements = JB_ROOT_COUNT(jb);
873 
874 		if (-element > nelements)
875 			PG_RETURN_NULL();
876 		else
877 			element += nelements;
878 	}
879 
880 	v = getIthJsonbValueFromContainer(&jb->root, element);
881 	if (v != NULL)
882 	{
883 		text	   *result = NULL;
884 
885 		switch (v->type)
886 		{
887 			case jbvNull:
888 				break;
889 			case jbvBool:
890 				result = cstring_to_text(v->val.boolean ? "true" : "false");
891 				break;
892 			case jbvString:
893 				result = cstring_to_text_with_len(v->val.string.val, v->val.string.len);
894 				break;
895 			case jbvNumeric:
896 				result = cstring_to_text(DatumGetCString(DirectFunctionCall1(numeric_out,
897 																			 PointerGetDatum(v->val.numeric))));
898 				break;
899 			case jbvBinary:
900 				{
901 					StringInfo	jtext = makeStringInfo();
902 
903 					(void) JsonbToCString(jtext, v->val.binary.data, -1);
904 					result = cstring_to_text_with_len(jtext->data, jtext->len);
905 				}
906 				break;
907 			default:
908 				elog(ERROR, "unrecognized jsonb type: %d", (int) v->type);
909 		}
910 
911 		if (result)
912 			PG_RETURN_TEXT_P(result);
913 	}
914 
915 	PG_RETURN_NULL();
916 }
917 
918 Datum
json_extract_path(PG_FUNCTION_ARGS)919 json_extract_path(PG_FUNCTION_ARGS)
920 {
921 	return get_path_all(fcinfo, false);
922 }
923 
924 Datum
json_extract_path_text(PG_FUNCTION_ARGS)925 json_extract_path_text(PG_FUNCTION_ARGS)
926 {
927 	return get_path_all(fcinfo, true);
928 }
929 
930 /*
931  * common routine for extract_path functions
932  */
933 static Datum
get_path_all(FunctionCallInfo fcinfo,bool as_text)934 get_path_all(FunctionCallInfo fcinfo, bool as_text)
935 {
936 	text	   *json = PG_GETARG_TEXT_PP(0);
937 	ArrayType  *path = PG_GETARG_ARRAYTYPE_P(1);
938 	text	   *result;
939 	Datum	   *pathtext;
940 	bool	   *pathnulls;
941 	int			npath;
942 	char	  **tpath;
943 	int		   *ipath;
944 	int			i;
945 
946 	/*
947 	 * If the array contains any null elements, return NULL, on the grounds
948 	 * that you'd have gotten NULL if any RHS value were NULL in a nested
949 	 * series of applications of the -> operator.  (Note: because we also
950 	 * return NULL for error cases such as no-such-field, this is true
951 	 * regardless of the contents of the rest of the array.)
952 	 */
953 	if (array_contains_nulls(path))
954 		PG_RETURN_NULL();
955 
956 	deconstruct_array(path, TEXTOID, -1, false, 'i',
957 					  &pathtext, &pathnulls, &npath);
958 
959 	tpath = palloc(npath * sizeof(char *));
960 	ipath = palloc(npath * sizeof(int));
961 
962 	for (i = 0; i < npath; i++)
963 	{
964 		Assert(!pathnulls[i]);
965 		tpath[i] = TextDatumGetCString(pathtext[i]);
966 
967 		/*
968 		 * we have no idea at this stage what structure the document is so
969 		 * just convert anything in the path that we can to an integer and set
970 		 * all the other integers to INT_MIN which will never match.
971 		 */
972 		if (*tpath[i] != '\0')
973 		{
974 			long		ind;
975 			char	   *endptr;
976 
977 			errno = 0;
978 			ind = strtol(tpath[i], &endptr, 10);
979 			if (*endptr == '\0' && errno == 0 && ind <= INT_MAX && ind >= INT_MIN)
980 				ipath[i] = (int) ind;
981 			else
982 				ipath[i] = INT_MIN;
983 		}
984 		else
985 			ipath[i] = INT_MIN;
986 	}
987 
988 	result = get_worker(json, tpath, ipath, npath, as_text);
989 
990 	if (result != NULL)
991 		PG_RETURN_TEXT_P(result);
992 	else
993 		PG_RETURN_NULL();
994 }
995 
996 /*
997  * get_worker
998  *
999  * common worker for all the json getter functions
1000  *
1001  * json: JSON object (in text form)
1002  * tpath[]: field name(s) to extract
1003  * ipath[]: array index(es) (zero-based) to extract, accepts negatives
1004  * npath: length of tpath[] and/or ipath[]
1005  * normalize_results: true to de-escape string and null scalars
1006  *
1007  * tpath can be NULL, or any one tpath[] entry can be NULL, if an object
1008  * field is not to be matched at that nesting level.  Similarly, ipath can
1009  * be NULL, or any one ipath[] entry can be INT_MIN if an array element is
1010  * not to be matched at that nesting level (a json datum should never be
1011  * large enough to have -INT_MIN elements due to MaxAllocSize restriction).
1012  */
1013 static text *
get_worker(text * json,char ** tpath,int * ipath,int npath,bool normalize_results)1014 get_worker(text *json,
1015 		   char **tpath,
1016 		   int *ipath,
1017 		   int npath,
1018 		   bool normalize_results)
1019 {
1020 	JsonLexContext *lex = makeJsonLexContext(json, true);
1021 	JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
1022 	GetState   *state = palloc0(sizeof(GetState));
1023 
1024 	Assert(npath >= 0);
1025 
1026 	state->lex = lex;
1027 	/* is it "_as_text" variant? */
1028 	state->normalize_results = normalize_results;
1029 	state->npath = npath;
1030 	state->path_names = tpath;
1031 	state->path_indexes = ipath;
1032 	state->pathok = palloc0(sizeof(bool) * npath);
1033 	state->array_cur_index = palloc(sizeof(int) * npath);
1034 
1035 	if (npath > 0)
1036 		state->pathok[0] = true;
1037 
1038 	sem->semstate = (void *) state;
1039 
1040 	/*
1041 	 * Not all variants need all the semantic routines. Only set the ones that
1042 	 * are actually needed for maximum efficiency.
1043 	 */
1044 	sem->scalar = get_scalar;
1045 	if (npath == 0)
1046 	{
1047 		sem->object_start = get_object_start;
1048 		sem->object_end = get_object_end;
1049 		sem->array_start = get_array_start;
1050 		sem->array_end = get_array_end;
1051 	}
1052 	if (tpath != NULL)
1053 	{
1054 		sem->object_field_start = get_object_field_start;
1055 		sem->object_field_end = get_object_field_end;
1056 	}
1057 	if (ipath != NULL)
1058 	{
1059 		sem->array_start = get_array_start;
1060 		sem->array_element_start = get_array_element_start;
1061 		sem->array_element_end = get_array_element_end;
1062 	}
1063 
1064 	pg_parse_json(lex, sem);
1065 
1066 	return state->tresult;
1067 }
1068 
1069 static void
get_object_start(void * state)1070 get_object_start(void *state)
1071 {
1072 	GetState   *_state = (GetState *) state;
1073 	int			lex_level = _state->lex->lex_level;
1074 
1075 	if (lex_level == 0 && _state->npath == 0)
1076 	{
1077 		/*
1078 		 * Special case: we should match the entire object.  We only need this
1079 		 * at outermost level because at nested levels the match will have
1080 		 * been started by the outer field or array element callback.
1081 		 */
1082 		_state->result_start = _state->lex->token_start;
1083 	}
1084 }
1085 
1086 static void
get_object_end(void * state)1087 get_object_end(void *state)
1088 {
1089 	GetState   *_state = (GetState *) state;
1090 	int			lex_level = _state->lex->lex_level;
1091 
1092 	if (lex_level == 0 && _state->npath == 0)
1093 	{
1094 		/* Special case: return the entire object */
1095 		char	   *start = _state->result_start;
1096 		int			len = _state->lex->prev_token_terminator - start;
1097 
1098 		_state->tresult = cstring_to_text_with_len(start, len);
1099 	}
1100 }
1101 
1102 static void
get_object_field_start(void * state,char * fname,bool isnull)1103 get_object_field_start(void *state, char *fname, bool isnull)
1104 {
1105 	GetState   *_state = (GetState *) state;
1106 	bool		get_next = false;
1107 	int			lex_level = _state->lex->lex_level;
1108 
1109 	if (lex_level <= _state->npath &&
1110 		_state->pathok[lex_level - 1] &&
1111 		_state->path_names != NULL &&
1112 		_state->path_names[lex_level - 1] != NULL &&
1113 		strcmp(fname, _state->path_names[lex_level - 1]) == 0)
1114 	{
1115 		if (lex_level < _state->npath)
1116 		{
1117 			/* if not at end of path just mark path ok */
1118 			_state->pathok[lex_level] = true;
1119 		}
1120 		else
1121 		{
1122 			/* end of path, so we want this value */
1123 			get_next = true;
1124 		}
1125 	}
1126 
1127 	if (get_next)
1128 	{
1129 		/* this object overrides any previous matching object */
1130 		_state->tresult = NULL;
1131 		_state->result_start = NULL;
1132 
1133 		if (_state->normalize_results &&
1134 			_state->lex->token_type == JSON_TOKEN_STRING)
1135 		{
1136 			/* for as_text variants, tell get_scalar to set it for us */
1137 			_state->next_scalar = true;
1138 		}
1139 		else
1140 		{
1141 			/* for non-as_text variants, just note the json starting point */
1142 			_state->result_start = _state->lex->token_start;
1143 		}
1144 	}
1145 }
1146 
1147 static void
get_object_field_end(void * state,char * fname,bool isnull)1148 get_object_field_end(void *state, char *fname, bool isnull)
1149 {
1150 	GetState   *_state = (GetState *) state;
1151 	bool		get_last = false;
1152 	int			lex_level = _state->lex->lex_level;
1153 
1154 	/* same tests as in get_object_field_start */
1155 	if (lex_level <= _state->npath &&
1156 		_state->pathok[lex_level - 1] &&
1157 		_state->path_names != NULL &&
1158 		_state->path_names[lex_level - 1] != NULL &&
1159 		strcmp(fname, _state->path_names[lex_level - 1]) == 0)
1160 	{
1161 		if (lex_level < _state->npath)
1162 		{
1163 			/* done with this field so reset pathok */
1164 			_state->pathok[lex_level] = false;
1165 		}
1166 		else
1167 		{
1168 			/* end of path, so we want this value */
1169 			get_last = true;
1170 		}
1171 	}
1172 
1173 	/* for as_text scalar case, our work is already done */
1174 	if (get_last && _state->result_start != NULL)
1175 	{
1176 		/*
1177 		 * make a text object from the string from the previously noted json
1178 		 * start up to the end of the previous token (the lexer is by now
1179 		 * ahead of us on whatever came after what we're interested in).
1180 		 */
1181 		if (isnull && _state->normalize_results)
1182 			_state->tresult = (text *) NULL;
1183 		else
1184 		{
1185 			char	   *start = _state->result_start;
1186 			int			len = _state->lex->prev_token_terminator - start;
1187 
1188 			_state->tresult = cstring_to_text_with_len(start, len);
1189 		}
1190 
1191 		/* this should be unnecessary but let's do it for cleanliness: */
1192 		_state->result_start = NULL;
1193 	}
1194 }
1195 
1196 static void
get_array_start(void * state)1197 get_array_start(void *state)
1198 {
1199 	GetState   *_state = (GetState *) state;
1200 	int			lex_level = _state->lex->lex_level;
1201 
1202 	if (lex_level < _state->npath)
1203 	{
1204 		/* Initialize counting of elements in this array */
1205 		_state->array_cur_index[lex_level] = -1;
1206 
1207 		/* INT_MIN value is reserved to represent invalid subscript */
1208 		if (_state->path_indexes[lex_level] < 0 &&
1209 			_state->path_indexes[lex_level] != INT_MIN)
1210 		{
1211 			/* Negative subscript -- convert to positive-wise subscript */
1212 			int			nelements = json_count_array_elements(_state->lex);
1213 
1214 			if (-_state->path_indexes[lex_level] <= nelements)
1215 				_state->path_indexes[lex_level] += nelements;
1216 		}
1217 	}
1218 	else if (lex_level == 0 && _state->npath == 0)
1219 	{
1220 		/*
1221 		 * Special case: we should match the entire array.  We only need this
1222 		 * at the outermost level because at nested levels the match will have
1223 		 * been started by the outer field or array element callback.
1224 		 */
1225 		_state->result_start = _state->lex->token_start;
1226 	}
1227 }
1228 
1229 static void
get_array_end(void * state)1230 get_array_end(void *state)
1231 {
1232 	GetState   *_state = (GetState *) state;
1233 	int			lex_level = _state->lex->lex_level;
1234 
1235 	if (lex_level == 0 && _state->npath == 0)
1236 	{
1237 		/* Special case: return the entire array */
1238 		char	   *start = _state->result_start;
1239 		int			len = _state->lex->prev_token_terminator - start;
1240 
1241 		_state->tresult = cstring_to_text_with_len(start, len);
1242 	}
1243 }
1244 
1245 static void
get_array_element_start(void * state,bool isnull)1246 get_array_element_start(void *state, bool isnull)
1247 {
1248 	GetState   *_state = (GetState *) state;
1249 	bool		get_next = false;
1250 	int			lex_level = _state->lex->lex_level;
1251 
1252 	/* Update array element counter */
1253 	if (lex_level <= _state->npath)
1254 		_state->array_cur_index[lex_level - 1]++;
1255 
1256 	if (lex_level <= _state->npath &&
1257 		_state->pathok[lex_level - 1] &&
1258 		_state->path_indexes != NULL &&
1259 		_state->array_cur_index[lex_level - 1] == _state->path_indexes[lex_level - 1])
1260 	{
1261 		if (lex_level < _state->npath)
1262 		{
1263 			/* if not at end of path just mark path ok */
1264 			_state->pathok[lex_level] = true;
1265 		}
1266 		else
1267 		{
1268 			/* end of path, so we want this value */
1269 			get_next = true;
1270 		}
1271 	}
1272 
1273 	/* same logic as for objects */
1274 	if (get_next)
1275 	{
1276 		_state->tresult = NULL;
1277 		_state->result_start = NULL;
1278 
1279 		if (_state->normalize_results &&
1280 			_state->lex->token_type == JSON_TOKEN_STRING)
1281 		{
1282 			_state->next_scalar = true;
1283 		}
1284 		else
1285 		{
1286 			_state->result_start = _state->lex->token_start;
1287 		}
1288 	}
1289 }
1290 
1291 static void
get_array_element_end(void * state,bool isnull)1292 get_array_element_end(void *state, bool isnull)
1293 {
1294 	GetState   *_state = (GetState *) state;
1295 	bool		get_last = false;
1296 	int			lex_level = _state->lex->lex_level;
1297 
1298 	/* same tests as in get_array_element_start */
1299 	if (lex_level <= _state->npath &&
1300 		_state->pathok[lex_level - 1] &&
1301 		_state->path_indexes != NULL &&
1302 		_state->array_cur_index[lex_level - 1] == _state->path_indexes[lex_level - 1])
1303 	{
1304 		if (lex_level < _state->npath)
1305 		{
1306 			/* done with this element so reset pathok */
1307 			_state->pathok[lex_level] = false;
1308 		}
1309 		else
1310 		{
1311 			/* end of path, so we want this value */
1312 			get_last = true;
1313 		}
1314 	}
1315 
1316 	/* same logic as for objects */
1317 	if (get_last && _state->result_start != NULL)
1318 	{
1319 		if (isnull && _state->normalize_results)
1320 			_state->tresult = (text *) NULL;
1321 		else
1322 		{
1323 			char	   *start = _state->result_start;
1324 			int			len = _state->lex->prev_token_terminator - start;
1325 
1326 			_state->tresult = cstring_to_text_with_len(start, len);
1327 		}
1328 
1329 		_state->result_start = NULL;
1330 	}
1331 }
1332 
1333 static void
get_scalar(void * state,char * token,JsonTokenType tokentype)1334 get_scalar(void *state, char *token, JsonTokenType tokentype)
1335 {
1336 	GetState   *_state = (GetState *) state;
1337 	int			lex_level = _state->lex->lex_level;
1338 
1339 	/* Check for whole-object match */
1340 	if (lex_level == 0 && _state->npath == 0)
1341 	{
1342 		if (_state->normalize_results && tokentype == JSON_TOKEN_STRING)
1343 		{
1344 			/* we want the de-escaped string */
1345 			_state->next_scalar = true;
1346 		}
1347 		else if (_state->normalize_results && tokentype == JSON_TOKEN_NULL)
1348 		{
1349 			_state->tresult = (text *) NULL;
1350 		}
1351 		else
1352 		{
1353 			/*
1354 			 * This is a bit hokey: we will suppress whitespace after the
1355 			 * scalar token, but not whitespace before it.  Probably not worth
1356 			 * doing our own space-skipping to avoid that.
1357 			 */
1358 			char	   *start = _state->lex->input;
1359 			int			len = _state->lex->prev_token_terminator - start;
1360 
1361 			_state->tresult = cstring_to_text_with_len(start, len);
1362 		}
1363 	}
1364 
1365 	if (_state->next_scalar)
1366 	{
1367 		/* a de-escaped text value is wanted, so supply it */
1368 		_state->tresult = cstring_to_text(token);
1369 		/* make sure the next call to get_scalar doesn't overwrite it */
1370 		_state->next_scalar = false;
1371 	}
1372 }
1373 
1374 Datum
jsonb_extract_path(PG_FUNCTION_ARGS)1375 jsonb_extract_path(PG_FUNCTION_ARGS)
1376 {
1377 	return get_jsonb_path_all(fcinfo, false);
1378 }
1379 
1380 Datum
jsonb_extract_path_text(PG_FUNCTION_ARGS)1381 jsonb_extract_path_text(PG_FUNCTION_ARGS)
1382 {
1383 	return get_jsonb_path_all(fcinfo, true);
1384 }
1385 
1386 static Datum
get_jsonb_path_all(FunctionCallInfo fcinfo,bool as_text)1387 get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text)
1388 {
1389 	Jsonb	   *jb = PG_GETARG_JSONB_P(0);
1390 	ArrayType  *path = PG_GETARG_ARRAYTYPE_P(1);
1391 	Jsonb	   *res;
1392 	Datum	   *pathtext;
1393 	bool	   *pathnulls;
1394 	int			npath;
1395 	int			i;
1396 	bool		have_object = false,
1397 				have_array = false;
1398 	JsonbValue *jbvp = NULL;
1399 	JsonbValue	tv;
1400 	JsonbContainer *container;
1401 
1402 	/*
1403 	 * If the array contains any null elements, return NULL, on the grounds
1404 	 * that you'd have gotten NULL if any RHS value were NULL in a nested
1405 	 * series of applications of the -> operator.  (Note: because we also
1406 	 * return NULL for error cases such as no-such-field, this is true
1407 	 * regardless of the contents of the rest of the array.)
1408 	 */
1409 	if (array_contains_nulls(path))
1410 		PG_RETURN_NULL();
1411 
1412 	deconstruct_array(path, TEXTOID, -1, false, 'i',
1413 					  &pathtext, &pathnulls, &npath);
1414 
1415 	/* Identify whether we have object, array, or scalar at top-level */
1416 	container = &jb->root;
1417 
1418 	if (JB_ROOT_IS_OBJECT(jb))
1419 		have_object = true;
1420 	else if (JB_ROOT_IS_ARRAY(jb) && !JB_ROOT_IS_SCALAR(jb))
1421 		have_array = true;
1422 	else
1423 	{
1424 		Assert(JB_ROOT_IS_ARRAY(jb) && JB_ROOT_IS_SCALAR(jb));
1425 		/* Extract the scalar value, if it is what we'll return */
1426 		if (npath <= 0)
1427 			jbvp = getIthJsonbValueFromContainer(container, 0);
1428 	}
1429 
1430 	/*
1431 	 * If the array is empty, return the entire LHS object, on the grounds
1432 	 * that we should do zero field or element extractions.  For the
1433 	 * non-scalar case we can just hand back the object without much work. For
1434 	 * the scalar case, fall through and deal with the value below the loop.
1435 	 * (This inconsistency arises because there's no easy way to generate a
1436 	 * JsonbValue directly for root-level containers.)
1437 	 */
1438 	if (npath <= 0 && jbvp == NULL)
1439 	{
1440 		if (as_text)
1441 		{
1442 			PG_RETURN_TEXT_P(cstring_to_text(JsonbToCString(NULL,
1443 															container,
1444 															VARSIZE(jb))));
1445 		}
1446 		else
1447 		{
1448 			/* not text mode - just hand back the jsonb */
1449 			PG_RETURN_JSONB_P(jb);
1450 		}
1451 	}
1452 
1453 	for (i = 0; i < npath; i++)
1454 	{
1455 		if (have_object)
1456 		{
1457 			jbvp = findJsonbValueFromContainerLen(container,
1458 												  JB_FOBJECT,
1459 												  VARDATA(pathtext[i]),
1460 												  VARSIZE(pathtext[i]) - VARHDRSZ);
1461 		}
1462 		else if (have_array)
1463 		{
1464 			long		lindex;
1465 			uint32		index;
1466 			char	   *indextext = TextDatumGetCString(pathtext[i]);
1467 			char	   *endptr;
1468 
1469 			errno = 0;
1470 			lindex = strtol(indextext, &endptr, 10);
1471 			if (endptr == indextext || *endptr != '\0' || errno != 0 ||
1472 				lindex > INT_MAX || lindex < INT_MIN)
1473 				PG_RETURN_NULL();
1474 
1475 			if (lindex >= 0)
1476 			{
1477 				index = (uint32) lindex;
1478 			}
1479 			else
1480 			{
1481 				/* Handle negative subscript */
1482 				uint32		nelements;
1483 
1484 				/* Container must be array, but make sure */
1485 				if (!JsonContainerIsArray(container))
1486 					elog(ERROR, "not a jsonb array");
1487 
1488 				nelements = JsonContainerSize(container);
1489 
1490 				if (-lindex > nelements)
1491 					PG_RETURN_NULL();
1492 				else
1493 					index = nelements + lindex;
1494 			}
1495 
1496 			jbvp = getIthJsonbValueFromContainer(container, index);
1497 		}
1498 		else
1499 		{
1500 			/* scalar, extraction yields a null */
1501 			PG_RETURN_NULL();
1502 		}
1503 
1504 		if (jbvp == NULL)
1505 			PG_RETURN_NULL();
1506 		else if (i == npath - 1)
1507 			break;
1508 
1509 		if (jbvp->type == jbvBinary)
1510 		{
1511 			JsonbIterator *it = JsonbIteratorInit((JsonbContainer *) jbvp->val.binary.data);
1512 			JsonbIteratorToken r;
1513 
1514 			r = JsonbIteratorNext(&it, &tv, true);
1515 			container = (JsonbContainer *) jbvp->val.binary.data;
1516 			have_object = r == WJB_BEGIN_OBJECT;
1517 			have_array = r == WJB_BEGIN_ARRAY;
1518 		}
1519 		else
1520 		{
1521 			have_object = jbvp->type == jbvObject;
1522 			have_array = jbvp->type == jbvArray;
1523 		}
1524 	}
1525 
1526 	if (as_text)
1527 	{
1528 		/* special-case outputs for string and null values */
1529 		if (jbvp->type == jbvString)
1530 			PG_RETURN_TEXT_P(cstring_to_text_with_len(jbvp->val.string.val,
1531 													  jbvp->val.string.len));
1532 		if (jbvp->type == jbvNull)
1533 			PG_RETURN_NULL();
1534 	}
1535 
1536 	res = JsonbValueToJsonb(jbvp);
1537 
1538 	if (as_text)
1539 	{
1540 		PG_RETURN_TEXT_P(cstring_to_text(JsonbToCString(NULL,
1541 														&res->root,
1542 														VARSIZE(res))));
1543 	}
1544 	else
1545 	{
1546 		/* not text mode - just hand back the jsonb */
1547 		PG_RETURN_JSONB_P(res);
1548 	}
1549 }
1550 
1551 /*
1552  * SQL function json_array_length(json) -> int
1553  */
1554 Datum
json_array_length(PG_FUNCTION_ARGS)1555 json_array_length(PG_FUNCTION_ARGS)
1556 {
1557 	text	   *json = PG_GETARG_TEXT_PP(0);
1558 	AlenState  *state;
1559 	JsonLexContext *lex;
1560 	JsonSemAction *sem;
1561 
1562 	lex = makeJsonLexContext(json, false);
1563 	state = palloc0(sizeof(AlenState));
1564 	sem = palloc0(sizeof(JsonSemAction));
1565 
1566 	/* palloc0 does this for us */
1567 #if 0
1568 	state->count = 0;
1569 #endif
1570 	state->lex = lex;
1571 
1572 	sem->semstate = (void *) state;
1573 	sem->object_start = alen_object_start;
1574 	sem->scalar = alen_scalar;
1575 	sem->array_element_start = alen_array_element_start;
1576 
1577 	pg_parse_json(lex, sem);
1578 
1579 	PG_RETURN_INT32(state->count);
1580 }
1581 
1582 Datum
jsonb_array_length(PG_FUNCTION_ARGS)1583 jsonb_array_length(PG_FUNCTION_ARGS)
1584 {
1585 	Jsonb	   *jb = PG_GETARG_JSONB_P(0);
1586 
1587 	if (JB_ROOT_IS_SCALAR(jb))
1588 		ereport(ERROR,
1589 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1590 				 errmsg("cannot get array length of a scalar")));
1591 	else if (!JB_ROOT_IS_ARRAY(jb))
1592 		ereport(ERROR,
1593 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1594 				 errmsg("cannot get array length of a non-array")));
1595 
1596 	PG_RETURN_INT32(JB_ROOT_COUNT(jb));
1597 }
1598 
1599 /*
1600  * These next two checks ensure that the json is an array (since it can't be
1601  * a scalar or an object).
1602  */
1603 
1604 static void
alen_object_start(void * state)1605 alen_object_start(void *state)
1606 {
1607 	AlenState  *_state = (AlenState *) state;
1608 
1609 	/* json structure check */
1610 	if (_state->lex->lex_level == 0)
1611 		ereport(ERROR,
1612 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1613 				 errmsg("cannot get array length of a non-array")));
1614 }
1615 
1616 static void
alen_scalar(void * state,char * token,JsonTokenType tokentype)1617 alen_scalar(void *state, char *token, JsonTokenType tokentype)
1618 {
1619 	AlenState  *_state = (AlenState *) state;
1620 
1621 	/* json structure check */
1622 	if (_state->lex->lex_level == 0)
1623 		ereport(ERROR,
1624 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1625 				 errmsg("cannot get array length of a scalar")));
1626 }
1627 
1628 static void
alen_array_element_start(void * state,bool isnull)1629 alen_array_element_start(void *state, bool isnull)
1630 {
1631 	AlenState  *_state = (AlenState *) state;
1632 
1633 	/* just count up all the level 1 elements */
1634 	if (_state->lex->lex_level == 1)
1635 		_state->count++;
1636 }
1637 
1638 /*
1639  * SQL function json_each and json_each_text
1640  *
1641  * decompose a json object into key value pairs.
1642  *
1643  * Unlike json_object_keys() these SRFs operate in materialize mode,
1644  * stashing results into a Tuplestore object as they go.
1645  * The construction of tuples is done using a temporary memory context
1646  * that is cleared out after each tuple is built.
1647  */
1648 Datum
json_each(PG_FUNCTION_ARGS)1649 json_each(PG_FUNCTION_ARGS)
1650 {
1651 	return each_worker(fcinfo, false);
1652 }
1653 
1654 Datum
jsonb_each(PG_FUNCTION_ARGS)1655 jsonb_each(PG_FUNCTION_ARGS)
1656 {
1657 	return each_worker_jsonb(fcinfo, "jsonb_each", false);
1658 }
1659 
1660 Datum
json_each_text(PG_FUNCTION_ARGS)1661 json_each_text(PG_FUNCTION_ARGS)
1662 {
1663 	return each_worker(fcinfo, true);
1664 }
1665 
1666 Datum
jsonb_each_text(PG_FUNCTION_ARGS)1667 jsonb_each_text(PG_FUNCTION_ARGS)
1668 {
1669 	return each_worker_jsonb(fcinfo, "jsonb_each_text", true);
1670 }
1671 
1672 static Datum
each_worker_jsonb(FunctionCallInfo fcinfo,const char * funcname,bool as_text)1673 each_worker_jsonb(FunctionCallInfo fcinfo, const char *funcname, bool as_text)
1674 {
1675 	Jsonb	   *jb = PG_GETARG_JSONB_P(0);
1676 	ReturnSetInfo *rsi;
1677 	Tuplestorestate *tuple_store;
1678 	TupleDesc	tupdesc;
1679 	TupleDesc	ret_tdesc;
1680 	MemoryContext old_cxt,
1681 				tmp_cxt;
1682 	bool		skipNested = false;
1683 	JsonbIterator *it;
1684 	JsonbValue	v;
1685 	JsonbIteratorToken r;
1686 
1687 	if (!JB_ROOT_IS_OBJECT(jb))
1688 		ereport(ERROR,
1689 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1690 				 errmsg("cannot call %s on a non-object",
1691 						funcname)));
1692 
1693 	rsi = (ReturnSetInfo *) fcinfo->resultinfo;
1694 
1695 	if (!rsi || !IsA(rsi, ReturnSetInfo) ||
1696 		(rsi->allowedModes & SFRM_Materialize) == 0 ||
1697 		rsi->expectedDesc == NULL)
1698 		ereport(ERROR,
1699 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1700 				 errmsg("set-valued function called in context that "
1701 						"cannot accept a set")));
1702 
1703 	rsi->returnMode = SFRM_Materialize;
1704 
1705 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1706 		ereport(ERROR,
1707 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1708 				 errmsg("function returning record called in context "
1709 						"that cannot accept type record")));
1710 
1711 	old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
1712 
1713 	ret_tdesc = CreateTupleDescCopy(tupdesc);
1714 	BlessTupleDesc(ret_tdesc);
1715 	tuple_store =
1716 		tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
1717 							  false, work_mem);
1718 
1719 	MemoryContextSwitchTo(old_cxt);
1720 
1721 	tmp_cxt = AllocSetContextCreate(CurrentMemoryContext,
1722 									"jsonb_each temporary cxt",
1723 									ALLOCSET_DEFAULT_SIZES);
1724 
1725 	it = JsonbIteratorInit(&jb->root);
1726 
1727 	while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE)
1728 	{
1729 		skipNested = true;
1730 
1731 		if (r == WJB_KEY)
1732 		{
1733 			text	   *key;
1734 			HeapTuple	tuple;
1735 			Datum		values[2];
1736 			bool		nulls[2] = {false, false};
1737 
1738 			/* Use the tmp context so we can clean up after each tuple is done */
1739 			old_cxt = MemoryContextSwitchTo(tmp_cxt);
1740 
1741 			key = cstring_to_text_with_len(v.val.string.val, v.val.string.len);
1742 
1743 			/*
1744 			 * The next thing the iterator fetches should be the value, no
1745 			 * matter what shape it is.
1746 			 */
1747 			r = JsonbIteratorNext(&it, &v, skipNested);
1748 			Assert(r != WJB_DONE);
1749 
1750 			values[0] = PointerGetDatum(key);
1751 
1752 			if (as_text)
1753 			{
1754 				if (v.type == jbvNull)
1755 				{
1756 					/* a json null is an sql null in text mode */
1757 					nulls[1] = true;
1758 					values[1] = (Datum) NULL;
1759 				}
1760 				else
1761 				{
1762 					text	   *sv;
1763 
1764 					if (v.type == jbvString)
1765 					{
1766 						/* In text mode, scalar strings should be dequoted */
1767 						sv = cstring_to_text_with_len(v.val.string.val, v.val.string.len);
1768 					}
1769 					else
1770 					{
1771 						/* Turn anything else into a json string */
1772 						StringInfo	jtext = makeStringInfo();
1773 						Jsonb	   *jb = JsonbValueToJsonb(&v);
1774 
1775 						(void) JsonbToCString(jtext, &jb->root, 0);
1776 						sv = cstring_to_text_with_len(jtext->data, jtext->len);
1777 					}
1778 
1779 					values[1] = PointerGetDatum(sv);
1780 				}
1781 			}
1782 			else
1783 			{
1784 				/* Not in text mode, just return the Jsonb */
1785 				Jsonb	   *val = JsonbValueToJsonb(&v);
1786 
1787 				values[1] = PointerGetDatum(val);
1788 			}
1789 
1790 			tuple = heap_form_tuple(ret_tdesc, values, nulls);
1791 
1792 			tuplestore_puttuple(tuple_store, tuple);
1793 
1794 			/* clean up and switch back */
1795 			MemoryContextSwitchTo(old_cxt);
1796 			MemoryContextReset(tmp_cxt);
1797 		}
1798 	}
1799 
1800 	MemoryContextDelete(tmp_cxt);
1801 
1802 	rsi->setResult = tuple_store;
1803 	rsi->setDesc = ret_tdesc;
1804 
1805 	PG_RETURN_NULL();
1806 }
1807 
1808 
1809 static Datum
each_worker(FunctionCallInfo fcinfo,bool as_text)1810 each_worker(FunctionCallInfo fcinfo, bool as_text)
1811 {
1812 	text	   *json = PG_GETARG_TEXT_PP(0);
1813 	JsonLexContext *lex;
1814 	JsonSemAction *sem;
1815 	ReturnSetInfo *rsi;
1816 	MemoryContext old_cxt;
1817 	TupleDesc	tupdesc;
1818 	EachState  *state;
1819 
1820 	lex = makeJsonLexContext(json, true);
1821 	state = palloc0(sizeof(EachState));
1822 	sem = palloc0(sizeof(JsonSemAction));
1823 
1824 	rsi = (ReturnSetInfo *) fcinfo->resultinfo;
1825 
1826 	if (!rsi || !IsA(rsi, ReturnSetInfo) ||
1827 		(rsi->allowedModes & SFRM_Materialize) == 0 ||
1828 		rsi->expectedDesc == NULL)
1829 		ereport(ERROR,
1830 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1831 				 errmsg("set-valued function called in context that "
1832 						"cannot accept a set")));
1833 
1834 	rsi->returnMode = SFRM_Materialize;
1835 
1836 	(void) get_call_result_type(fcinfo, NULL, &tupdesc);
1837 
1838 	/* make these in a sufficiently long-lived memory context */
1839 	old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
1840 
1841 	state->ret_tdesc = CreateTupleDescCopy(tupdesc);
1842 	BlessTupleDesc(state->ret_tdesc);
1843 	state->tuple_store =
1844 		tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
1845 							  false, work_mem);
1846 
1847 	MemoryContextSwitchTo(old_cxt);
1848 
1849 	sem->semstate = (void *) state;
1850 	sem->array_start = each_array_start;
1851 	sem->scalar = each_scalar;
1852 	sem->object_field_start = each_object_field_start;
1853 	sem->object_field_end = each_object_field_end;
1854 
1855 	state->normalize_results = as_text;
1856 	state->next_scalar = false;
1857 	state->lex = lex;
1858 	state->tmp_cxt = AllocSetContextCreate(CurrentMemoryContext,
1859 										   "json_each temporary cxt",
1860 										   ALLOCSET_DEFAULT_SIZES);
1861 
1862 	pg_parse_json(lex, sem);
1863 
1864 	MemoryContextDelete(state->tmp_cxt);
1865 
1866 	rsi->setResult = state->tuple_store;
1867 	rsi->setDesc = state->ret_tdesc;
1868 
1869 	PG_RETURN_NULL();
1870 }
1871 
1872 
1873 static void
each_object_field_start(void * state,char * fname,bool isnull)1874 each_object_field_start(void *state, char *fname, bool isnull)
1875 {
1876 	EachState  *_state = (EachState *) state;
1877 
1878 	/* save a pointer to where the value starts */
1879 	if (_state->lex->lex_level == 1)
1880 	{
1881 		/*
1882 		 * next_scalar will be reset in the object_field_end handler, and
1883 		 * since we know the value is a scalar there is no danger of it being
1884 		 * on while recursing down the tree.
1885 		 */
1886 		if (_state->normalize_results && _state->lex->token_type == JSON_TOKEN_STRING)
1887 			_state->next_scalar = true;
1888 		else
1889 			_state->result_start = _state->lex->token_start;
1890 	}
1891 }
1892 
1893 static void
each_object_field_end(void * state,char * fname,bool isnull)1894 each_object_field_end(void *state, char *fname, bool isnull)
1895 {
1896 	EachState  *_state = (EachState *) state;
1897 	MemoryContext old_cxt;
1898 	int			len;
1899 	text	   *val;
1900 	HeapTuple	tuple;
1901 	Datum		values[2];
1902 	bool		nulls[2] = {false, false};
1903 
1904 	/* skip over nested objects */
1905 	if (_state->lex->lex_level != 1)
1906 		return;
1907 
1908 	/* use the tmp context so we can clean up after each tuple is done */
1909 	old_cxt = MemoryContextSwitchTo(_state->tmp_cxt);
1910 
1911 	values[0] = CStringGetTextDatum(fname);
1912 
1913 	if (isnull && _state->normalize_results)
1914 	{
1915 		nulls[1] = true;
1916 		values[1] = (Datum) 0;
1917 	}
1918 	else if (_state->next_scalar)
1919 	{
1920 		values[1] = CStringGetTextDatum(_state->normalized_scalar);
1921 		_state->next_scalar = false;
1922 	}
1923 	else
1924 	{
1925 		len = _state->lex->prev_token_terminator - _state->result_start;
1926 		val = cstring_to_text_with_len(_state->result_start, len);
1927 		values[1] = PointerGetDatum(val);
1928 	}
1929 
1930 	tuple = heap_form_tuple(_state->ret_tdesc, values, nulls);
1931 
1932 	tuplestore_puttuple(_state->tuple_store, tuple);
1933 
1934 	/* clean up and switch back */
1935 	MemoryContextSwitchTo(old_cxt);
1936 	MemoryContextReset(_state->tmp_cxt);
1937 }
1938 
1939 static void
each_array_start(void * state)1940 each_array_start(void *state)
1941 {
1942 	EachState  *_state = (EachState *) state;
1943 
1944 	/* json structure check */
1945 	if (_state->lex->lex_level == 0)
1946 		ereport(ERROR,
1947 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1948 				 errmsg("cannot deconstruct an array as an object")));
1949 }
1950 
1951 static void
each_scalar(void * state,char * token,JsonTokenType tokentype)1952 each_scalar(void *state, char *token, JsonTokenType tokentype)
1953 {
1954 	EachState  *_state = (EachState *) state;
1955 
1956 	/* json structure check */
1957 	if (_state->lex->lex_level == 0)
1958 		ereport(ERROR,
1959 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1960 				 errmsg("cannot deconstruct a scalar")));
1961 
1962 	/* supply de-escaped value if required */
1963 	if (_state->next_scalar)
1964 		_state->normalized_scalar = token;
1965 }
1966 
1967 /*
1968  * SQL functions json_array_elements and json_array_elements_text
1969  *
1970  * get the elements from a json array
1971  *
1972  * a lot of this processing is similar to the json_each* functions
1973  */
1974 
1975 Datum
jsonb_array_elements(PG_FUNCTION_ARGS)1976 jsonb_array_elements(PG_FUNCTION_ARGS)
1977 {
1978 	return elements_worker_jsonb(fcinfo, "jsonb_array_elements", false);
1979 }
1980 
1981 Datum
jsonb_array_elements_text(PG_FUNCTION_ARGS)1982 jsonb_array_elements_text(PG_FUNCTION_ARGS)
1983 {
1984 	return elements_worker_jsonb(fcinfo, "jsonb_array_elements_text", true);
1985 }
1986 
1987 static Datum
elements_worker_jsonb(FunctionCallInfo fcinfo,const char * funcname,bool as_text)1988 elements_worker_jsonb(FunctionCallInfo fcinfo, const char *funcname,
1989 					  bool as_text)
1990 {
1991 	Jsonb	   *jb = PG_GETARG_JSONB_P(0);
1992 	ReturnSetInfo *rsi;
1993 	Tuplestorestate *tuple_store;
1994 	TupleDesc	tupdesc;
1995 	TupleDesc	ret_tdesc;
1996 	MemoryContext old_cxt,
1997 				tmp_cxt;
1998 	bool		skipNested = false;
1999 	JsonbIterator *it;
2000 	JsonbValue	v;
2001 	JsonbIteratorToken r;
2002 
2003 	if (JB_ROOT_IS_SCALAR(jb))
2004 		ereport(ERROR,
2005 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2006 				 errmsg("cannot extract elements from a scalar")));
2007 	else if (!JB_ROOT_IS_ARRAY(jb))
2008 		ereport(ERROR,
2009 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2010 				 errmsg("cannot extract elements from an object")));
2011 
2012 	rsi = (ReturnSetInfo *) fcinfo->resultinfo;
2013 
2014 	if (!rsi || !IsA(rsi, ReturnSetInfo) ||
2015 		(rsi->allowedModes & SFRM_Materialize) == 0 ||
2016 		rsi->expectedDesc == NULL)
2017 		ereport(ERROR,
2018 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2019 				 errmsg("set-valued function called in context that "
2020 						"cannot accept a set")));
2021 
2022 	rsi->returnMode = SFRM_Materialize;
2023 
2024 	/* it's a simple type, so don't use get_call_result_type() */
2025 	tupdesc = rsi->expectedDesc;
2026 
2027 	old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
2028 
2029 	ret_tdesc = CreateTupleDescCopy(tupdesc);
2030 	BlessTupleDesc(ret_tdesc);
2031 	tuple_store =
2032 		tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
2033 							  false, work_mem);
2034 
2035 	MemoryContextSwitchTo(old_cxt);
2036 
2037 	tmp_cxt = AllocSetContextCreate(CurrentMemoryContext,
2038 									"jsonb_array_elements temporary cxt",
2039 									ALLOCSET_DEFAULT_SIZES);
2040 
2041 	it = JsonbIteratorInit(&jb->root);
2042 
2043 	while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE)
2044 	{
2045 		skipNested = true;
2046 
2047 		if (r == WJB_ELEM)
2048 		{
2049 			HeapTuple	tuple;
2050 			Datum		values[1];
2051 			bool		nulls[1] = {false};
2052 
2053 			/* use the tmp context so we can clean up after each tuple is done */
2054 			old_cxt = MemoryContextSwitchTo(tmp_cxt);
2055 
2056 			if (!as_text)
2057 			{
2058 				Jsonb	   *val = JsonbValueToJsonb(&v);
2059 
2060 				values[0] = PointerGetDatum(val);
2061 			}
2062 			else
2063 			{
2064 				if (v.type == jbvNull)
2065 				{
2066 					/* a json null is an sql null in text mode */
2067 					nulls[0] = true;
2068 					values[0] = (Datum) NULL;
2069 				}
2070 				else
2071 				{
2072 					text	   *sv;
2073 
2074 					if (v.type == jbvString)
2075 					{
2076 						/* in text mode scalar strings should be dequoted */
2077 						sv = cstring_to_text_with_len(v.val.string.val, v.val.string.len);
2078 					}
2079 					else
2080 					{
2081 						/* turn anything else into a json string */
2082 						StringInfo	jtext = makeStringInfo();
2083 						Jsonb	   *jb = JsonbValueToJsonb(&v);
2084 
2085 						(void) JsonbToCString(jtext, &jb->root, 0);
2086 						sv = cstring_to_text_with_len(jtext->data, jtext->len);
2087 					}
2088 
2089 					values[0] = PointerGetDatum(sv);
2090 				}
2091 			}
2092 
2093 			tuple = heap_form_tuple(ret_tdesc, values, nulls);
2094 
2095 			tuplestore_puttuple(tuple_store, tuple);
2096 
2097 			/* clean up and switch back */
2098 			MemoryContextSwitchTo(old_cxt);
2099 			MemoryContextReset(tmp_cxt);
2100 		}
2101 	}
2102 
2103 	MemoryContextDelete(tmp_cxt);
2104 
2105 	rsi->setResult = tuple_store;
2106 	rsi->setDesc = ret_tdesc;
2107 
2108 	PG_RETURN_NULL();
2109 }
2110 
2111 Datum
json_array_elements(PG_FUNCTION_ARGS)2112 json_array_elements(PG_FUNCTION_ARGS)
2113 {
2114 	return elements_worker(fcinfo, "json_array_elements", false);
2115 }
2116 
2117 Datum
json_array_elements_text(PG_FUNCTION_ARGS)2118 json_array_elements_text(PG_FUNCTION_ARGS)
2119 {
2120 	return elements_worker(fcinfo, "json_array_elements_text", true);
2121 }
2122 
2123 static Datum
elements_worker(FunctionCallInfo fcinfo,const char * funcname,bool as_text)2124 elements_worker(FunctionCallInfo fcinfo, const char *funcname, bool as_text)
2125 {
2126 	text	   *json = PG_GETARG_TEXT_PP(0);
2127 
2128 	/* elements only needs escaped strings when as_text */
2129 	JsonLexContext *lex = makeJsonLexContext(json, as_text);
2130 	JsonSemAction *sem;
2131 	ReturnSetInfo *rsi;
2132 	MemoryContext old_cxt;
2133 	TupleDesc	tupdesc;
2134 	ElementsState *state;
2135 
2136 	state = palloc0(sizeof(ElementsState));
2137 	sem = palloc0(sizeof(JsonSemAction));
2138 
2139 	rsi = (ReturnSetInfo *) fcinfo->resultinfo;
2140 
2141 	if (!rsi || !IsA(rsi, ReturnSetInfo) ||
2142 		(rsi->allowedModes & SFRM_Materialize) == 0 ||
2143 		rsi->expectedDesc == NULL)
2144 		ereport(ERROR,
2145 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2146 				 errmsg("set-valued function called in context that "
2147 						"cannot accept a set")));
2148 
2149 	rsi->returnMode = SFRM_Materialize;
2150 
2151 	/* it's a simple type, so don't use get_call_result_type() */
2152 	tupdesc = rsi->expectedDesc;
2153 
2154 	/* make these in a sufficiently long-lived memory context */
2155 	old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
2156 
2157 	state->ret_tdesc = CreateTupleDescCopy(tupdesc);
2158 	BlessTupleDesc(state->ret_tdesc);
2159 	state->tuple_store =
2160 		tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
2161 							  false, work_mem);
2162 
2163 	MemoryContextSwitchTo(old_cxt);
2164 
2165 	sem->semstate = (void *) state;
2166 	sem->object_start = elements_object_start;
2167 	sem->scalar = elements_scalar;
2168 	sem->array_element_start = elements_array_element_start;
2169 	sem->array_element_end = elements_array_element_end;
2170 
2171 	state->function_name = funcname;
2172 	state->normalize_results = as_text;
2173 	state->next_scalar = false;
2174 	state->lex = lex;
2175 	state->tmp_cxt = AllocSetContextCreate(CurrentMemoryContext,
2176 										   "json_array_elements temporary cxt",
2177 										   ALLOCSET_DEFAULT_SIZES);
2178 
2179 	pg_parse_json(lex, sem);
2180 
2181 	MemoryContextDelete(state->tmp_cxt);
2182 
2183 	rsi->setResult = state->tuple_store;
2184 	rsi->setDesc = state->ret_tdesc;
2185 
2186 	PG_RETURN_NULL();
2187 }
2188 
2189 static void
elements_array_element_start(void * state,bool isnull)2190 elements_array_element_start(void *state, bool isnull)
2191 {
2192 	ElementsState *_state = (ElementsState *) state;
2193 
2194 	/* save a pointer to where the value starts */
2195 	if (_state->lex->lex_level == 1)
2196 	{
2197 		/*
2198 		 * next_scalar will be reset in the array_element_end handler, and
2199 		 * since we know the value is a scalar there is no danger of it being
2200 		 * on while recursing down the tree.
2201 		 */
2202 		if (_state->normalize_results && _state->lex->token_type == JSON_TOKEN_STRING)
2203 			_state->next_scalar = true;
2204 		else
2205 			_state->result_start = _state->lex->token_start;
2206 	}
2207 }
2208 
2209 static void
elements_array_element_end(void * state,bool isnull)2210 elements_array_element_end(void *state, bool isnull)
2211 {
2212 	ElementsState *_state = (ElementsState *) state;
2213 	MemoryContext old_cxt;
2214 	int			len;
2215 	text	   *val;
2216 	HeapTuple	tuple;
2217 	Datum		values[1];
2218 	bool		nulls[1] = {false};
2219 
2220 	/* skip over nested objects */
2221 	if (_state->lex->lex_level != 1)
2222 		return;
2223 
2224 	/* use the tmp context so we can clean up after each tuple is done */
2225 	old_cxt = MemoryContextSwitchTo(_state->tmp_cxt);
2226 
2227 	if (isnull && _state->normalize_results)
2228 	{
2229 		nulls[0] = true;
2230 		values[0] = (Datum) NULL;
2231 	}
2232 	else if (_state->next_scalar)
2233 	{
2234 		values[0] = CStringGetTextDatum(_state->normalized_scalar);
2235 		_state->next_scalar = false;
2236 	}
2237 	else
2238 	{
2239 		len = _state->lex->prev_token_terminator - _state->result_start;
2240 		val = cstring_to_text_with_len(_state->result_start, len);
2241 		values[0] = PointerGetDatum(val);
2242 	}
2243 
2244 	tuple = heap_form_tuple(_state->ret_tdesc, values, nulls);
2245 
2246 	tuplestore_puttuple(_state->tuple_store, tuple);
2247 
2248 	/* clean up and switch back */
2249 	MemoryContextSwitchTo(old_cxt);
2250 	MemoryContextReset(_state->tmp_cxt);
2251 }
2252 
2253 static void
elements_object_start(void * state)2254 elements_object_start(void *state)
2255 {
2256 	ElementsState *_state = (ElementsState *) state;
2257 
2258 	/* json structure check */
2259 	if (_state->lex->lex_level == 0)
2260 		ereport(ERROR,
2261 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2262 				 errmsg("cannot call %s on a non-array",
2263 						_state->function_name)));
2264 }
2265 
2266 static void
elements_scalar(void * state,char * token,JsonTokenType tokentype)2267 elements_scalar(void *state, char *token, JsonTokenType tokentype)
2268 {
2269 	ElementsState *_state = (ElementsState *) state;
2270 
2271 	/* json structure check */
2272 	if (_state->lex->lex_level == 0)
2273 		ereport(ERROR,
2274 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2275 				 errmsg("cannot call %s on a scalar",
2276 						_state->function_name)));
2277 
2278 	/* supply de-escaped value if required */
2279 	if (_state->next_scalar)
2280 		_state->normalized_scalar = token;
2281 }
2282 
2283 /*
2284  * SQL function json_populate_record
2285  *
2286  * set fields in a record from the argument json
2287  *
2288  * Code adapted shamelessly from hstore's populate_record
2289  * which is in turn partly adapted from record_out.
2290  *
2291  * The json is decomposed into a hash table, in which each
2292  * field in the record is then looked up by name. For jsonb
2293  * we fetch the values direct from the object.
2294  */
2295 Datum
jsonb_populate_record(PG_FUNCTION_ARGS)2296 jsonb_populate_record(PG_FUNCTION_ARGS)
2297 {
2298 	return populate_record_worker(fcinfo, "jsonb_populate_record",
2299 								  false, true);
2300 }
2301 
2302 Datum
jsonb_to_record(PG_FUNCTION_ARGS)2303 jsonb_to_record(PG_FUNCTION_ARGS)
2304 {
2305 	return populate_record_worker(fcinfo, "jsonb_to_record",
2306 								  false, false);
2307 }
2308 
2309 Datum
json_populate_record(PG_FUNCTION_ARGS)2310 json_populate_record(PG_FUNCTION_ARGS)
2311 {
2312 	return populate_record_worker(fcinfo, "json_populate_record",
2313 								  true, true);
2314 }
2315 
2316 Datum
json_to_record(PG_FUNCTION_ARGS)2317 json_to_record(PG_FUNCTION_ARGS)
2318 {
2319 	return populate_record_worker(fcinfo, "json_to_record",
2320 								  true, false);
2321 }
2322 
2323 /* helper function for diagnostics */
2324 static void
populate_array_report_expected_array(PopulateArrayContext * ctx,int ndim)2325 populate_array_report_expected_array(PopulateArrayContext *ctx, int ndim)
2326 {
2327 	if (ndim <= 0)
2328 	{
2329 		if (ctx->colname)
2330 			ereport(ERROR,
2331 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
2332 					 errmsg("expected JSON array"),
2333 					 errhint("See the value of key \"%s\".", ctx->colname)));
2334 		else
2335 			ereport(ERROR,
2336 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
2337 					 errmsg("expected JSON array")));
2338 	}
2339 	else
2340 	{
2341 		StringInfoData indices;
2342 		int			i;
2343 
2344 		initStringInfo(&indices);
2345 
2346 		Assert(ctx->ndims > 0 && ndim < ctx->ndims);
2347 
2348 		for (i = 0; i < ndim; i++)
2349 			appendStringInfo(&indices, "[%d]", ctx->sizes[i]);
2350 
2351 		if (ctx->colname)
2352 			ereport(ERROR,
2353 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
2354 					 errmsg("expected JSON array"),
2355 					 errhint("See the array element %s of key \"%s\".",
2356 							 indices.data, ctx->colname)));
2357 		else
2358 			ereport(ERROR,
2359 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
2360 					 errmsg("expected JSON array"),
2361 					 errhint("See the array element %s.",
2362 							 indices.data)));
2363 	}
2364 }
2365 
2366 /* set the number of dimensions of the populated array when it becomes known */
2367 static void
populate_array_assign_ndims(PopulateArrayContext * ctx,int ndims)2368 populate_array_assign_ndims(PopulateArrayContext *ctx, int ndims)
2369 {
2370 	int			i;
2371 
2372 	Assert(ctx->ndims <= 0);
2373 
2374 	if (ndims <= 0)
2375 		populate_array_report_expected_array(ctx, ndims);
2376 
2377 	ctx->ndims = ndims;
2378 	ctx->dims = palloc(sizeof(int) * ndims);
2379 	ctx->sizes = palloc0(sizeof(int) * ndims);
2380 
2381 	for (i = 0; i < ndims; i++)
2382 		ctx->dims[i] = -1;		/* dimensions are unknown yet */
2383 }
2384 
2385 /* check the populated subarray dimension */
2386 static void
populate_array_check_dimension(PopulateArrayContext * ctx,int ndim)2387 populate_array_check_dimension(PopulateArrayContext *ctx, int ndim)
2388 {
2389 	int			dim = ctx->sizes[ndim]; /* current dimension counter */
2390 
2391 	if (ctx->dims[ndim] == -1)
2392 		ctx->dims[ndim] = dim;	/* assign dimension if not yet known */
2393 	else if (ctx->dims[ndim] != dim)
2394 		ereport(ERROR,
2395 				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
2396 				 errmsg("malformed JSON array"),
2397 				 errdetail("Multidimensional arrays must have "
2398 						   "sub-arrays with matching dimensions.")));
2399 
2400 	/* reset the current array dimension size counter */
2401 	ctx->sizes[ndim] = 0;
2402 
2403 	/* increment the parent dimension counter if it is a nested sub-array */
2404 	if (ndim > 0)
2405 		ctx->sizes[ndim - 1]++;
2406 }
2407 
2408 static void
populate_array_element(PopulateArrayContext * ctx,int ndim,JsValue * jsv)2409 populate_array_element(PopulateArrayContext *ctx, int ndim, JsValue *jsv)
2410 {
2411 	Datum		element;
2412 	bool		element_isnull;
2413 
2414 	/* populate the array element */
2415 	element = populate_record_field(ctx->aio->element_info,
2416 									ctx->aio->element_type,
2417 									ctx->aio->element_typmod,
2418 									NULL, ctx->mcxt, PointerGetDatum(NULL),
2419 									jsv, &element_isnull);
2420 
2421 	accumArrayResult(ctx->astate, element, element_isnull,
2422 					 ctx->aio->element_type, ctx->acxt);
2423 
2424 	Assert(ndim > 0);
2425 	ctx->sizes[ndim - 1]++;		/* increment current dimension counter */
2426 }
2427 
2428 /* json object start handler for populate_array_json() */
2429 static void
populate_array_object_start(void * _state)2430 populate_array_object_start(void *_state)
2431 {
2432 	PopulateArrayState *state = (PopulateArrayState *) _state;
2433 	int			ndim = state->lex->lex_level;
2434 
2435 	if (state->ctx->ndims <= 0)
2436 		populate_array_assign_ndims(state->ctx, ndim);
2437 	else if (ndim < state->ctx->ndims)
2438 		populate_array_report_expected_array(state->ctx, ndim);
2439 }
2440 
2441 /* json array end handler for populate_array_json() */
2442 static void
populate_array_array_end(void * _state)2443 populate_array_array_end(void *_state)
2444 {
2445 	PopulateArrayState *state = (PopulateArrayState *) _state;
2446 	PopulateArrayContext *ctx = state->ctx;
2447 	int			ndim = state->lex->lex_level;
2448 
2449 	if (ctx->ndims <= 0)
2450 		populate_array_assign_ndims(ctx, ndim + 1);
2451 
2452 	if (ndim < ctx->ndims)
2453 		populate_array_check_dimension(ctx, ndim);
2454 }
2455 
2456 /* json array element start handler for populate_array_json() */
2457 static void
populate_array_element_start(void * _state,bool isnull)2458 populate_array_element_start(void *_state, bool isnull)
2459 {
2460 	PopulateArrayState *state = (PopulateArrayState *) _state;
2461 	int			ndim = state->lex->lex_level;
2462 
2463 	if (state->ctx->ndims <= 0 || ndim == state->ctx->ndims)
2464 	{
2465 		/* remember current array element start */
2466 		state->element_start = state->lex->token_start;
2467 		state->element_type = state->lex->token_type;
2468 		state->element_scalar = NULL;
2469 	}
2470 }
2471 
2472 /* json array element end handler for populate_array_json() */
2473 static void
populate_array_element_end(void * _state,bool isnull)2474 populate_array_element_end(void *_state, bool isnull)
2475 {
2476 	PopulateArrayState *state = (PopulateArrayState *) _state;
2477 	PopulateArrayContext *ctx = state->ctx;
2478 	int			ndim = state->lex->lex_level;
2479 
2480 	Assert(ctx->ndims > 0);
2481 
2482 	if (ndim == ctx->ndims)
2483 	{
2484 		JsValue		jsv;
2485 
2486 		jsv.is_json = true;
2487 		jsv.val.json.type = state->element_type;
2488 
2489 		if (isnull)
2490 		{
2491 			Assert(jsv.val.json.type == JSON_TOKEN_NULL);
2492 			jsv.val.json.str = NULL;
2493 			jsv.val.json.len = 0;
2494 		}
2495 		else if (state->element_scalar)
2496 		{
2497 			jsv.val.json.str = state->element_scalar;
2498 			jsv.val.json.len = -1;	/* null-terminated */
2499 		}
2500 		else
2501 		{
2502 			jsv.val.json.str = state->element_start;
2503 			jsv.val.json.len = (state->lex->prev_token_terminator -
2504 								state->element_start) * sizeof(char);
2505 		}
2506 
2507 		populate_array_element(ctx, ndim, &jsv);
2508 	}
2509 }
2510 
2511 /* json scalar handler for populate_array_json() */
2512 static void
populate_array_scalar(void * _state,char * token,JsonTokenType tokentype)2513 populate_array_scalar(void *_state, char *token, JsonTokenType tokentype)
2514 {
2515 	PopulateArrayState *state = (PopulateArrayState *) _state;
2516 	PopulateArrayContext *ctx = state->ctx;
2517 	int			ndim = state->lex->lex_level;
2518 
2519 	if (ctx->ndims <= 0)
2520 		populate_array_assign_ndims(ctx, ndim);
2521 	else if (ndim < ctx->ndims)
2522 		populate_array_report_expected_array(ctx, ndim);
2523 
2524 	if (ndim == ctx->ndims)
2525 	{
2526 		/* remember the scalar element token */
2527 		state->element_scalar = token;
2528 		/* element_type must already be set in populate_array_element_start() */
2529 		Assert(state->element_type == tokentype);
2530 	}
2531 }
2532 
2533 /* parse a json array and populate array */
2534 static void
populate_array_json(PopulateArrayContext * ctx,char * json,int len)2535 populate_array_json(PopulateArrayContext *ctx, char *json, int len)
2536 {
2537 	PopulateArrayState state;
2538 	JsonSemAction sem;
2539 
2540 	state.lex = makeJsonLexContextCstringLen(json, len, true);
2541 	state.ctx = ctx;
2542 
2543 	memset(&sem, 0, sizeof(sem));
2544 	sem.semstate = (void *) &state;
2545 	sem.object_start = populate_array_object_start;
2546 	sem.array_end = populate_array_array_end;
2547 	sem.array_element_start = populate_array_element_start;
2548 	sem.array_element_end = populate_array_element_end;
2549 	sem.scalar = populate_array_scalar;
2550 
2551 	pg_parse_json(state.lex, &sem);
2552 
2553 	/* number of dimensions should be already known */
2554 	Assert(ctx->ndims > 0 && ctx->dims);
2555 
2556 	pfree(state.lex);
2557 }
2558 
2559 /*
2560  * populate_array_dim_jsonb() -- Iterate recursively through jsonb sub-array
2561  *		elements and accumulate result using given ArrayBuildState.
2562  */
2563 static void
populate_array_dim_jsonb(PopulateArrayContext * ctx,JsonbValue * jbv,int ndim)2564 populate_array_dim_jsonb(PopulateArrayContext *ctx, /* context */
2565 						 JsonbValue *jbv,	/* jsonb sub-array */
2566 						 int ndim)	/* current dimension */
2567 {
2568 	JsonbContainer *jbc = jbv->val.binary.data;
2569 	JsonbIterator *it;
2570 	JsonbIteratorToken tok;
2571 	JsonbValue	val;
2572 	JsValue		jsv;
2573 
2574 	check_stack_depth();
2575 
2576 	if (jbv->type != jbvBinary || !JsonContainerIsArray(jbc))
2577 		populate_array_report_expected_array(ctx, ndim - 1);
2578 
2579 	Assert(!JsonContainerIsScalar(jbc));
2580 
2581 	it = JsonbIteratorInit(jbc);
2582 
2583 	tok = JsonbIteratorNext(&it, &val, true);
2584 	Assert(tok == WJB_BEGIN_ARRAY);
2585 
2586 	tok = JsonbIteratorNext(&it, &val, true);
2587 
2588 	/*
2589 	 * If the number of dimensions is not yet known and we have found end of
2590 	 * the array, or the first child element is not an array, then assign the
2591 	 * number of dimensions now.
2592 	 */
2593 	if (ctx->ndims <= 0 &&
2594 		(tok == WJB_END_ARRAY ||
2595 		 (tok == WJB_ELEM &&
2596 		  (val.type != jbvBinary ||
2597 		   !JsonContainerIsArray(val.val.binary.data)))))
2598 		populate_array_assign_ndims(ctx, ndim);
2599 
2600 	jsv.is_json = false;
2601 	jsv.val.jsonb = &val;
2602 
2603 	/* process all the array elements */
2604 	while (tok == WJB_ELEM)
2605 	{
2606 		/*
2607 		 * Recurse only if the dimensions of dimensions is still unknown or if
2608 		 * it is not the innermost dimension.
2609 		 */
2610 		if (ctx->ndims > 0 && ndim >= ctx->ndims)
2611 			populate_array_element(ctx, ndim, &jsv);
2612 		else
2613 		{
2614 			/* populate child sub-array */
2615 			populate_array_dim_jsonb(ctx, &val, ndim + 1);
2616 
2617 			/* number of dimensions should be already known */
2618 			Assert(ctx->ndims > 0 && ctx->dims);
2619 
2620 			populate_array_check_dimension(ctx, ndim);
2621 		}
2622 
2623 		tok = JsonbIteratorNext(&it, &val, true);
2624 	}
2625 
2626 	Assert(tok == WJB_END_ARRAY);
2627 
2628 	/* free iterator, iterating until WJB_DONE */
2629 	tok = JsonbIteratorNext(&it, &val, true);
2630 	Assert(tok == WJB_DONE && !it);
2631 }
2632 
2633 /* recursively populate an array from json/jsonb */
2634 static Datum
populate_array(ArrayIOData * aio,const char * colname,MemoryContext mcxt,JsValue * jsv)2635 populate_array(ArrayIOData *aio,
2636 			   const char *colname,
2637 			   MemoryContext mcxt,
2638 			   JsValue *jsv)
2639 {
2640 	PopulateArrayContext ctx;
2641 	Datum		result;
2642 	int		   *lbs;
2643 	int			i;
2644 
2645 	ctx.aio = aio;
2646 	ctx.mcxt = mcxt;
2647 	ctx.acxt = CurrentMemoryContext;
2648 	ctx.astate = initArrayResult(aio->element_type, ctx.acxt, true);
2649 	ctx.colname = colname;
2650 	ctx.ndims = 0;				/* unknown yet */
2651 	ctx.dims = NULL;
2652 	ctx.sizes = NULL;
2653 
2654 	if (jsv->is_json)
2655 		populate_array_json(&ctx, jsv->val.json.str,
2656 							jsv->val.json.len >= 0 ? jsv->val.json.len
2657 							: strlen(jsv->val.json.str));
2658 	else
2659 	{
2660 		populate_array_dim_jsonb(&ctx, jsv->val.jsonb, 1);
2661 		ctx.dims[0] = ctx.sizes[0];
2662 	}
2663 
2664 	Assert(ctx.ndims > 0);
2665 
2666 	lbs = palloc(sizeof(int) * ctx.ndims);
2667 
2668 	for (i = 0; i < ctx.ndims; i++)
2669 		lbs[i] = 1;
2670 
2671 	result = makeMdArrayResult(ctx.astate, ctx.ndims, ctx.dims, lbs,
2672 							   ctx.acxt, true);
2673 
2674 	pfree(ctx.dims);
2675 	pfree(ctx.sizes);
2676 	pfree(lbs);
2677 
2678 	return result;
2679 }
2680 
2681 static void
JsValueToJsObject(JsValue * jsv,JsObject * jso)2682 JsValueToJsObject(JsValue *jsv, JsObject *jso)
2683 {
2684 	jso->is_json = jsv->is_json;
2685 
2686 	if (jsv->is_json)
2687 	{
2688 		/* convert plain-text json into a hash table */
2689 		jso->val.json_hash =
2690 			get_json_object_as_hash(jsv->val.json.str,
2691 									jsv->val.json.len >= 0
2692 									? jsv->val.json.len
2693 									: strlen(jsv->val.json.str),
2694 									"populate_composite");
2695 	}
2696 	else
2697 	{
2698 		JsonbValue *jbv = jsv->val.jsonb;
2699 
2700 		if (jbv->type == jbvBinary &&
2701 			JsonContainerIsObject(jbv->val.binary.data))
2702 		{
2703 			jso->val.jsonb_cont = jbv->val.binary.data;
2704 		}
2705 		else
2706 		{
2707 			bool		is_scalar;
2708 
2709 			is_scalar = IsAJsonbScalar(jbv) ||
2710 				(jbv->type == jbvBinary &&
2711 				 JsonContainerIsScalar(jbv->val.binary.data));
2712 			ereport(ERROR,
2713 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2714 					 is_scalar
2715 					 ? errmsg("cannot call %s on a scalar",
2716 							  "populate_composite")
2717 					 : errmsg("cannot call %s on an array",
2718 							  "populate_composite")));
2719 		}
2720 	}
2721 }
2722 
2723 /* acquire or update cached tuple descriptor for a composite type */
2724 static void
update_cached_tupdesc(CompositeIOData * io,MemoryContext mcxt)2725 update_cached_tupdesc(CompositeIOData *io, MemoryContext mcxt)
2726 {
2727 	if (!io->tupdesc ||
2728 		io->tupdesc->tdtypeid != io->base_typid ||
2729 		io->tupdesc->tdtypmod != io->base_typmod)
2730 	{
2731 		TupleDesc	tupdesc = lookup_rowtype_tupdesc(io->base_typid,
2732 													 io->base_typmod);
2733 		MemoryContext oldcxt;
2734 
2735 		if (io->tupdesc)
2736 			FreeTupleDesc(io->tupdesc);
2737 
2738 		/* copy tuple desc without constraints into cache memory context */
2739 		oldcxt = MemoryContextSwitchTo(mcxt);
2740 		io->tupdesc = CreateTupleDescCopy(tupdesc);
2741 		MemoryContextSwitchTo(oldcxt);
2742 
2743 		ReleaseTupleDesc(tupdesc);
2744 	}
2745 }
2746 
2747 /* recursively populate a composite (row type) value from json/jsonb */
2748 static Datum
populate_composite(CompositeIOData * io,Oid typid,const char * colname,MemoryContext mcxt,HeapTupleHeader defaultval,JsValue * jsv,bool isnull)2749 populate_composite(CompositeIOData *io,
2750 				   Oid typid,
2751 				   const char *colname,
2752 				   MemoryContext mcxt,
2753 				   HeapTupleHeader defaultval,
2754 				   JsValue *jsv,
2755 				   bool isnull)
2756 {
2757 	Datum		result;
2758 
2759 	/* acquire/update cached tuple descriptor */
2760 	update_cached_tupdesc(io, mcxt);
2761 
2762 	if (isnull)
2763 		result = (Datum) 0;
2764 	else
2765 	{
2766 		HeapTupleHeader tuple;
2767 		JsObject	jso;
2768 
2769 		/* prepare input value */
2770 		JsValueToJsObject(jsv, &jso);
2771 
2772 		/* populate resulting record tuple */
2773 		tuple = populate_record(io->tupdesc, &io->record_io,
2774 								defaultval, mcxt, &jso);
2775 		result = HeapTupleHeaderGetDatum(tuple);
2776 
2777 		JsObjectFree(&jso);
2778 	}
2779 
2780 	/*
2781 	 * If it's domain over composite, check domain constraints.  (This should
2782 	 * probably get refactored so that we can see the TYPECAT value, but for
2783 	 * now, we can tell by comparing typid to base_typid.)
2784 	 */
2785 	if (typid != io->base_typid && typid != RECORDOID)
2786 		domain_check(result, isnull, typid, &io->domain_info, mcxt);
2787 
2788 	return result;
2789 }
2790 
2791 /* populate non-null scalar value from json/jsonb value */
2792 static Datum
populate_scalar(ScalarIOData * io,Oid typid,int32 typmod,JsValue * jsv)2793 populate_scalar(ScalarIOData *io, Oid typid, int32 typmod, JsValue *jsv)
2794 {
2795 	Datum		res;
2796 	char	   *str = NULL;
2797 	char	   *json = NULL;
2798 
2799 	if (jsv->is_json)
2800 	{
2801 		int			len = jsv->val.json.len;
2802 
2803 		json = jsv->val.json.str;
2804 		Assert(json);
2805 		if (len >= 0)
2806 		{
2807 			/* Need to copy non-null-terminated string */
2808 			str = palloc(len + 1 * sizeof(char));
2809 			memcpy(str, json, len);
2810 			str[len] = '\0';
2811 		}
2812 		else
2813 			str = json;			/* string is already null-terminated */
2814 
2815 		/* If converting to json/jsonb, make string into valid JSON literal */
2816 		if ((typid == JSONOID || typid == JSONBOID) &&
2817 			jsv->val.json.type == JSON_TOKEN_STRING)
2818 		{
2819 			StringInfoData buf;
2820 
2821 			initStringInfo(&buf);
2822 			escape_json(&buf, str);
2823 			/* free temporary buffer */
2824 			if (str != json)
2825 				pfree(str);
2826 			str = buf.data;
2827 		}
2828 	}
2829 	else
2830 	{
2831 		JsonbValue *jbv = jsv->val.jsonb;
2832 
2833 		if (typid == JSONBOID)
2834 		{
2835 			Jsonb	   *jsonb = JsonbValueToJsonb(jbv); /* directly use jsonb */
2836 
2837 			return JsonbPGetDatum(jsonb);
2838 		}
2839 		/* convert jsonb to string for typio call */
2840 		else if (typid == JSONOID && jbv->type != jbvBinary)
2841 		{
2842 			/*
2843 			 * Convert scalar jsonb (non-scalars are passed here as jbvBinary)
2844 			 * to json string, preserving quotes around top-level strings.
2845 			 */
2846 			Jsonb	   *jsonb = JsonbValueToJsonb(jbv);
2847 
2848 			str = JsonbToCString(NULL, &jsonb->root, VARSIZE(jsonb));
2849 		}
2850 		else if (jbv->type == jbvString)	/* quotes are stripped */
2851 			str = pnstrdup(jbv->val.string.val, jbv->val.string.len);
2852 		else if (jbv->type == jbvBool)
2853 			str = pstrdup(jbv->val.boolean ? "true" : "false");
2854 		else if (jbv->type == jbvNumeric)
2855 			str = DatumGetCString(DirectFunctionCall1(numeric_out,
2856 													  PointerGetDatum(jbv->val.numeric)));
2857 		else if (jbv->type == jbvBinary)
2858 			str = JsonbToCString(NULL, jbv->val.binary.data,
2859 								 jbv->val.binary.len);
2860 		else
2861 			elog(ERROR, "unrecognized jsonb type: %d", (int) jbv->type);
2862 	}
2863 
2864 	res = InputFunctionCall(&io->typiofunc, str, io->typioparam, typmod);
2865 
2866 	/* free temporary buffer */
2867 	if (str != json)
2868 		pfree(str);
2869 
2870 	return res;
2871 }
2872 
2873 static Datum
populate_domain(DomainIOData * io,Oid typid,const char * colname,MemoryContext mcxt,JsValue * jsv,bool isnull)2874 populate_domain(DomainIOData *io,
2875 				Oid typid,
2876 				const char *colname,
2877 				MemoryContext mcxt,
2878 				JsValue *jsv,
2879 				bool isnull)
2880 {
2881 	Datum		res;
2882 
2883 	if (isnull)
2884 		res = (Datum) 0;
2885 	else
2886 	{
2887 		res = populate_record_field(io->base_io,
2888 									io->base_typid, io->base_typmod,
2889 									colname, mcxt, PointerGetDatum(NULL),
2890 									jsv, &isnull);
2891 		Assert(!isnull);
2892 	}
2893 
2894 	domain_check(res, isnull, typid, &io->domain_info, mcxt);
2895 
2896 	return res;
2897 }
2898 
2899 /* prepare column metadata cache for the given type */
2900 static void
prepare_column_cache(ColumnIOData * column,Oid typid,int32 typmod,MemoryContext mcxt,bool need_scalar)2901 prepare_column_cache(ColumnIOData *column,
2902 					 Oid typid,
2903 					 int32 typmod,
2904 					 MemoryContext mcxt,
2905 					 bool need_scalar)
2906 {
2907 	HeapTuple	tup;
2908 	Form_pg_type type;
2909 
2910 	column->typid = typid;
2911 	column->typmod = typmod;
2912 
2913 	tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid));
2914 	if (!HeapTupleIsValid(tup))
2915 		elog(ERROR, "cache lookup failed for type %u", typid);
2916 
2917 	type = (Form_pg_type) GETSTRUCT(tup);
2918 
2919 	if (type->typtype == TYPTYPE_DOMAIN)
2920 	{
2921 		/*
2922 		 * We can move directly to the bottom base type; domain_check() will
2923 		 * take care of checking all constraints for a stack of domains.
2924 		 */
2925 		Oid			base_typid;
2926 		int32		base_typmod = typmod;
2927 
2928 		base_typid = getBaseTypeAndTypmod(typid, &base_typmod);
2929 		if (get_typtype(base_typid) == TYPTYPE_COMPOSITE)
2930 		{
2931 			/* domain over composite has its own code path */
2932 			column->typcat = TYPECAT_COMPOSITE_DOMAIN;
2933 			column->io.composite.record_io = NULL;
2934 			column->io.composite.tupdesc = NULL;
2935 			column->io.composite.base_typid = base_typid;
2936 			column->io.composite.base_typmod = base_typmod;
2937 			column->io.composite.domain_info = NULL;
2938 		}
2939 		else
2940 		{
2941 			/* domain over anything else */
2942 			column->typcat = TYPECAT_DOMAIN;
2943 			column->io.domain.base_typid = base_typid;
2944 			column->io.domain.base_typmod = base_typmod;
2945 			column->io.domain.base_io =
2946 				MemoryContextAllocZero(mcxt, sizeof(ColumnIOData));
2947 			column->io.domain.domain_info = NULL;
2948 		}
2949 	}
2950 	else if (type->typtype == TYPTYPE_COMPOSITE || typid == RECORDOID)
2951 	{
2952 		column->typcat = TYPECAT_COMPOSITE;
2953 		column->io.composite.record_io = NULL;
2954 		column->io.composite.tupdesc = NULL;
2955 		column->io.composite.base_typid = typid;
2956 		column->io.composite.base_typmod = typmod;
2957 		column->io.composite.domain_info = NULL;
2958 	}
2959 	else if (type->typlen == -1 && OidIsValid(type->typelem))
2960 	{
2961 		column->typcat = TYPECAT_ARRAY;
2962 		column->io.array.element_info = MemoryContextAllocZero(mcxt,
2963 															   sizeof(ColumnIOData));
2964 		column->io.array.element_type = type->typelem;
2965 		/* array element typemod stored in attribute's typmod */
2966 		column->io.array.element_typmod = typmod;
2967 	}
2968 	else
2969 	{
2970 		column->typcat = TYPECAT_SCALAR;
2971 		need_scalar = true;
2972 	}
2973 
2974 	/* caller can force us to look up scalar_io info even for non-scalars */
2975 	if (need_scalar)
2976 	{
2977 		Oid			typioproc;
2978 
2979 		getTypeInputInfo(typid, &typioproc, &column->scalar_io.typioparam);
2980 		fmgr_info_cxt(typioproc, &column->scalar_io.typiofunc, mcxt);
2981 	}
2982 
2983 	ReleaseSysCache(tup);
2984 }
2985 
2986 /* recursively populate a record field or an array element from a json/jsonb value */
2987 static Datum
populate_record_field(ColumnIOData * col,Oid typid,int32 typmod,const char * colname,MemoryContext mcxt,Datum defaultval,JsValue * jsv,bool * isnull)2988 populate_record_field(ColumnIOData *col,
2989 					  Oid typid,
2990 					  int32 typmod,
2991 					  const char *colname,
2992 					  MemoryContext mcxt,
2993 					  Datum defaultval,
2994 					  JsValue *jsv,
2995 					  bool *isnull)
2996 {
2997 	TypeCat		typcat;
2998 
2999 	check_stack_depth();
3000 
3001 	/*
3002 	 * Prepare column metadata cache for the given type.  Force lookup of the
3003 	 * scalar_io data so that the json string hack below will work.
3004 	 */
3005 	if (col->typid != typid || col->typmod != typmod)
3006 		prepare_column_cache(col, typid, typmod, mcxt, true);
3007 
3008 	*isnull = JsValueIsNull(jsv);
3009 
3010 	typcat = col->typcat;
3011 
3012 	/* try to convert json string to a non-scalar type through input function */
3013 	if (JsValueIsString(jsv) &&
3014 		(typcat == TYPECAT_ARRAY ||
3015 		 typcat == TYPECAT_COMPOSITE ||
3016 		 typcat == TYPECAT_COMPOSITE_DOMAIN))
3017 		typcat = TYPECAT_SCALAR;
3018 
3019 	/* we must perform domain checks for NULLs, otherwise exit immediately */
3020 	if (*isnull &&
3021 		typcat != TYPECAT_DOMAIN &&
3022 		typcat != TYPECAT_COMPOSITE_DOMAIN)
3023 		return (Datum) 0;
3024 
3025 	switch (typcat)
3026 	{
3027 		case TYPECAT_SCALAR:
3028 			return populate_scalar(&col->scalar_io, typid, typmod, jsv);
3029 
3030 		case TYPECAT_ARRAY:
3031 			return populate_array(&col->io.array, colname, mcxt, jsv);
3032 
3033 		case TYPECAT_COMPOSITE:
3034 		case TYPECAT_COMPOSITE_DOMAIN:
3035 			return populate_composite(&col->io.composite, typid,
3036 									  colname, mcxt,
3037 									  DatumGetPointer(defaultval)
3038 									  ? DatumGetHeapTupleHeader(defaultval)
3039 									  : NULL,
3040 									  jsv, *isnull);
3041 
3042 		case TYPECAT_DOMAIN:
3043 			return populate_domain(&col->io.domain, typid, colname, mcxt,
3044 								   jsv, *isnull);
3045 
3046 		default:
3047 			elog(ERROR, "unrecognized type category '%c'", typcat);
3048 			return (Datum) 0;
3049 	}
3050 }
3051 
3052 static RecordIOData *
allocate_record_info(MemoryContext mcxt,int ncolumns)3053 allocate_record_info(MemoryContext mcxt, int ncolumns)
3054 {
3055 	RecordIOData *data = (RecordIOData *)
3056 	MemoryContextAlloc(mcxt,
3057 					   offsetof(RecordIOData, columns) +
3058 					   ncolumns * sizeof(ColumnIOData));
3059 
3060 	data->record_type = InvalidOid;
3061 	data->record_typmod = 0;
3062 	data->ncolumns = ncolumns;
3063 	MemSet(data->columns, 0, sizeof(ColumnIOData) * ncolumns);
3064 
3065 	return data;
3066 }
3067 
3068 static bool
JsObjectGetField(JsObject * obj,char * field,JsValue * jsv)3069 JsObjectGetField(JsObject *obj, char *field, JsValue *jsv)
3070 {
3071 	jsv->is_json = obj->is_json;
3072 
3073 	if (jsv->is_json)
3074 	{
3075 		JsonHashEntry *hashentry = hash_search(obj->val.json_hash, field,
3076 											   HASH_FIND, NULL);
3077 
3078 		jsv->val.json.type = hashentry ? hashentry->type : JSON_TOKEN_NULL;
3079 		jsv->val.json.str = jsv->val.json.type == JSON_TOKEN_NULL ? NULL :
3080 			hashentry->val;
3081 		jsv->val.json.len = jsv->val.json.str ? -1 : 0; /* null-terminated */
3082 
3083 		return hashentry != NULL;
3084 	}
3085 	else
3086 	{
3087 		jsv->val.jsonb = !obj->val.jsonb_cont ? NULL :
3088 			findJsonbValueFromContainerLen(obj->val.jsonb_cont, JB_FOBJECT,
3089 										   field, strlen(field));
3090 
3091 		return jsv->val.jsonb != NULL;
3092 	}
3093 }
3094 
3095 /* populate a record tuple from json/jsonb value */
3096 static HeapTupleHeader
populate_record(TupleDesc tupdesc,RecordIOData ** record_p,HeapTupleHeader defaultval,MemoryContext mcxt,JsObject * obj)3097 populate_record(TupleDesc tupdesc,
3098 				RecordIOData **record_p,
3099 				HeapTupleHeader defaultval,
3100 				MemoryContext mcxt,
3101 				JsObject *obj)
3102 {
3103 	RecordIOData *record = *record_p;
3104 	Datum	   *values;
3105 	bool	   *nulls;
3106 	HeapTuple	res;
3107 	int			ncolumns = tupdesc->natts;
3108 	int			i;
3109 
3110 	/*
3111 	 * if the input json is empty, we can only skip the rest if we were passed
3112 	 * in a non-null record, since otherwise there may be issues with domain
3113 	 * nulls.
3114 	 */
3115 	if (defaultval && JsObjectIsEmpty(obj))
3116 		return defaultval;
3117 
3118 	/* (re)allocate metadata cache */
3119 	if (record == NULL ||
3120 		record->ncolumns != ncolumns)
3121 		*record_p = record = allocate_record_info(mcxt, ncolumns);
3122 
3123 	/* invalidate metadata cache if the record type has changed */
3124 	if (record->record_type != tupdesc->tdtypeid ||
3125 		record->record_typmod != tupdesc->tdtypmod)
3126 	{
3127 		MemSet(record, 0, offsetof(RecordIOData, columns) +
3128 			   ncolumns * sizeof(ColumnIOData));
3129 		record->record_type = tupdesc->tdtypeid;
3130 		record->record_typmod = tupdesc->tdtypmod;
3131 		record->ncolumns = ncolumns;
3132 	}
3133 
3134 	values = (Datum *) palloc(ncolumns * sizeof(Datum));
3135 	nulls = (bool *) palloc(ncolumns * sizeof(bool));
3136 
3137 	if (defaultval)
3138 	{
3139 		HeapTupleData tuple;
3140 
3141 		/* Build a temporary HeapTuple control structure */
3142 		tuple.t_len = HeapTupleHeaderGetDatumLength(defaultval);
3143 		ItemPointerSetInvalid(&(tuple.t_self));
3144 		tuple.t_tableOid = InvalidOid;
3145 		tuple.t_data = defaultval;
3146 
3147 		/* Break down the tuple into fields */
3148 		heap_deform_tuple(&tuple, tupdesc, values, nulls);
3149 	}
3150 	else
3151 	{
3152 		for (i = 0; i < ncolumns; ++i)
3153 		{
3154 			values[i] = (Datum) 0;
3155 			nulls[i] = true;
3156 		}
3157 	}
3158 
3159 	for (i = 0; i < ncolumns; ++i)
3160 	{
3161 		Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3162 		char	   *colname = NameStr(att->attname);
3163 		JsValue		field = {0};
3164 		bool		found;
3165 
3166 		/* Ignore dropped columns in datatype */
3167 		if (att->attisdropped)
3168 		{
3169 			nulls[i] = true;
3170 			continue;
3171 		}
3172 
3173 		found = JsObjectGetField(obj, colname, &field);
3174 
3175 		/*
3176 		 * we can't just skip here if the key wasn't found since we might have
3177 		 * a domain to deal with. If we were passed in a non-null record
3178 		 * datum, we assume that the existing values are valid (if they're
3179 		 * not, then it's not our fault), but if we were passed in a null,
3180 		 * then every field which we don't populate needs to be run through
3181 		 * the input function just in case it's a domain type.
3182 		 */
3183 		if (defaultval && !found)
3184 			continue;
3185 
3186 		values[i] = populate_record_field(&record->columns[i],
3187 										  att->atttypid,
3188 										  att->atttypmod,
3189 										  colname,
3190 										  mcxt,
3191 										  nulls[i] ? (Datum) 0 : values[i],
3192 										  &field,
3193 										  &nulls[i]);
3194 	}
3195 
3196 	res = heap_form_tuple(tupdesc, values, nulls);
3197 
3198 	pfree(values);
3199 	pfree(nulls);
3200 
3201 	return res->t_data;
3202 }
3203 
3204 /*
3205  * Setup for json{b}_populate_record{set}: result type will be same as first
3206  * argument's type --- unless first argument is "null::record", which we can't
3207  * extract type info from; we handle that later.
3208  */
3209 static void
get_record_type_from_argument(FunctionCallInfo fcinfo,const char * funcname,PopulateRecordCache * cache)3210 get_record_type_from_argument(FunctionCallInfo fcinfo,
3211 							  const char *funcname,
3212 							  PopulateRecordCache *cache)
3213 {
3214 	cache->argtype = get_fn_expr_argtype(fcinfo->flinfo, 0);
3215 	prepare_column_cache(&cache->c,
3216 						 cache->argtype, -1,
3217 						 cache->fn_mcxt, false);
3218 	if (cache->c.typcat != TYPECAT_COMPOSITE &&
3219 		cache->c.typcat != TYPECAT_COMPOSITE_DOMAIN)
3220 		ereport(ERROR,
3221 				(errcode(ERRCODE_DATATYPE_MISMATCH),
3222 		/* translator: %s is a function name, eg json_to_record */
3223 				 errmsg("first argument of %s must be a row type",
3224 						funcname)));
3225 }
3226 
3227 /*
3228  * Setup for json{b}_to_record{set}: result type is specified by calling
3229  * query.  We'll also use this code for json{b}_populate_record{set},
3230  * if we discover that the first argument is a null of type RECORD.
3231  *
3232  * Here it is syntactically impossible to specify the target type
3233  * as domain-over-composite.
3234  */
3235 static void
get_record_type_from_query(FunctionCallInfo fcinfo,const char * funcname,PopulateRecordCache * cache)3236 get_record_type_from_query(FunctionCallInfo fcinfo,
3237 						   const char *funcname,
3238 						   PopulateRecordCache *cache)
3239 {
3240 	TupleDesc	tupdesc;
3241 	MemoryContext old_cxt;
3242 
3243 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
3244 		ereport(ERROR,
3245 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3246 		/* translator: %s is a function name, eg json_to_record */
3247 				 errmsg("could not determine row type for result of %s",
3248 						funcname),
3249 				 errhint("Provide a non-null record argument, "
3250 						 "or call the function in the FROM clause "
3251 						 "using a column definition list.")));
3252 
3253 	Assert(tupdesc);
3254 	cache->argtype = tupdesc->tdtypeid;
3255 
3256 	/* If we go through this more than once, avoid memory leak */
3257 	if (cache->c.io.composite.tupdesc)
3258 		FreeTupleDesc(cache->c.io.composite.tupdesc);
3259 
3260 	/* Save identified tupdesc */
3261 	old_cxt = MemoryContextSwitchTo(cache->fn_mcxt);
3262 	cache->c.io.composite.tupdesc = CreateTupleDescCopy(tupdesc);
3263 	cache->c.io.composite.base_typid = tupdesc->tdtypeid;
3264 	cache->c.io.composite.base_typmod = tupdesc->tdtypmod;
3265 	MemoryContextSwitchTo(old_cxt);
3266 }
3267 
3268 /*
3269  * common worker for json{b}_populate_record() and json{b}_to_record()
3270  * is_json and have_record_arg identify the specific function
3271  */
3272 static Datum
populate_record_worker(FunctionCallInfo fcinfo,const char * funcname,bool is_json,bool have_record_arg)3273 populate_record_worker(FunctionCallInfo fcinfo, const char *funcname,
3274 					   bool is_json, bool have_record_arg)
3275 {
3276 	int			json_arg_num = have_record_arg ? 1 : 0;
3277 	JsValue		jsv = {0};
3278 	HeapTupleHeader rec;
3279 	Datum		rettuple;
3280 	JsonbValue	jbv;
3281 	MemoryContext fnmcxt = fcinfo->flinfo->fn_mcxt;
3282 	PopulateRecordCache *cache = fcinfo->flinfo->fn_extra;
3283 
3284 	/*
3285 	 * If first time through, identify input/result record type.  Note that
3286 	 * this stanza looks only at fcinfo context, which can't change during the
3287 	 * query; so we may not be able to fully resolve a RECORD input type yet.
3288 	 */
3289 	if (!cache)
3290 	{
3291 		fcinfo->flinfo->fn_extra = cache =
3292 			MemoryContextAllocZero(fnmcxt, sizeof(*cache));
3293 		cache->fn_mcxt = fnmcxt;
3294 
3295 		if (have_record_arg)
3296 			get_record_type_from_argument(fcinfo, funcname, cache);
3297 		else
3298 			get_record_type_from_query(fcinfo, funcname, cache);
3299 	}
3300 
3301 	/* Collect record arg if we have one */
3302 	if (!have_record_arg)
3303 		rec = NULL;				/* it's json{b}_to_record() */
3304 	else if (!PG_ARGISNULL(0))
3305 	{
3306 		rec = PG_GETARG_HEAPTUPLEHEADER(0);
3307 
3308 		/*
3309 		 * When declared arg type is RECORD, identify actual record type from
3310 		 * the tuple itself.
3311 		 */
3312 		if (cache->argtype == RECORDOID)
3313 		{
3314 			cache->c.io.composite.base_typid = HeapTupleHeaderGetTypeId(rec);
3315 			cache->c.io.composite.base_typmod = HeapTupleHeaderGetTypMod(rec);
3316 		}
3317 	}
3318 	else
3319 	{
3320 		rec = NULL;
3321 
3322 		/*
3323 		 * When declared arg type is RECORD, identify actual record type from
3324 		 * calling query, or fail if we can't.
3325 		 */
3326 		if (cache->argtype == RECORDOID)
3327 		{
3328 			get_record_type_from_query(fcinfo, funcname, cache);
3329 			/* This can't change argtype, which is important for next time */
3330 			Assert(cache->argtype == RECORDOID);
3331 		}
3332 	}
3333 
3334 	/* If no JSON argument, just return the record (if any) unchanged */
3335 	if (PG_ARGISNULL(json_arg_num))
3336 	{
3337 		if (rec)
3338 			PG_RETURN_POINTER(rec);
3339 		else
3340 			PG_RETURN_NULL();
3341 	}
3342 
3343 	jsv.is_json = is_json;
3344 
3345 	if (is_json)
3346 	{
3347 		text	   *json = PG_GETARG_TEXT_PP(json_arg_num);
3348 
3349 		jsv.val.json.str = VARDATA_ANY(json);
3350 		jsv.val.json.len = VARSIZE_ANY_EXHDR(json);
3351 		jsv.val.json.type = JSON_TOKEN_INVALID; /* not used in
3352 												 * populate_composite() */
3353 	}
3354 	else
3355 	{
3356 		Jsonb	   *jb = PG_GETARG_JSONB_P(json_arg_num);
3357 
3358 		jsv.val.jsonb = &jbv;
3359 
3360 		/* fill binary jsonb value pointing to jb */
3361 		jbv.type = jbvBinary;
3362 		jbv.val.binary.data = &jb->root;
3363 		jbv.val.binary.len = VARSIZE(jb) - VARHDRSZ;
3364 	}
3365 
3366 	rettuple = populate_composite(&cache->c.io.composite, cache->argtype,
3367 								  NULL, fnmcxt, rec, &jsv, false);
3368 
3369 	PG_RETURN_DATUM(rettuple);
3370 }
3371 
3372 /*
3373  * get_json_object_as_hash
3374  *
3375  * decompose a json object into a hash table.
3376  */
3377 static HTAB *
get_json_object_as_hash(char * json,int len,const char * funcname)3378 get_json_object_as_hash(char *json, int len, const char *funcname)
3379 {
3380 	HASHCTL		ctl;
3381 	HTAB	   *tab;
3382 	JHashState *state;
3383 	JsonLexContext *lex = makeJsonLexContextCstringLen(json, len, true);
3384 	JsonSemAction *sem;
3385 
3386 	memset(&ctl, 0, sizeof(ctl));
3387 	ctl.keysize = NAMEDATALEN;
3388 	ctl.entrysize = sizeof(JsonHashEntry);
3389 	ctl.hcxt = CurrentMemoryContext;
3390 	tab = hash_create("json object hashtable",
3391 					  100,
3392 					  &ctl,
3393 					  HASH_ELEM | HASH_CONTEXT);
3394 
3395 	state = palloc0(sizeof(JHashState));
3396 	sem = palloc0(sizeof(JsonSemAction));
3397 
3398 	state->function_name = funcname;
3399 	state->hash = tab;
3400 	state->lex = lex;
3401 
3402 	sem->semstate = (void *) state;
3403 	sem->array_start = hash_array_start;
3404 	sem->scalar = hash_scalar;
3405 	sem->object_field_start = hash_object_field_start;
3406 	sem->object_field_end = hash_object_field_end;
3407 
3408 	pg_parse_json(lex, sem);
3409 
3410 	return tab;
3411 }
3412 
3413 static void
hash_object_field_start(void * state,char * fname,bool isnull)3414 hash_object_field_start(void *state, char *fname, bool isnull)
3415 {
3416 	JHashState *_state = (JHashState *) state;
3417 
3418 	if (_state->lex->lex_level > 1)
3419 		return;
3420 
3421 	/* remember token type */
3422 	_state->saved_token_type = _state->lex->token_type;
3423 
3424 	if (_state->lex->token_type == JSON_TOKEN_ARRAY_START ||
3425 		_state->lex->token_type == JSON_TOKEN_OBJECT_START)
3426 	{
3427 		/* remember start position of the whole text of the subobject */
3428 		_state->save_json_start = _state->lex->token_start;
3429 	}
3430 	else
3431 	{
3432 		/* must be a scalar */
3433 		_state->save_json_start = NULL;
3434 	}
3435 }
3436 
3437 static void
hash_object_field_end(void * state,char * fname,bool isnull)3438 hash_object_field_end(void *state, char *fname, bool isnull)
3439 {
3440 	JHashState *_state = (JHashState *) state;
3441 	JsonHashEntry *hashentry;
3442 	bool		found;
3443 
3444 	/*
3445 	 * Ignore nested fields.
3446 	 */
3447 	if (_state->lex->lex_level > 1)
3448 		return;
3449 
3450 	/*
3451 	 * Ignore field names >= NAMEDATALEN - they can't match a record field.
3452 	 * (Note: without this test, the hash code would truncate the string at
3453 	 * NAMEDATALEN-1, and could then match against a similarly-truncated
3454 	 * record field name.  That would be a reasonable behavior, but this code
3455 	 * has previously insisted on exact equality, so we keep this behavior.)
3456 	 */
3457 	if (strlen(fname) >= NAMEDATALEN)
3458 		return;
3459 
3460 	hashentry = hash_search(_state->hash, fname, HASH_ENTER, &found);
3461 
3462 	/*
3463 	 * found being true indicates a duplicate. We don't do anything about
3464 	 * that, a later field with the same name overrides the earlier field.
3465 	 */
3466 
3467 	hashentry->type = _state->saved_token_type;
3468 	Assert(isnull == (hashentry->type == JSON_TOKEN_NULL));
3469 
3470 	if (_state->save_json_start != NULL)
3471 	{
3472 		int			len = _state->lex->prev_token_terminator - _state->save_json_start;
3473 		char	   *val = palloc((len + 1) * sizeof(char));
3474 
3475 		memcpy(val, _state->save_json_start, len);
3476 		val[len] = '\0';
3477 		hashentry->val = val;
3478 	}
3479 	else
3480 	{
3481 		/* must have had a scalar instead */
3482 		hashentry->val = _state->saved_scalar;
3483 	}
3484 }
3485 
3486 static void
hash_array_start(void * state)3487 hash_array_start(void *state)
3488 {
3489 	JHashState *_state = (JHashState *) state;
3490 
3491 	if (_state->lex->lex_level == 0)
3492 		ereport(ERROR,
3493 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3494 				 errmsg("cannot call %s on an array", _state->function_name)));
3495 }
3496 
3497 static void
hash_scalar(void * state,char * token,JsonTokenType tokentype)3498 hash_scalar(void *state, char *token, JsonTokenType tokentype)
3499 {
3500 	JHashState *_state = (JHashState *) state;
3501 
3502 	if (_state->lex->lex_level == 0)
3503 		ereport(ERROR,
3504 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3505 				 errmsg("cannot call %s on a scalar", _state->function_name)));
3506 
3507 	if (_state->lex->lex_level == 1)
3508 	{
3509 		_state->saved_scalar = token;
3510 		/* saved_token_type must already be set in hash_object_field_start() */
3511 		Assert(_state->saved_token_type == tokentype);
3512 	}
3513 }
3514 
3515 
/*
 * SQL functions json{b}_populate_recordset and json{b}_to_recordset
 *
 * Set fields in a set of records from the argument json{b},
 * which must be an array of objects.
 *
 * These are similar to json{b}_populate_record.  For json input, the
 * tuple-building code is pushed down into the semantic action handlers
 * so it's done per object in the array.
 */
3526 Datum
jsonb_populate_recordset(PG_FUNCTION_ARGS)3527 jsonb_populate_recordset(PG_FUNCTION_ARGS)
3528 {
3529 	return populate_recordset_worker(fcinfo, "jsonb_populate_recordset",
3530 									 false, true);
3531 }
3532 
3533 Datum
jsonb_to_recordset(PG_FUNCTION_ARGS)3534 jsonb_to_recordset(PG_FUNCTION_ARGS)
3535 {
3536 	return populate_recordset_worker(fcinfo, "jsonb_to_recordset",
3537 									 false, false);
3538 }
3539 
3540 Datum
json_populate_recordset(PG_FUNCTION_ARGS)3541 json_populate_recordset(PG_FUNCTION_ARGS)
3542 {
3543 	return populate_recordset_worker(fcinfo, "json_populate_recordset",
3544 									 true, true);
3545 }
3546 
3547 Datum
json_to_recordset(PG_FUNCTION_ARGS)3548 json_to_recordset(PG_FUNCTION_ARGS)
3549 {
3550 	return populate_recordset_worker(fcinfo, "json_to_recordset",
3551 									 true, false);
3552 }
3553 
3554 static void
populate_recordset_record(PopulateRecordsetState * state,JsObject * obj)3555 populate_recordset_record(PopulateRecordsetState *state, JsObject *obj)
3556 {
3557 	PopulateRecordCache *cache = state->cache;
3558 	HeapTupleHeader tuphead;
3559 	HeapTupleData tuple;
3560 
3561 	/* acquire/update cached tuple descriptor */
3562 	update_cached_tupdesc(&cache->c.io.composite, cache->fn_mcxt);
3563 
3564 	/* replace record fields from json */
3565 	tuphead = populate_record(cache->c.io.composite.tupdesc,
3566 							  &cache->c.io.composite.record_io,
3567 							  state->rec,
3568 							  cache->fn_mcxt,
3569 							  obj);
3570 
3571 	/* if it's domain over composite, check domain constraints */
3572 	if (cache->c.typcat == TYPECAT_COMPOSITE_DOMAIN)
3573 		domain_check(HeapTupleHeaderGetDatum(tuphead), false,
3574 					 cache->argtype,
3575 					 &cache->c.io.composite.domain_info,
3576 					 cache->fn_mcxt);
3577 
3578 	/* ok, save into tuplestore */
3579 	tuple.t_len = HeapTupleHeaderGetDatumLength(tuphead);
3580 	ItemPointerSetInvalid(&(tuple.t_self));
3581 	tuple.t_tableOid = InvalidOid;
3582 	tuple.t_data = tuphead;
3583 
3584 	tuplestore_puttuple(state->tuple_store, &tuple);
3585 }
3586 
3587 /*
3588  * common worker for json{b}_populate_recordset() and json{b}_to_recordset()
3589  * is_json and have_record_arg identify the specific function
3590  */
3591 static Datum
populate_recordset_worker(FunctionCallInfo fcinfo,const char * funcname,bool is_json,bool have_record_arg)3592 populate_recordset_worker(FunctionCallInfo fcinfo, const char *funcname,
3593 						  bool is_json, bool have_record_arg)
3594 {
3595 	int			json_arg_num = have_record_arg ? 1 : 0;
3596 	ReturnSetInfo *rsi;
3597 	MemoryContext old_cxt;
3598 	HeapTupleHeader rec;
3599 	PopulateRecordCache *cache = fcinfo->flinfo->fn_extra;
3600 	PopulateRecordsetState *state;
3601 
3602 	rsi = (ReturnSetInfo *) fcinfo->resultinfo;
3603 
3604 	if (!rsi || !IsA(rsi, ReturnSetInfo) ||
3605 		(rsi->allowedModes & SFRM_Materialize) == 0)
3606 		ereport(ERROR,
3607 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3608 				 errmsg("set-valued function called in context that "
3609 						"cannot accept a set")));
3610 
3611 	rsi->returnMode = SFRM_Materialize;
3612 
3613 	/*
3614 	 * If first time through, identify input/result record type.  Note that
3615 	 * this stanza looks only at fcinfo context, which can't change during the
3616 	 * query; so we may not be able to fully resolve a RECORD input type yet.
3617 	 */
3618 	if (!cache)
3619 	{
3620 		fcinfo->flinfo->fn_extra = cache =
3621 			MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt, sizeof(*cache));
3622 		cache->fn_mcxt = fcinfo->flinfo->fn_mcxt;
3623 
3624 		if (have_record_arg)
3625 			get_record_type_from_argument(fcinfo, funcname, cache);
3626 		else
3627 			get_record_type_from_query(fcinfo, funcname, cache);
3628 	}
3629 
3630 	/* Collect record arg if we have one */
3631 	if (!have_record_arg)
3632 		rec = NULL;				/* it's json{b}_to_recordset() */
3633 	else if (!PG_ARGISNULL(0))
3634 	{
3635 		rec = PG_GETARG_HEAPTUPLEHEADER(0);
3636 
3637 		/*
3638 		 * When declared arg type is RECORD, identify actual record type from
3639 		 * the tuple itself.
3640 		 */
3641 		if (cache->argtype == RECORDOID)
3642 		{
3643 			cache->c.io.composite.base_typid = HeapTupleHeaderGetTypeId(rec);
3644 			cache->c.io.composite.base_typmod = HeapTupleHeaderGetTypMod(rec);
3645 		}
3646 	}
3647 	else
3648 	{
3649 		rec = NULL;
3650 
3651 		/*
3652 		 * When declared arg type is RECORD, identify actual record type from
3653 		 * calling query, or fail if we can't.
3654 		 */
3655 		if (cache->argtype == RECORDOID)
3656 		{
3657 			get_record_type_from_query(fcinfo, funcname, cache);
3658 			/* This can't change argtype, which is important for next time */
3659 			Assert(cache->argtype == RECORDOID);
3660 		}
3661 	}
3662 
3663 	/* if the json is null send back an empty set */
3664 	if (PG_ARGISNULL(json_arg_num))
3665 		PG_RETURN_NULL();
3666 
3667 	/*
3668 	 * Forcibly update the cached tupdesc, to ensure we have the right tupdesc
3669 	 * to return even if the JSON contains no rows.
3670 	 */
3671 	update_cached_tupdesc(&cache->c.io.composite, cache->fn_mcxt);
3672 
3673 	state = palloc0(sizeof(PopulateRecordsetState));
3674 
3675 	/* make tuplestore in a sufficiently long-lived memory context */
3676 	old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
3677 	state->tuple_store = tuplestore_begin_heap(rsi->allowedModes &
3678 											   SFRM_Materialize_Random,
3679 											   false, work_mem);
3680 	MemoryContextSwitchTo(old_cxt);
3681 
3682 	state->function_name = funcname;
3683 	state->cache = cache;
3684 	state->rec = rec;
3685 
3686 	if (is_json)
3687 	{
3688 		text	   *json = PG_GETARG_TEXT_PP(json_arg_num);
3689 		JsonLexContext *lex;
3690 		JsonSemAction *sem;
3691 
3692 		sem = palloc0(sizeof(JsonSemAction));
3693 
3694 		lex = makeJsonLexContext(json, true);
3695 
3696 		sem->semstate = (void *) state;
3697 		sem->array_start = populate_recordset_array_start;
3698 		sem->array_element_start = populate_recordset_array_element_start;
3699 		sem->scalar = populate_recordset_scalar;
3700 		sem->object_field_start = populate_recordset_object_field_start;
3701 		sem->object_field_end = populate_recordset_object_field_end;
3702 		sem->object_start = populate_recordset_object_start;
3703 		sem->object_end = populate_recordset_object_end;
3704 
3705 		state->lex = lex;
3706 
3707 		pg_parse_json(lex, sem);
3708 	}
3709 	else
3710 	{
3711 		Jsonb	   *jb = PG_GETARG_JSONB_P(json_arg_num);
3712 		JsonbIterator *it;
3713 		JsonbValue	v;
3714 		bool		skipNested = false;
3715 		JsonbIteratorToken r;
3716 
3717 		if (JB_ROOT_IS_SCALAR(jb) || !JB_ROOT_IS_ARRAY(jb))
3718 			ereport(ERROR,
3719 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3720 					 errmsg("cannot call %s on a non-array",
3721 							funcname)));
3722 
3723 		it = JsonbIteratorInit(&jb->root);
3724 
3725 		while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE)
3726 		{
3727 			skipNested = true;
3728 
3729 			if (r == WJB_ELEM)
3730 			{
3731 				JsObject	obj;
3732 
3733 				if (v.type != jbvBinary ||
3734 					!JsonContainerIsObject(v.val.binary.data))
3735 					ereport(ERROR,
3736 							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3737 							 errmsg("argument of %s must be an array of objects",
3738 									funcname)));
3739 
3740 				obj.is_json = false;
3741 				obj.val.jsonb_cont = v.val.binary.data;
3742 
3743 				populate_recordset_record(state, &obj);
3744 			}
3745 		}
3746 	}
3747 
3748 	/*
3749 	 * Note: we must copy the cached tupdesc because the executor will free
3750 	 * the passed-back setDesc, but we want to hang onto the cache in case
3751 	 * we're called again in the same query.
3752 	 */
3753 	rsi->setResult = state->tuple_store;
3754 	rsi->setDesc = CreateTupleDescCopy(cache->c.io.composite.tupdesc);
3755 
3756 	PG_RETURN_NULL();
3757 }
3758 
3759 static void
populate_recordset_object_start(void * state)3760 populate_recordset_object_start(void *state)
3761 {
3762 	PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
3763 	int			lex_level = _state->lex->lex_level;
3764 	HASHCTL		ctl;
3765 
3766 	/* Reject object at top level: we must have an array at level 0 */
3767 	if (lex_level == 0)
3768 		ereport(ERROR,
3769 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3770 				 errmsg("cannot call %s on an object",
3771 						_state->function_name)));
3772 
3773 	/* Nested objects require no special processing */
3774 	if (lex_level > 1)
3775 		return;
3776 
3777 	/* Object at level 1: set up a new hash table for this object */
3778 	memset(&ctl, 0, sizeof(ctl));
3779 	ctl.keysize = NAMEDATALEN;
3780 	ctl.entrysize = sizeof(JsonHashEntry);
3781 	ctl.hcxt = CurrentMemoryContext;
3782 	_state->json_hash = hash_create("json object hashtable",
3783 									100,
3784 									&ctl,
3785 									HASH_ELEM | HASH_CONTEXT);
3786 }
3787 
3788 static void
populate_recordset_object_end(void * state)3789 populate_recordset_object_end(void *state)
3790 {
3791 	PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
3792 	JsObject	obj;
3793 
3794 	/* Nested objects require no special processing */
3795 	if (_state->lex->lex_level > 1)
3796 		return;
3797 
3798 	obj.is_json = true;
3799 	obj.val.json_hash = _state->json_hash;
3800 
3801 	/* Otherwise, construct and return a tuple based on this level-1 object */
3802 	populate_recordset_record(_state, &obj);
3803 
3804 	/* Done with hash for this object */
3805 	hash_destroy(_state->json_hash);
3806 	_state->json_hash = NULL;
3807 }
3808 
3809 static void
populate_recordset_array_element_start(void * state,bool isnull)3810 populate_recordset_array_element_start(void *state, bool isnull)
3811 {
3812 	PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
3813 
3814 	if (_state->lex->lex_level == 1 &&
3815 		_state->lex->token_type != JSON_TOKEN_OBJECT_START)
3816 		ereport(ERROR,
3817 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3818 				 errmsg("argument of %s must be an array of objects",
3819 						_state->function_name)));
3820 }
3821 
/*
 * Semantic action: an array starts.  No bookkeeping is needed at this
 * point, at any nesting level.
 */
static void
populate_recordset_array_start(void *state)
{
	/* nothing to do */
}
3827 
3828 static void
populate_recordset_scalar(void * state,char * token,JsonTokenType tokentype)3829 populate_recordset_scalar(void *state, char *token, JsonTokenType tokentype)
3830 {
3831 	PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
3832 
3833 	if (_state->lex->lex_level == 0)
3834 		ereport(ERROR,
3835 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3836 				 errmsg("cannot call %s on a scalar",
3837 						_state->function_name)));
3838 
3839 	if (_state->lex->lex_level == 2)
3840 		_state->saved_scalar = token;
3841 }
3842 
3843 static void
populate_recordset_object_field_start(void * state,char * fname,bool isnull)3844 populate_recordset_object_field_start(void *state, char *fname, bool isnull)
3845 {
3846 	PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
3847 
3848 	if (_state->lex->lex_level > 2)
3849 		return;
3850 
3851 	_state->saved_token_type = _state->lex->token_type;
3852 
3853 	if (_state->lex->token_type == JSON_TOKEN_ARRAY_START ||
3854 		_state->lex->token_type == JSON_TOKEN_OBJECT_START)
3855 	{
3856 		_state->save_json_start = _state->lex->token_start;
3857 	}
3858 	else
3859 	{
3860 		_state->save_json_start = NULL;
3861 	}
3862 }
3863 
3864 static void
populate_recordset_object_field_end(void * state,char * fname,bool isnull)3865 populate_recordset_object_field_end(void *state, char *fname, bool isnull)
3866 {
3867 	PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
3868 	JsonHashEntry *hashentry;
3869 	bool		found;
3870 
3871 	/*
3872 	 * Ignore nested fields.
3873 	 */
3874 	if (_state->lex->lex_level > 2)
3875 		return;
3876 
3877 	/*
3878 	 * Ignore field names >= NAMEDATALEN - they can't match a record field.
3879 	 * (Note: without this test, the hash code would truncate the string at
3880 	 * NAMEDATALEN-1, and could then match against a similarly-truncated
3881 	 * record field name.  That would be a reasonable behavior, but this code
3882 	 * has previously insisted on exact equality, so we keep this behavior.)
3883 	 */
3884 	if (strlen(fname) >= NAMEDATALEN)
3885 		return;
3886 
3887 	hashentry = hash_search(_state->json_hash, fname, HASH_ENTER, &found);
3888 
3889 	/*
3890 	 * found being true indicates a duplicate. We don't do anything about
3891 	 * that, a later field with the same name overrides the earlier field.
3892 	 */
3893 
3894 	hashentry->type = _state->saved_token_type;
3895 	Assert(isnull == (hashentry->type == JSON_TOKEN_NULL));
3896 
3897 	if (_state->save_json_start != NULL)
3898 	{
3899 		int			len = _state->lex->prev_token_terminator - _state->save_json_start;
3900 		char	   *val = palloc((len + 1) * sizeof(char));
3901 
3902 		memcpy(val, _state->save_json_start, len);
3903 		val[len] = '\0';
3904 		hashentry->val = val;
3905 	}
3906 	else
3907 	{
3908 		/* must have had a scalar instead */
3909 		hashentry->val = _state->saved_scalar;
3910 	}
3911 }
3912 
3913 /*
3914  * findJsonbValueFromContainer() wrapper that sets up JsonbValue key string.
3915  */
3916 static JsonbValue *
findJsonbValueFromContainerLen(JsonbContainer * container,uint32 flags,char * key,uint32 keylen)3917 findJsonbValueFromContainerLen(JsonbContainer *container, uint32 flags,
3918 							   char *key, uint32 keylen)
3919 {
3920 	JsonbValue	k;
3921 
3922 	k.type = jbvString;
3923 	k.val.string.val = key;
3924 	k.val.string.len = keylen;
3925 
3926 	return findJsonbValueFromContainer(container, flags, &k);
3927 }
3928 
3929 /*
3930  * Semantic actions for json_strip_nulls.
3931  *
3932  * Simply repeat the input on the output unless we encounter
3933  * a null object field. State for this is set when the field
3934  * is started and reset when the scalar action (which must be next)
3935  * is called.
3936  */
3937 
3938 static void
sn_object_start(void * state)3939 sn_object_start(void *state)
3940 {
3941 	StripnullState *_state = (StripnullState *) state;
3942 
3943 	appendStringInfoCharMacro(_state->strval, '{');
3944 }
3945 
3946 static void
sn_object_end(void * state)3947 sn_object_end(void *state)
3948 {
3949 	StripnullState *_state = (StripnullState *) state;
3950 
3951 	appendStringInfoCharMacro(_state->strval, '}');
3952 }
3953 
3954 static void
sn_array_start(void * state)3955 sn_array_start(void *state)
3956 {
3957 	StripnullState *_state = (StripnullState *) state;
3958 
3959 	appendStringInfoCharMacro(_state->strval, '[');
3960 }
3961 
3962 static void
sn_array_end(void * state)3963 sn_array_end(void *state)
3964 {
3965 	StripnullState *_state = (StripnullState *) state;
3966 
3967 	appendStringInfoCharMacro(_state->strval, ']');
3968 }
3969 
3970 static void
sn_object_field_start(void * state,char * fname,bool isnull)3971 sn_object_field_start(void *state, char *fname, bool isnull)
3972 {
3973 	StripnullState *_state = (StripnullState *) state;
3974 
3975 	if (isnull)
3976 	{
3977 		/*
3978 		 * The next thing must be a scalar or isnull couldn't be true, so
3979 		 * there is no danger of this state being carried down into a nested
3980 		 * object or array. The flag will be reset in the scalar action.
3981 		 */
3982 		_state->skip_next_null = true;
3983 		return;
3984 	}
3985 
3986 	if (_state->strval->data[_state->strval->len - 1] != '{')
3987 		appendStringInfoCharMacro(_state->strval, ',');
3988 
3989 	/*
3990 	 * Unfortunately we don't have the quoted and escaped string any more, so
3991 	 * we have to re-escape it.
3992 	 */
3993 	escape_json(_state->strval, fname);
3994 
3995 	appendStringInfoCharMacro(_state->strval, ':');
3996 }
3997 
3998 static void
sn_array_element_start(void * state,bool isnull)3999 sn_array_element_start(void *state, bool isnull)
4000 {
4001 	StripnullState *_state = (StripnullState *) state;
4002 
4003 	if (_state->strval->data[_state->strval->len - 1] != '[')
4004 		appendStringInfoCharMacro(_state->strval, ',');
4005 }
4006 
4007 static void
sn_scalar(void * state,char * token,JsonTokenType tokentype)4008 sn_scalar(void *state, char *token, JsonTokenType tokentype)
4009 {
4010 	StripnullState *_state = (StripnullState *) state;
4011 
4012 	if (_state->skip_next_null)
4013 	{
4014 		Assert(tokentype == JSON_TOKEN_NULL);
4015 		_state->skip_next_null = false;
4016 		return;
4017 	}
4018 
4019 	if (tokentype == JSON_TOKEN_STRING)
4020 		escape_json(_state->strval, token);
4021 	else
4022 		appendStringInfoString(_state->strval, token);
4023 }
4024 
4025 /*
4026  * SQL function json_strip_nulls(json) -> json
4027  */
4028 Datum
json_strip_nulls(PG_FUNCTION_ARGS)4029 json_strip_nulls(PG_FUNCTION_ARGS)
4030 {
4031 	text	   *json = PG_GETARG_TEXT_PP(0);
4032 	StripnullState *state;
4033 	JsonLexContext *lex;
4034 	JsonSemAction *sem;
4035 
4036 	lex = makeJsonLexContext(json, true);
4037 	state = palloc0(sizeof(StripnullState));
4038 	sem = palloc0(sizeof(JsonSemAction));
4039 
4040 	state->strval = makeStringInfo();
4041 	state->skip_next_null = false;
4042 	state->lex = lex;
4043 
4044 	sem->semstate = (void *) state;
4045 	sem->object_start = sn_object_start;
4046 	sem->object_end = sn_object_end;
4047 	sem->array_start = sn_array_start;
4048 	sem->array_end = sn_array_end;
4049 	sem->scalar = sn_scalar;
4050 	sem->array_element_start = sn_array_element_start;
4051 	sem->object_field_start = sn_object_field_start;
4052 
4053 	pg_parse_json(lex, sem);
4054 
4055 	PG_RETURN_TEXT_P(cstring_to_text_with_len(state->strval->data,
4056 											  state->strval->len));
4057 
4058 }
4059 
4060 /*
4061  * SQL function jsonb_strip_nulls(jsonb) -> jsonb
4062  */
4063 Datum
jsonb_strip_nulls(PG_FUNCTION_ARGS)4064 jsonb_strip_nulls(PG_FUNCTION_ARGS)
4065 {
4066 	Jsonb	   *jb = PG_GETARG_JSONB_P(0);
4067 	JsonbIterator *it;
4068 	JsonbParseState *parseState = NULL;
4069 	JsonbValue *res = NULL;
4070 	JsonbValue	v,
4071 				k;
4072 	JsonbIteratorToken type;
4073 	bool		last_was_key = false;
4074 
4075 	if (JB_ROOT_IS_SCALAR(jb))
4076 		PG_RETURN_POINTER(jb);
4077 
4078 	it = JsonbIteratorInit(&jb->root);
4079 
4080 	while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
4081 	{
4082 		Assert(!(type == WJB_KEY && last_was_key));
4083 
4084 		if (type == WJB_KEY)
4085 		{
4086 			/* stash the key until we know if it has a null value */
4087 			k = v;
4088 			last_was_key = true;
4089 			continue;
4090 		}
4091 
4092 		if (last_was_key)
4093 		{
4094 			/* if the last element was a key this one can't be */
4095 			last_was_key = false;
4096 
4097 			/* skip this field if value is null */
4098 			if (type == WJB_VALUE && v.type == jbvNull)
4099 				continue;
4100 
4101 			/* otherwise, do a delayed push of the key */
4102 			(void) pushJsonbValue(&parseState, WJB_KEY, &k);
4103 		}
4104 
4105 		if (type == WJB_VALUE || type == WJB_ELEM)
4106 			res = pushJsonbValue(&parseState, type, &v);
4107 		else
4108 			res = pushJsonbValue(&parseState, type, NULL);
4109 	}
4110 
4111 	Assert(res != NULL);
4112 
4113 	PG_RETURN_POINTER(JsonbValueToJsonb(res));
4114 }
4115 
4116 /*
4117  * Add values from the jsonb to the parse state.
4118  *
4119  * If the parse state container is an object, the jsonb is pushed as
4120  * a value, not a key.
4121  *
4122  * This needs to be done using an iterator because pushJsonbValue doesn't
4123  * like getting jbvBinary values, so we can't just push jb as a whole.
4124  */
4125 static void
addJsonbToParseState(JsonbParseState ** jbps,Jsonb * jb)4126 addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb)
4127 {
4128 	JsonbIterator *it;
4129 	JsonbValue *o = &(*jbps)->contVal;
4130 	JsonbValue	v;
4131 	JsonbIteratorToken type;
4132 
4133 	it = JsonbIteratorInit(&jb->root);
4134 
4135 	Assert(o->type == jbvArray || o->type == jbvObject);
4136 
4137 	if (JB_ROOT_IS_SCALAR(jb))
4138 	{
4139 		(void) JsonbIteratorNext(&it, &v, false);	/* skip array header */
4140 		Assert(v.type == jbvArray);
4141 		(void) JsonbIteratorNext(&it, &v, false);	/* fetch scalar value */
4142 
4143 		switch (o->type)
4144 		{
4145 			case jbvArray:
4146 				(void) pushJsonbValue(jbps, WJB_ELEM, &v);
4147 				break;
4148 			case jbvObject:
4149 				(void) pushJsonbValue(jbps, WJB_VALUE, &v);
4150 				break;
4151 			default:
4152 				elog(ERROR, "unexpected parent of nested structure");
4153 		}
4154 	}
4155 	else
4156 	{
4157 		while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
4158 		{
4159 			if (type == WJB_KEY || type == WJB_VALUE || type == WJB_ELEM)
4160 				(void) pushJsonbValue(jbps, type, &v);
4161 			else
4162 				(void) pushJsonbValue(jbps, type, NULL);
4163 		}
4164 	}
4165 
4166 }
4167 
4168 /*
4169  * SQL function jsonb_pretty (jsonb)
4170  *
4171  * Pretty-printed text for the jsonb
4172  */
4173 Datum
jsonb_pretty(PG_FUNCTION_ARGS)4174 jsonb_pretty(PG_FUNCTION_ARGS)
4175 {
4176 	Jsonb	   *jb = PG_GETARG_JSONB_P(0);
4177 	StringInfo	str = makeStringInfo();
4178 
4179 	JsonbToCStringIndent(str, &jb->root, VARSIZE(jb));
4180 
4181 	PG_RETURN_TEXT_P(cstring_to_text_with_len(str->data, str->len));
4182 }
4183 
4184 /*
4185  * SQL function jsonb_concat (jsonb, jsonb)
4186  *
4187  * function for || operator
4188  */
4189 Datum
jsonb_concat(PG_FUNCTION_ARGS)4190 jsonb_concat(PG_FUNCTION_ARGS)
4191 {
4192 	Jsonb	   *jb1 = PG_GETARG_JSONB_P(0);
4193 	Jsonb	   *jb2 = PG_GETARG_JSONB_P(1);
4194 	JsonbParseState *state = NULL;
4195 	JsonbValue *res;
4196 	JsonbIterator *it1,
4197 			   *it2;
4198 
4199 	/*
4200 	 * If one of the jsonb is empty, just return the other if it's not scalar
4201 	 * and both are of the same kind.  If it's a scalar or they are of
4202 	 * different kinds we need to perform the concatenation even if one is
4203 	 * empty.
4204 	 */
4205 	if (JB_ROOT_IS_OBJECT(jb1) == JB_ROOT_IS_OBJECT(jb2))
4206 	{
4207 		if (JB_ROOT_COUNT(jb1) == 0 && !JB_ROOT_IS_SCALAR(jb2))
4208 			PG_RETURN_JSONB_P(jb2);
4209 		else if (JB_ROOT_COUNT(jb2) == 0 && !JB_ROOT_IS_SCALAR(jb1))
4210 			PG_RETURN_JSONB_P(jb1);
4211 	}
4212 
4213 	it1 = JsonbIteratorInit(&jb1->root);
4214 	it2 = JsonbIteratorInit(&jb2->root);
4215 
4216 	res = IteratorConcat(&it1, &it2, &state);
4217 
4218 	Assert(res != NULL);
4219 
4220 	PG_RETURN_JSONB_P(JsonbValueToJsonb(res));
4221 }
4222 
4223 
4224 /*
4225  * SQL function jsonb_delete (jsonb, text)
4226  *
4227  * return a copy of the jsonb with the indicated item
4228  * removed.
4229  */
4230 Datum
jsonb_delete(PG_FUNCTION_ARGS)4231 jsonb_delete(PG_FUNCTION_ARGS)
4232 {
4233 	Jsonb	   *in = PG_GETARG_JSONB_P(0);
4234 	text	   *key = PG_GETARG_TEXT_PP(1);
4235 	char	   *keyptr = VARDATA_ANY(key);
4236 	int			keylen = VARSIZE_ANY_EXHDR(key);
4237 	JsonbParseState *state = NULL;
4238 	JsonbIterator *it;
4239 	JsonbValue	v,
4240 			   *res = NULL;
4241 	bool		skipNested = false;
4242 	JsonbIteratorToken r;
4243 
4244 	if (JB_ROOT_IS_SCALAR(in))
4245 		ereport(ERROR,
4246 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4247 				 errmsg("cannot delete from scalar")));
4248 
4249 	if (JB_ROOT_COUNT(in) == 0)
4250 		PG_RETURN_JSONB_P(in);
4251 
4252 	it = JsonbIteratorInit(&in->root);
4253 
4254 	while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE)
4255 	{
4256 		skipNested = true;
4257 
4258 		if ((r == WJB_ELEM || r == WJB_KEY) &&
4259 			(v.type == jbvString && keylen == v.val.string.len &&
4260 			 memcmp(keyptr, v.val.string.val, keylen) == 0))
4261 		{
4262 			/* skip corresponding value as well */
4263 			if (r == WJB_KEY)
4264 				(void) JsonbIteratorNext(&it, &v, true);
4265 
4266 			continue;
4267 		}
4268 
4269 		res = pushJsonbValue(&state, r, r < WJB_BEGIN_ARRAY ? &v : NULL);
4270 	}
4271 
4272 	Assert(res != NULL);
4273 
4274 	PG_RETURN_JSONB_P(JsonbValueToJsonb(res));
4275 }
4276 
4277 /*
4278  * SQL function jsonb_delete (jsonb, variadic text[])
4279  *
4280  * return a copy of the jsonb with the indicated items
4281  * removed.
4282  */
4283 Datum
jsonb_delete_array(PG_FUNCTION_ARGS)4284 jsonb_delete_array(PG_FUNCTION_ARGS)
4285 {
4286 	Jsonb	   *in = PG_GETARG_JSONB_P(0);
4287 	ArrayType  *keys = PG_GETARG_ARRAYTYPE_P(1);
4288 	Datum	   *keys_elems;
4289 	bool	   *keys_nulls;
4290 	int			keys_len;
4291 	JsonbParseState *state = NULL;
4292 	JsonbIterator *it;
4293 	JsonbValue	v,
4294 			   *res = NULL;
4295 	bool		skipNested = false;
4296 	JsonbIteratorToken r;
4297 
4298 	if (ARR_NDIM(keys) > 1)
4299 		ereport(ERROR,
4300 				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
4301 				 errmsg("wrong number of array subscripts")));
4302 
4303 	if (JB_ROOT_IS_SCALAR(in))
4304 		ereport(ERROR,
4305 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4306 				 errmsg("cannot delete from scalar")));
4307 
4308 	if (JB_ROOT_COUNT(in) == 0)
4309 		PG_RETURN_JSONB_P(in);
4310 
4311 	deconstruct_array(keys, TEXTOID, -1, false, 'i',
4312 					  &keys_elems, &keys_nulls, &keys_len);
4313 
4314 	if (keys_len == 0)
4315 		PG_RETURN_JSONB_P(in);
4316 
4317 	it = JsonbIteratorInit(&in->root);
4318 
4319 	while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE)
4320 	{
4321 		skipNested = true;
4322 
4323 		if ((r == WJB_ELEM || r == WJB_KEY) && v.type == jbvString)
4324 		{
4325 			int			i;
4326 			bool		found = false;
4327 
4328 			for (i = 0; i < keys_len; i++)
4329 			{
4330 				char	   *keyptr;
4331 				int			keylen;
4332 
4333 				if (keys_nulls[i])
4334 					continue;
4335 
4336 				keyptr = VARDATA_ANY(keys_elems[i]);
4337 				keylen = VARSIZE_ANY_EXHDR(keys_elems[i]);
4338 				if (keylen == v.val.string.len &&
4339 					memcmp(keyptr, v.val.string.val, keylen) == 0)
4340 				{
4341 					found = true;
4342 					break;
4343 				}
4344 			}
4345 			if (found)
4346 			{
4347 				/* skip corresponding value as well */
4348 				if (r == WJB_KEY)
4349 					(void) JsonbIteratorNext(&it, &v, true);
4350 
4351 				continue;
4352 			}
4353 		}
4354 
4355 		res = pushJsonbValue(&state, r, r < WJB_BEGIN_ARRAY ? &v : NULL);
4356 	}
4357 
4358 	Assert(res != NULL);
4359 
4360 	PG_RETURN_JSONB_P(JsonbValueToJsonb(res));
4361 }
4362 
4363 /*
4364  * SQL function jsonb_delete (jsonb, int)
4365  *
4366  * return a copy of the jsonb with the indicated item
4367  * removed. Negative int means count back from the
4368  * end of the items.
4369  */
4370 Datum
jsonb_delete_idx(PG_FUNCTION_ARGS)4371 jsonb_delete_idx(PG_FUNCTION_ARGS)
4372 {
4373 	Jsonb	   *in = PG_GETARG_JSONB_P(0);
4374 	int			idx = PG_GETARG_INT32(1);
4375 	JsonbParseState *state = NULL;
4376 	JsonbIterator *it;
4377 	uint32		i = 0,
4378 				n;
4379 	JsonbValue	v,
4380 			   *res = NULL;
4381 	JsonbIteratorToken r;
4382 
4383 	if (JB_ROOT_IS_SCALAR(in))
4384 		ereport(ERROR,
4385 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4386 				 errmsg("cannot delete from scalar")));
4387 
4388 	if (JB_ROOT_IS_OBJECT(in))
4389 		ereport(ERROR,
4390 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4391 				 errmsg("cannot delete from object using integer index")));
4392 
4393 	if (JB_ROOT_COUNT(in) == 0)
4394 		PG_RETURN_JSONB_P(in);
4395 
4396 	it = JsonbIteratorInit(&in->root);
4397 
4398 	r = JsonbIteratorNext(&it, &v, false);
4399 	Assert(r == WJB_BEGIN_ARRAY);
4400 	n = v.val.array.nElems;
4401 
4402 	if (idx < 0)
4403 	{
4404 		if (-idx > n)
4405 			idx = n;
4406 		else
4407 			idx = n + idx;
4408 	}
4409 
4410 	if (idx >= n)
4411 		PG_RETURN_JSONB_P(in);
4412 
4413 	pushJsonbValue(&state, r, NULL);
4414 
4415 	while ((r = JsonbIteratorNext(&it, &v, true)) != WJB_DONE)
4416 	{
4417 		if (r == WJB_ELEM)
4418 		{
4419 			if (i++ == idx)
4420 				continue;
4421 		}
4422 
4423 		res = pushJsonbValue(&state, r, r < WJB_BEGIN_ARRAY ? &v : NULL);
4424 	}
4425 
4426 	Assert(res != NULL);
4427 
4428 	PG_RETURN_JSONB_P(JsonbValueToJsonb(res));
4429 }
4430 
4431 /*
4432  * SQL function jsonb_set(jsonb, text[], jsonb, boolean)
4433  *
4434  */
4435 Datum
jsonb_set(PG_FUNCTION_ARGS)4436 jsonb_set(PG_FUNCTION_ARGS)
4437 {
4438 	Jsonb	   *in = PG_GETARG_JSONB_P(0);
4439 	ArrayType  *path = PG_GETARG_ARRAYTYPE_P(1);
4440 	Jsonb	   *newval = PG_GETARG_JSONB_P(2);
4441 	bool		create = PG_GETARG_BOOL(3);
4442 	JsonbValue *res = NULL;
4443 	Datum	   *path_elems;
4444 	bool	   *path_nulls;
4445 	int			path_len;
4446 	JsonbIterator *it;
4447 	JsonbParseState *st = NULL;
4448 
4449 	if (ARR_NDIM(path) > 1)
4450 		ereport(ERROR,
4451 				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
4452 				 errmsg("wrong number of array subscripts")));
4453 
4454 	if (JB_ROOT_IS_SCALAR(in))
4455 		ereport(ERROR,
4456 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4457 				 errmsg("cannot set path in scalar")));
4458 
4459 	if (JB_ROOT_COUNT(in) == 0 && !create)
4460 		PG_RETURN_JSONB_P(in);
4461 
4462 	deconstruct_array(path, TEXTOID, -1, false, 'i',
4463 					  &path_elems, &path_nulls, &path_len);
4464 
4465 	if (path_len == 0)
4466 		PG_RETURN_JSONB_P(in);
4467 
4468 	it = JsonbIteratorInit(&in->root);
4469 
4470 	res = setPath(&it, path_elems, path_nulls, path_len, &st,
4471 				  0, newval, create ? JB_PATH_CREATE : JB_PATH_REPLACE);
4472 
4473 	Assert(res != NULL);
4474 
4475 	PG_RETURN_JSONB_P(JsonbValueToJsonb(res));
4476 }
4477 
4478 
4479 /*
4480  * SQL function jsonb_delete_path(jsonb, text[])
4481  */
4482 Datum
jsonb_delete_path(PG_FUNCTION_ARGS)4483 jsonb_delete_path(PG_FUNCTION_ARGS)
4484 {
4485 	Jsonb	   *in = PG_GETARG_JSONB_P(0);
4486 	ArrayType  *path = PG_GETARG_ARRAYTYPE_P(1);
4487 	JsonbValue *res = NULL;
4488 	Datum	   *path_elems;
4489 	bool	   *path_nulls;
4490 	int			path_len;
4491 	JsonbIterator *it;
4492 	JsonbParseState *st = NULL;
4493 
4494 	if (ARR_NDIM(path) > 1)
4495 		ereport(ERROR,
4496 				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
4497 				 errmsg("wrong number of array subscripts")));
4498 
4499 	if (JB_ROOT_IS_SCALAR(in))
4500 		ereport(ERROR,
4501 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4502 				 errmsg("cannot delete path in scalar")));
4503 
4504 	if (JB_ROOT_COUNT(in) == 0)
4505 		PG_RETURN_JSONB_P(in);
4506 
4507 	deconstruct_array(path, TEXTOID, -1, false, 'i',
4508 					  &path_elems, &path_nulls, &path_len);
4509 
4510 	if (path_len == 0)
4511 		PG_RETURN_JSONB_P(in);
4512 
4513 	it = JsonbIteratorInit(&in->root);
4514 
4515 	res = setPath(&it, path_elems, path_nulls, path_len, &st,
4516 				  0, NULL, JB_PATH_DELETE);
4517 
4518 	Assert(res != NULL);
4519 
4520 	PG_RETURN_JSONB_P(JsonbValueToJsonb(res));
4521 }
4522 
4523 /*
4524  * SQL function jsonb_insert(jsonb, text[], jsonb, boolean)
4525  *
4526  */
4527 Datum
jsonb_insert(PG_FUNCTION_ARGS)4528 jsonb_insert(PG_FUNCTION_ARGS)
4529 {
4530 	Jsonb	   *in = PG_GETARG_JSONB_P(0);
4531 	ArrayType  *path = PG_GETARG_ARRAYTYPE_P(1);
4532 	Jsonb	   *newval = PG_GETARG_JSONB_P(2);
4533 	bool		after = PG_GETARG_BOOL(3);
4534 	JsonbValue *res = NULL;
4535 	Datum	   *path_elems;
4536 	bool	   *path_nulls;
4537 	int			path_len;
4538 	JsonbIterator *it;
4539 	JsonbParseState *st = NULL;
4540 
4541 	if (ARR_NDIM(path) > 1)
4542 		ereport(ERROR,
4543 				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
4544 				 errmsg("wrong number of array subscripts")));
4545 
4546 	if (JB_ROOT_IS_SCALAR(in))
4547 		ereport(ERROR,
4548 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4549 				 errmsg("cannot set path in scalar")));
4550 
4551 	deconstruct_array(path, TEXTOID, -1, false, 'i',
4552 					  &path_elems, &path_nulls, &path_len);
4553 
4554 	if (path_len == 0)
4555 		PG_RETURN_JSONB_P(in);
4556 
4557 	it = JsonbIteratorInit(&in->root);
4558 
4559 	res = setPath(&it, path_elems, path_nulls, path_len, &st, 0, newval,
4560 				  after ? JB_PATH_INSERT_AFTER : JB_PATH_INSERT_BEFORE);
4561 
4562 	Assert(res != NULL);
4563 
4564 	PG_RETURN_JSONB_P(JsonbValueToJsonb(res));
4565 }
4566 
/*
 * Merge the contents of two jsonb iterators into one value, built in *state.
 *
 * The logic of this function is copied from the corresponding hstore
 * function, except for the case where both it1 and it2 represent objects:
 * there we just append the content of it2 to it1 without any verification.
 *
 * The result depends on the kinds of the two inputs:
 *	object || object	one object holding the pairs of both
 *	array  || array		one array holding the elements of both
 *	object || array		an array: the object first, then the array's elements
 *	array  || object	an array: the array's elements, then the object
 *
 * Returns the JsonbValue produced by the final pushJsonbValue() call.
 */
static JsonbValue *
IteratorConcat(JsonbIterator **it1, JsonbIterator **it2,
			   JsonbParseState **state)
{
	JsonbValue	v1,
				v2,
			   *res = NULL;
	JsonbIteratorToken r1,
				r2,
				rk1,
				rk2;

	/* The first token of each input tells us what kind of container it is. */
	rk1 = JsonbIteratorNext(it1, &v1, false);
	rk2 = JsonbIteratorNext(it2, &v2, false);

	/*
	 * JsonbIteratorNext reports raw scalars as if they were single-element
	 * arrays; hence we only need consider "object" and "array" cases here.
	 */
	if (rk1 == WJB_BEGIN_OBJECT && rk2 == WJB_BEGIN_OBJECT)
	{
		/*
		 * Both inputs are objects.
		 *
		 * Append all the tokens from v1 to res, except last WJB_END_OBJECT
		 * (because res will not be finished yet).
		 */
		pushJsonbValue(state, rk1, NULL);
		while ((r1 = JsonbIteratorNext(it1, &v1, true)) != WJB_END_OBJECT)
			pushJsonbValue(state, r1, &v1);

		/*
		 * Append all the tokens from v2 to res, including last WJB_END_OBJECT
		 * (the concatenation will be completed).  Any duplicate keys will
		 * automatically override the value from the first object.
		 */
		while ((r2 = JsonbIteratorNext(it2, &v2, true)) != WJB_DONE)
			res = pushJsonbValue(state, r2, r2 != WJB_END_OBJECT ? &v2 : NULL);
	}
	else if (rk1 == WJB_BEGIN_ARRAY && rk2 == WJB_BEGIN_ARRAY)
	{
		/*
		 * Both inputs are arrays.
		 */
		pushJsonbValue(state, rk1, NULL);

		/* elements of the first array; its end token is not pushed */
		while ((r1 = JsonbIteratorNext(it1, &v1, true)) != WJB_END_ARRAY)
		{
			Assert(r1 == WJB_ELEM);
			pushJsonbValue(state, r1, &v1);
		}

		/* elements of the second array; its end token is not pushed either */
		while ((r2 = JsonbIteratorNext(it2, &v2, true)) != WJB_END_ARRAY)
		{
			Assert(r2 == WJB_ELEM);
			pushJsonbValue(state, WJB_ELEM, &v2);
		}

		/* close the combined array ourselves */
		res = pushJsonbValue(state, WJB_END_ARRAY, NULL /* signal to sort */ );
	}
	else if (rk1 == WJB_BEGIN_OBJECT)
	{
		/*
		 * We have object || array.
		 */
		Assert(rk2 == WJB_BEGIN_ARRAY);

		/* open the result array, then re-open the object inside it */
		pushJsonbValue(state, WJB_BEGIN_ARRAY, NULL);

		pushJsonbValue(state, WJB_BEGIN_OBJECT, NULL);
		/* copy the whole object, including its WJB_END_OBJECT */
		while ((r1 = JsonbIteratorNext(it1, &v1, true)) != WJB_DONE)
			pushJsonbValue(state, r1, r1 != WJB_END_OBJECT ? &v1 : NULL);

		/*
		 * it2's WJB_BEGIN_ARRAY was already consumed above, so what remains
		 * is its elements and its WJB_END_ARRAY, which closes the result.
		 */
		while ((r2 = JsonbIteratorNext(it2, &v2, true)) != WJB_DONE)
			res = pushJsonbValue(state, r2, r2 != WJB_END_ARRAY ? &v2 : NULL);
	}
	else
	{
		/*
		 * We have array || object.
		 */
		Assert(rk1 == WJB_BEGIN_ARRAY);
		Assert(rk2 == WJB_BEGIN_OBJECT);

		pushJsonbValue(state, WJB_BEGIN_ARRAY, NULL);

		/* elements of the array; its end token is not pushed */
		while ((r1 = JsonbIteratorNext(it1, &v1, true)) != WJB_END_ARRAY)
			pushJsonbValue(state, r1, &v1);

		/* the object becomes the last element of the result array */
		pushJsonbValue(state, WJB_BEGIN_OBJECT, NULL);
		while ((r2 = JsonbIteratorNext(it2, &v2, true)) != WJB_DONE)
			pushJsonbValue(state, r2, r2 != WJB_END_OBJECT ? &v2 : NULL);

		res = pushJsonbValue(state, WJB_END_ARRAY, NULL);
	}

	return res;
}
4672 
4673 /*
4674  * Do most of the heavy work for jsonb_set/jsonb_insert
4675  *
4676  * If JB_PATH_DELETE bit is set in op_type, the element is to be removed.
4677  *
4678  * If any bit mentioned in JB_PATH_CREATE_OR_INSERT is set in op_type,
4679  * we create the new value if the key or array index does not exist.
4680  *
4681  * Bits JB_PATH_INSERT_BEFORE and JB_PATH_INSERT_AFTER in op_type
4682  * behave as JB_PATH_CREATE if new value is inserted in JsonbObject.
4683  *
4684  * All path elements before the last must already exist
4685  * whatever bits in op_type are set, or nothing is done.
4686  */
4687 static JsonbValue *
setPath(JsonbIterator ** it,Datum * path_elems,bool * path_nulls,int path_len,JsonbParseState ** st,int level,Jsonb * newval,int op_type)4688 setPath(JsonbIterator **it, Datum *path_elems,
4689 		bool *path_nulls, int path_len,
4690 		JsonbParseState **st, int level, Jsonb *newval, int op_type)
4691 {
4692 	JsonbValue	v;
4693 	JsonbIteratorToken r;
4694 	JsonbValue *res;
4695 
4696 	check_stack_depth();
4697 
4698 	if (path_nulls[level])
4699 		ereport(ERROR,
4700 				(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
4701 				 errmsg("path element at position %d is null",
4702 						level + 1)));
4703 
4704 	r = JsonbIteratorNext(it, &v, false);
4705 
4706 	switch (r)
4707 	{
4708 		case WJB_BEGIN_ARRAY:
4709 			(void) pushJsonbValue(st, r, NULL);
4710 			setPathArray(it, path_elems, path_nulls, path_len, st, level,
4711 						 newval, v.val.array.nElems, op_type);
4712 			r = JsonbIteratorNext(it, &v, false);
4713 			Assert(r == WJB_END_ARRAY);
4714 			res = pushJsonbValue(st, r, NULL);
4715 			break;
4716 		case WJB_BEGIN_OBJECT:
4717 			(void) pushJsonbValue(st, r, NULL);
4718 			setPathObject(it, path_elems, path_nulls, path_len, st, level,
4719 						  newval, v.val.object.nPairs, op_type);
4720 			r = JsonbIteratorNext(it, &v, true);
4721 			Assert(r == WJB_END_OBJECT);
4722 			res = pushJsonbValue(st, r, NULL);
4723 			break;
4724 		case WJB_ELEM:
4725 		case WJB_VALUE:
4726 			res = pushJsonbValue(st, r, &v);
4727 			break;
4728 		default:
4729 			elog(ERROR, "unrecognized iterator result: %d", (int) r);
4730 			res = NULL;			/* keep compiler quiet */
4731 			break;
4732 	}
4733 
4734 	return res;
4735 }
4736 
/*
 * Object walker for setPath
 *
 * Called after the object's WJB_BEGIN_OBJECT token has been consumed and
 * pushed.  Reads the object's npairs key/value pairs from the iterator and
 * pushes them to *st, applying the operation in op_type to the pair whose
 * key equals path_elems[level].  The closing WJB_END_OBJECT is left for the
 * caller.
 */
static void
setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
			  int path_len, JsonbParseState **st, int level,
			  Jsonb *newval, uint32 npairs, int op_type)
{
	JsonbValue	v;
	int			i;
	JsonbValue	k;
	bool		done = false;

	/* no usable path element at this level: just copy the object through */
	if (level >= path_len || path_nulls[level])
		done = true;

	/*
	 * An empty object is a special case for create: the loop below never
	 * runs, so the new key and value have to be added here.
	 */
	if ((npairs == 0) && (op_type & JB_PATH_CREATE_OR_INSERT) &&
		(level == path_len - 1))
	{
		JsonbValue	newkey;

		newkey.type = jbvString;
		newkey.val.string.len = VARSIZE_ANY_EXHDR(path_elems[level]);
		newkey.val.string.val = VARDATA_ANY(path_elems[level]);

		(void) pushJsonbValue(st, WJB_KEY, &newkey);
		addJsonbToParseState(st, newval);
	}

	for (i = 0; i < npairs; i++)
	{
		JsonbIteratorToken r = JsonbIteratorNext(it, &k, true);

		Assert(r == WJB_KEY);

		/* does this key equal the path element for this level? */
		if (!done &&
			k.val.string.len == VARSIZE_ANY_EXHDR(path_elems[level]) &&
			memcmp(k.val.string.val, VARDATA_ANY(path_elems[level]),
				   k.val.string.len) == 0)
		{
			if (level == path_len - 1)
			{
				/*
				 * Last path element: this is the pair to operate on.
				 *
				 * When called from jsonb_insert(), redefining an existing
				 * value is forbidden.
				 */
				if (op_type & (JB_PATH_INSERT_BEFORE | JB_PATH_INSERT_AFTER))
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
							 errmsg("cannot replace existing key"),
							 errhint("Try using the function jsonb_set "
									 "to replace key value.")));

				r = JsonbIteratorNext(it, &v, true);	/* skip value */
				/* unless deleting, re-add the key with the new value */
				if (!(op_type & JB_PATH_DELETE))
				{
					(void) pushJsonbValue(st, WJB_KEY, &k);
					addJsonbToParseState(st, newval);
				}
				done = true;
			}
			else
			{
				/* intermediate path element: keep the key, recurse on value */
				(void) pushJsonbValue(st, r, &k);
				setPath(it, path_elems, path_nulls, path_len,
						st, level + 1, newval, op_type);
			}
		}
		else
		{
			/*
			 * Non-matching pair.  If this is the last pair, the final path
			 * element has not been found, and we may create, push the new
			 * key and value before copying this pair.
			 */
			if ((op_type & JB_PATH_CREATE_OR_INSERT) && !done &&
				level == path_len - 1 && i == npairs - 1)
			{
				JsonbValue	newkey;

				newkey.type = jbvString;
				newkey.val.string.len = VARSIZE_ANY_EXHDR(path_elems[level]);
				newkey.val.string.val = VARDATA_ANY(path_elems[level]);

				(void) pushJsonbValue(st, WJB_KEY, &newkey);
				addJsonbToParseState(st, newval);
			}

			/* copy the key, then its value */
			(void) pushJsonbValue(st, r, &k);
			r = JsonbIteratorNext(it, &v, false);
			(void) pushJsonbValue(st, r, r < WJB_BEGIN_ARRAY ? &v : NULL);
			if (r == WJB_BEGIN_ARRAY || r == WJB_BEGIN_OBJECT)
			{
				/*
				 * The value is a container: copy its tokens until the
				 * nesting depth returns to zero, i.e. through its own
				 * closing token.
				 */
				int			walking_level = 1;

				while (walking_level != 0)
				{
					r = JsonbIteratorNext(it, &v, false);

					if (r == WJB_BEGIN_ARRAY || r == WJB_BEGIN_OBJECT)
						++walking_level;
					if (r == WJB_END_ARRAY || r == WJB_END_OBJECT)
						--walking_level;

					(void) pushJsonbValue(st, r, r < WJB_BEGIN_ARRAY ? &v : NULL);
				}
			}
		}
	}
}
4843 
4844 /*
4845  * Array walker for setPath
4846  */
4847 static void
setPathArray(JsonbIterator ** it,Datum * path_elems,bool * path_nulls,int path_len,JsonbParseState ** st,int level,Jsonb * newval,uint32 nelems,int op_type)4848 setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
4849 			 int path_len, JsonbParseState **st, int level,
4850 			 Jsonb *newval, uint32 nelems, int op_type)
4851 {
4852 	JsonbValue	v;
4853 	int			idx,
4854 				i;
4855 	bool		done = false;
4856 
4857 	/* pick correct index */
4858 	if (level < path_len && !path_nulls[level])
4859 	{
4860 		char	   *c = TextDatumGetCString(path_elems[level]);
4861 		long		lindex;
4862 		char	   *badp;
4863 
4864 		errno = 0;
4865 		lindex = strtol(c, &badp, 10);
4866 		if (errno != 0 || badp == c || *badp != '\0' || lindex > INT_MAX ||
4867 			lindex < INT_MIN)
4868 			ereport(ERROR,
4869 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
4870 					 errmsg("path element at position %d is not an integer: \"%s\"",
4871 							level + 1, c)));
4872 		idx = lindex;
4873 	}
4874 	else
4875 		idx = nelems;
4876 
4877 	if (idx < 0)
4878 	{
4879 		if (-idx > nelems)
4880 			idx = INT_MIN;
4881 		else
4882 			idx = nelems + idx;
4883 	}
4884 
4885 	if (idx > 0 && idx > nelems)
4886 		idx = nelems;
4887 
4888 	/*
4889 	 * if we're creating, and idx == INT_MIN, we prepend the new value to the
4890 	 * array also if the array is empty - in which case we don't really care
4891 	 * what the idx value is
4892 	 */
4893 
4894 	if ((idx == INT_MIN || nelems == 0) && (level == path_len - 1) &&
4895 		(op_type & JB_PATH_CREATE_OR_INSERT))
4896 	{
4897 		Assert(newval != NULL);
4898 		addJsonbToParseState(st, newval);
4899 		done = true;
4900 	}
4901 
4902 	/* iterate over the array elements */
4903 	for (i = 0; i < nelems; i++)
4904 	{
4905 		JsonbIteratorToken r;
4906 
4907 		if (i == idx && level < path_len)
4908 		{
4909 			if (level == path_len - 1)
4910 			{
4911 				r = JsonbIteratorNext(it, &v, true);	/* skip */
4912 
4913 				if (op_type & (JB_PATH_INSERT_BEFORE | JB_PATH_CREATE))
4914 					addJsonbToParseState(st, newval);
4915 
4916 				/*
4917 				 * We should keep current value only in case of
4918 				 * JB_PATH_INSERT_BEFORE or JB_PATH_INSERT_AFTER because
4919 				 * otherwise it should be deleted or replaced
4920 				 */
4921 				if (op_type & (JB_PATH_INSERT_AFTER | JB_PATH_INSERT_BEFORE))
4922 					(void) pushJsonbValue(st, r, &v);
4923 
4924 				if (op_type & (JB_PATH_INSERT_AFTER | JB_PATH_REPLACE))
4925 					addJsonbToParseState(st, newval);
4926 
4927 				done = true;
4928 			}
4929 			else
4930 				(void) setPath(it, path_elems, path_nulls, path_len,
4931 							   st, level + 1, newval, op_type);
4932 		}
4933 		else
4934 		{
4935 			r = JsonbIteratorNext(it, &v, false);
4936 
4937 			(void) pushJsonbValue(st, r, r < WJB_BEGIN_ARRAY ? &v : NULL);
4938 
4939 			if (r == WJB_BEGIN_ARRAY || r == WJB_BEGIN_OBJECT)
4940 			{
4941 				int			walking_level = 1;
4942 
4943 				while (walking_level != 0)
4944 				{
4945 					r = JsonbIteratorNext(it, &v, false);
4946 
4947 					if (r == WJB_BEGIN_ARRAY || r == WJB_BEGIN_OBJECT)
4948 						++walking_level;
4949 					if (r == WJB_END_ARRAY || r == WJB_END_OBJECT)
4950 						--walking_level;
4951 
4952 					(void) pushJsonbValue(st, r, r < WJB_BEGIN_ARRAY ? &v : NULL);
4953 				}
4954 			}
4955 
4956 			if ((op_type & JB_PATH_CREATE_OR_INSERT) && !done &&
4957 				level == path_len - 1 && i == nelems - 1)
4958 			{
4959 				addJsonbToParseState(st, newval);
4960 			}
4961 		}
4962 	}
4963 }
4964 
4965 /*
4966  * Parse information about what elements of a jsonb document we want to iterate
4967  * in functions iterate_json(b)_values. This information is presented in jsonb
4968  * format, so that it can be easily extended in the future.
4969  */
4970 uint32
parse_jsonb_index_flags(Jsonb * jb)4971 parse_jsonb_index_flags(Jsonb *jb)
4972 {
4973 	JsonbIterator *it;
4974 	JsonbValue	v;
4975 	JsonbIteratorToken type;
4976 	uint32		flags = 0;
4977 
4978 	it = JsonbIteratorInit(&jb->root);
4979 
4980 	type = JsonbIteratorNext(&it, &v, false);
4981 
4982 	/*
4983 	 * We iterate over array (scalar internally is represented as array, so,
4984 	 * we will accept it too) to check all its elements.  Flag names are
4985 	 * chosen the same as jsonb_typeof uses.
4986 	 */
4987 	if (type != WJB_BEGIN_ARRAY)
4988 		ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4989 						errmsg("wrong flag type, only arrays and scalars are allowed")));
4990 
4991 	while ((type = JsonbIteratorNext(&it, &v, false)) == WJB_ELEM)
4992 	{
4993 		if (v.type != jbvString)
4994 			ereport(ERROR,
4995 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4996 					 errmsg("flag array element is not a string"),
4997 					 errhint("Possible values are: \"string\", \"numeric\", \"boolean\", \"key\", and \"all\"")));
4998 
4999 		if (v.val.string.len == 3 &&
5000 			pg_strncasecmp(v.val.string.val, "all", 3) == 0)
5001 			flags |= jtiAll;
5002 		else if (v.val.string.len == 3 &&
5003 				 pg_strncasecmp(v.val.string.val, "key", 3) == 0)
5004 			flags |= jtiKey;
5005 		else if (v.val.string.len == 6 &&
5006 				 pg_strncasecmp(v.val.string.val, "string", 5) == 0)
5007 			flags |= jtiString;
5008 		else if (v.val.string.len == 7 &&
5009 				 pg_strncasecmp(v.val.string.val, "numeric", 7) == 0)
5010 			flags |= jtiNumeric;
5011 		else if (v.val.string.len == 7 &&
5012 				 pg_strncasecmp(v.val.string.val, "boolean", 7) == 0)
5013 			flags |= jtiBool;
5014 		else
5015 			ereport(ERROR,
5016 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5017 					 errmsg("wrong flag in flag array: \"%s\"",
5018 							pnstrdup(v.val.string.val, v.val.string.len)),
5019 					 errhint("Possible values are: \"string\", \"numeric\", \"boolean\", \"key\", and \"all\"")));
5020 	}
5021 
5022 	/* expect end of array now */
5023 	if (type != WJB_END_ARRAY)
5024 		elog(ERROR, "unexpected end of flag array");
5025 
5026 	/* get final WJB_DONE and free iterator */
5027 	type = JsonbIteratorNext(&it, &v, false);
5028 	if (type != WJB_DONE)
5029 		elog(ERROR, "unexpected end of flag array");
5030 
5031 	return flags;
5032 }
5033 
5034 /*
5035  * Iterate over jsonb values or elements, specified by flags, and pass them
5036  * together with an iteration state to a specified JsonIterateStringValuesAction.
5037  */
5038 void
iterate_jsonb_values(Jsonb * jb,uint32 flags,void * state,JsonIterateStringValuesAction action)5039 iterate_jsonb_values(Jsonb *jb, uint32 flags, void *state,
5040 					 JsonIterateStringValuesAction action)
5041 {
5042 	JsonbIterator *it;
5043 	JsonbValue	v;
5044 	JsonbIteratorToken type;
5045 
5046 	it = JsonbIteratorInit(&jb->root);
5047 
5048 	/*
5049 	 * Just recursively iterating over jsonb and call callback on all
5050 	 * correspoding elements
5051 	 */
5052 	while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
5053 	{
5054 		if (type == WJB_KEY)
5055 		{
5056 			if (flags & jtiKey)
5057 				action(state, v.val.string.val, v.val.string.len);
5058 
5059 			continue;
5060 		}
5061 		else if (!(type == WJB_VALUE || type == WJB_ELEM))
5062 		{
5063 			/* do not call callback for composite JsonbValue */
5064 			continue;
5065 		}
5066 
5067 		/* JsonbValue is a value of object or element of array */
5068 		switch (v.type)
5069 		{
5070 			case jbvString:
5071 				if (flags & jtiString)
5072 					action(state, v.val.string.val, v.val.string.len);
5073 				break;
5074 			case jbvNumeric:
5075 				if (flags & jtiNumeric)
5076 				{
5077 					char	   *val;
5078 
5079 					val = DatumGetCString(DirectFunctionCall1(numeric_out,
5080 															  NumericGetDatum(v.val.numeric)));
5081 
5082 					action(state, val, strlen(val));
5083 					pfree(val);
5084 				}
5085 				break;
5086 			case jbvBool:
5087 				if (flags & jtiBool)
5088 				{
5089 					if (v.val.boolean)
5090 						action(state, "true", 4);
5091 					else
5092 						action(state, "false", 5);
5093 				}
5094 				break;
5095 			default:
5096 				/* do not call callback for composite JsonbValue */
5097 				break;
5098 		}
5099 	}
5100 }
5101 
5102 /*
5103  * Iterate over json values and elements, specified by flags, and pass them
5104  * together with an iteration state to a specified JsonIterateStringValuesAction.
5105  */
5106 void
iterate_json_values(text * json,uint32 flags,void * action_state,JsonIterateStringValuesAction action)5107 iterate_json_values(text *json, uint32 flags, void *action_state,
5108 					JsonIterateStringValuesAction action)
5109 {
5110 	JsonLexContext *lex = makeJsonLexContext(json, true);
5111 	JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
5112 	IterateJsonStringValuesState *state = palloc0(sizeof(IterateJsonStringValuesState));
5113 
5114 	state->lex = lex;
5115 	state->action = action;
5116 	state->action_state = action_state;
5117 	state->flags = flags;
5118 
5119 	sem->semstate = (void *) state;
5120 	sem->scalar = iterate_values_scalar;
5121 	sem->object_field_start = iterate_values_object_field_start;
5122 
5123 	pg_parse_json(lex, sem);
5124 }
5125 
5126 /*
5127  * An auxiliary function for iterate_json_values to invoke a specified
5128  * JsonIterateStringValuesAction for specified values.
5129  */
5130 static void
iterate_values_scalar(void * state,char * token,JsonTokenType tokentype)5131 iterate_values_scalar(void *state, char *token, JsonTokenType tokentype)
5132 {
5133 	IterateJsonStringValuesState *_state = (IterateJsonStringValuesState *) state;
5134 
5135 	switch (tokentype)
5136 	{
5137 		case JSON_TOKEN_STRING:
5138 			if (_state->flags & jtiString)
5139 				_state->action(_state->action_state, token, strlen(token));
5140 			break;
5141 		case JSON_TOKEN_NUMBER:
5142 			if (_state->flags & jtiNumeric)
5143 				_state->action(_state->action_state, token, strlen(token));
5144 			break;
5145 		case JSON_TOKEN_TRUE:
5146 		case JSON_TOKEN_FALSE:
5147 			if (_state->flags & jtiBool)
5148 				_state->action(_state->action_state, token, strlen(token));
5149 			break;
5150 		default:
5151 			/* do not call callback for any other token */
5152 			break;
5153 	}
5154 }
5155 
5156 static void
iterate_values_object_field_start(void * state,char * fname,bool isnull)5157 iterate_values_object_field_start(void *state, char *fname, bool isnull)
5158 {
5159 	IterateJsonStringValuesState *_state = (IterateJsonStringValuesState *) state;
5160 
5161 	if (_state->flags & jtiKey)
5162 	{
5163 		char	   *val = pstrdup(fname);
5164 
5165 		_state->action(_state->action_state, val, strlen(val));
5166 	}
5167 }
5168 
5169 /*
5170  * Iterate over a jsonb, and apply a specified JsonTransformStringValuesAction
5171  * to every string value or element. Any necessary context for a
5172  * JsonTransformStringValuesAction can be passed in the action_state variable.
5173  * Function returns a copy of an original jsonb object with transformed values.
5174  */
5175 Jsonb *
transform_jsonb_string_values(Jsonb * jsonb,void * action_state,JsonTransformStringValuesAction transform_action)5176 transform_jsonb_string_values(Jsonb *jsonb, void *action_state,
5177 							  JsonTransformStringValuesAction transform_action)
5178 {
5179 	JsonbIterator *it;
5180 	JsonbValue	v,
5181 			   *res = NULL;
5182 	JsonbIteratorToken type;
5183 	JsonbParseState *st = NULL;
5184 	text	   *out;
5185 	bool		is_scalar = false;
5186 
5187 	it = JsonbIteratorInit(&jsonb->root);
5188 	is_scalar = it->isScalar;
5189 
5190 	while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
5191 	{
5192 		if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
5193 		{
5194 			out = transform_action(action_state, v.val.string.val, v.val.string.len);
5195 			v.val.string.val = VARDATA_ANY(out);
5196 			v.val.string.len = VARSIZE_ANY_EXHDR(out);
5197 			res = pushJsonbValue(&st, type, type < WJB_BEGIN_ARRAY ? &v : NULL);
5198 		}
5199 		else
5200 		{
5201 			res = pushJsonbValue(&st, type, (type == WJB_KEY ||
5202 											 type == WJB_VALUE ||
5203 											 type == WJB_ELEM) ? &v : NULL);
5204 		}
5205 	}
5206 
5207 	if (res->type == jbvArray)
5208 		res->val.array.rawScalar = is_scalar;
5209 
5210 	return JsonbValueToJsonb(res);
5211 }
5212 
5213 /*
5214  * Iterate over a json, and apply a specified JsonTransformStringValuesAction
5215  * to every string value or element. Any necessary context for a
5216  * JsonTransformStringValuesAction can be passed in the action_state variable.
5217  * Function returns a StringInfo, which is a copy of an original json with
5218  * transformed values.
5219  */
5220 text *
transform_json_string_values(text * json,void * action_state,JsonTransformStringValuesAction transform_action)5221 transform_json_string_values(text *json, void *action_state,
5222 							 JsonTransformStringValuesAction transform_action)
5223 {
5224 	JsonLexContext *lex = makeJsonLexContext(json, true);
5225 	JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
5226 	TransformJsonStringValuesState *state = palloc0(sizeof(TransformJsonStringValuesState));
5227 
5228 	state->lex = lex;
5229 	state->strval = makeStringInfo();
5230 	state->action = transform_action;
5231 	state->action_state = action_state;
5232 
5233 	sem->semstate = (void *) state;
5234 	sem->scalar = transform_string_values_scalar;
5235 	sem->object_start = transform_string_values_object_start;
5236 	sem->object_end = transform_string_values_object_end;
5237 	sem->array_start = transform_string_values_array_start;
5238 	sem->array_end = transform_string_values_array_end;
5239 	sem->scalar = transform_string_values_scalar;
5240 	sem->array_element_start = transform_string_values_array_element_start;
5241 	sem->object_field_start = transform_string_values_object_field_start;
5242 
5243 	pg_parse_json(lex, sem);
5244 
5245 	return cstring_to_text_with_len(state->strval->data, state->strval->len);
5246 }
5247 
5248 /*
5249  * Set of auxiliary functions for transform_json_string_values to invoke a
5250  * specified JsonTransformStringValuesAction for all values and left everything
5251  * else untouched.
5252  */
5253 static void
transform_string_values_object_start(void * state)5254 transform_string_values_object_start(void *state)
5255 {
5256 	TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state;
5257 
5258 	appendStringInfoCharMacro(_state->strval, '{');
5259 }
5260 
5261 static void
transform_string_values_object_end(void * state)5262 transform_string_values_object_end(void *state)
5263 {
5264 	TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state;
5265 
5266 	appendStringInfoCharMacro(_state->strval, '}');
5267 }
5268 
5269 static void
transform_string_values_array_start(void * state)5270 transform_string_values_array_start(void *state)
5271 {
5272 	TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state;
5273 
5274 	appendStringInfoCharMacro(_state->strval, '[');
5275 }
5276 
5277 static void
transform_string_values_array_end(void * state)5278 transform_string_values_array_end(void *state)
5279 {
5280 	TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state;
5281 
5282 	appendStringInfoCharMacro(_state->strval, ']');
5283 }
5284 
5285 static void
transform_string_values_object_field_start(void * state,char * fname,bool isnull)5286 transform_string_values_object_field_start(void *state, char *fname, bool isnull)
5287 {
5288 	TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state;
5289 
5290 	if (_state->strval->data[_state->strval->len - 1] != '{')
5291 		appendStringInfoCharMacro(_state->strval, ',');
5292 
5293 	/*
5294 	 * Unfortunately we don't have the quoted and escaped string any more, so
5295 	 * we have to re-escape it.
5296 	 */
5297 	escape_json(_state->strval, fname);
5298 	appendStringInfoCharMacro(_state->strval, ':');
5299 }
5300 
5301 static void
transform_string_values_array_element_start(void * state,bool isnull)5302 transform_string_values_array_element_start(void *state, bool isnull)
5303 {
5304 	TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state;
5305 
5306 	if (_state->strval->data[_state->strval->len - 1] != '[')
5307 		appendStringInfoCharMacro(_state->strval, ',');
5308 }
5309 
5310 static void
transform_string_values_scalar(void * state,char * token,JsonTokenType tokentype)5311 transform_string_values_scalar(void *state, char *token, JsonTokenType tokentype)
5312 {
5313 	TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state;
5314 
5315 	if (tokentype == JSON_TOKEN_STRING)
5316 	{
5317 		text	   *out = _state->action(_state->action_state, token, strlen(token));
5318 
5319 		escape_json(_state->strval, text_to_cstring(out));
5320 	}
5321 	else
5322 		appendStringInfoString(_state->strval, token);
5323 }
5324