1 %{
2 /*-------------------------------------------------------------------------
3  *
4  * jsonpath_gram.y
5  *	 Grammar definitions for jsonpath datatype
6  *
7  * Transforms tokenized jsonpath into tree of JsonPathParseItem structs.
8  *
9  * Copyright (c) 2019-2020, PostgreSQL Global Development Group
10  *
11  * IDENTIFICATION
12  *	src/backend/utils/adt/jsonpath_gram.y
13  *
14  *-------------------------------------------------------------------------
15  */
16 
17 #include "postgres.h"
18 
19 #include "catalog/pg_collation.h"
20 #include "fmgr.h"
21 #include "miscadmin.h"
22 #include "nodes/pg_list.h"
23 #include "regex/regex.h"
24 #include "utils/builtins.h"
25 #include "utils/jsonpath.h"
26 
/*
 * struct JsonPathString is shared between scan and gram.
 *
 * It is the semantic value of every token (see the <str> tag on the %token
 * declarations) and is what the makeItem* helpers read string data from.
 */
typedef struct JsonPathString
{
	char	   *val;			/* string bytes; copied by pointer into items */
	int			len;			/* number of bytes of val that are used */
	int			total;			/* not referenced in this file; presumably the
								 * allocated size maintained by the scanner --
								 * TODO confirm in jsonpath_scan.l */
}			JsonPathString;
34 
/* Forward-declare the value union so the lexer prototype below can name it. */
union YYSTYPE;

/* flex 2.5.4 doesn't bother with a decl for this */
int	jsonpath_yylex(union YYSTYPE *yylval_param);
/* Parser entry point; the grammar's "result" rule assigns *result. */
int	jsonpath_yyparse(JsonPathParseResult **result);
/* Error hook; takes the same extra parse parameter as the parser. */
void jsonpath_yyerror(JsonPathParseResult **result, const char *message);
41 
/*
 * Constructors for JsonPathParseItem nodes.  They are called from the grammar
 * actions and defined after the rules section.
 */
static JsonPathParseItem *makeItemType(JsonPathItemType type);
static JsonPathParseItem *makeItemString(JsonPathString *s);
static JsonPathParseItem *makeItemVariable(JsonPathString *s);
static JsonPathParseItem *makeItemKey(JsonPathString *s);
static JsonPathParseItem *makeItemNumeric(JsonPathString *s);
static JsonPathParseItem *makeItemBool(bool val);
static JsonPathParseItem *makeItemBinary(JsonPathItemType type,
										 JsonPathParseItem *la,
										 JsonPathParseItem *ra);
static JsonPathParseItem *makeItemUnary(JsonPathItemType type,
										JsonPathParseItem *a);
static JsonPathParseItem *makeItemList(List *list);
static JsonPathParseItem *makeIndexArray(List *list);
static JsonPathParseItem *makeAny(int first, int last);
static JsonPathParseItem *makeItemLikeRegex(JsonPathParseItem *expr,
											JsonPathString *pattern,
											JsonPathString *flags);
59 
60 /*
61  * Bison doesn't allocate anything that needs to live across parser calls,
62  * so we can easily have it use palloc instead of malloc.  This prevents
63  * memory leaks if we error out during parsing.  Note this only works with
64  * bison >= 2.0.  However, in bison 1.875 the default is to use alloca()
65  * if possible, so there's not really much problem anyhow, at least if
66  * you're building with gcc.
67  */
68 #define YYMALLOC palloc
69 #define YYFREE   pfree
70 
71 %}
72 
73 /* BISON Declarations */
74 %pure-parser
75 %expect 0
76 %name-prefix="jsonpath_yy"
77 %error-verbose
78 %parse-param {JsonPathParseResult **result}
79 
/* Semantic value types used by tokens and nonterminals. */
%union {
	JsonPathString		str;	/* token text as produced by the scanner */
	List			   *elems;	/* list of JsonPathParseItem */
	List			   *indexs;	/* list of jpiSubscript JsonPathParseItem,
								 * one per array subscript or range */
	JsonPathParseItem  *value;	/* a single parse-tree node */
	JsonPathParseResult *result;	/* whole-parse result */
	JsonPathItemType	optype;	/* item type chosen by comp_op / method */
	bool				boolean;	/* lax (true) vs. strict (false) mode */
	int					integer;	/* nesting level bound for .** */
}
90 
/* Every token carries the scanner's JsonPathString as its value. */
%token	<str>		TO_P NULL_P TRUE_P FALSE_P IS_P UNKNOWN_P EXISTS_P
%token	<str>		IDENT_P STRING_P NUMERIC_P INT_P VARIABLE_P
%token	<str>		OR_P AND_P NOT_P
%token	<str>		LESS_P LESSEQUAL_P EQUAL_P NOTEQUAL_P GREATEREQUAL_P GREATER_P
%token	<str>		ANY_P STRICT_P LAX_P LAST_P STARTS_P WITH_P LIKE_REGEX_P FLAG_P
%token	<str>		ABS_P SIZE_P TYPE_P FLOOR_P DOUBLE_P CEILING_P KEYVALUE_P
%token	<str>		DATETIME_P

%type	<result>	result

/* Nonterminals that yield a single JsonPathParseItem node. */
%type	<value>		scalar_value path_primary expr array_accessor
					any_path accessor_op key predicate delimited_predicate
					index_elem starts_with_initial expr_or_predicate
					datetime_template opt_datetime_template

/* List of path items, later chained together by makeItemList(). */
%type	<elems>		accessor_expr

/* List of subscript items, consumed by makeIndexArray(). */
%type	<indexs>	index_list

%type	<optype>	comp_op method

%type	<boolean>	mode

%type	<str>		key_name

%type	<integer>	any_level
117 
/*
 * Operator precedence, lowest first.  UMINUS is not a real token; it is
 * referenced through %prec by the unary '+' and '-' rules of "expr".
 */
%left	OR_P
%left	AND_P
%right	NOT_P
%left	'+' '-'
%left	'*' '/' '%'
%left	UMINUS
%nonassoc '(' ')'
125 
126 /* Grammar follows */
127 %%
128 
/*
 * Start symbol.  On a non-empty path, allocates the JsonPathParseResult and
 * hands it back through the "result" parse parameter; empty input yields
 * *result = NULL.
 */
result:
	mode expr_or_predicate			{
										*result = palloc(sizeof(JsonPathParseResult));
										(*result)->expr = $2;
										(*result)->lax = $1;
									}
	| /* EMPTY */					{ *result = NULL; }
	;
137 
/* The body of a path may be either a value expression or a predicate. */
expr_or_predicate:
	expr							{ $$ = $1; }
	| predicate						{ $$ = $1; }
	;
142 
/*
 * Optional mode keyword.  The value is the "lax" flag stored in the result:
 * false for STRICT, true for LAX and also when no keyword is given.
 */
mode:
	STRICT_P						{ $$ = false; }
	| LAX_P							{ $$ = true; }
	| /* EMPTY */					{ $$ = true; }
	;
148 
/*
 * Literals and variables.  NULL_P is represented by calling makeItemString()
 * with a NULL argument, which produces a jpiNull item.
 */
scalar_value:
	STRING_P						{ $$ = makeItemString(&$1); }
	| NULL_P						{ $$ = makeItemString(NULL); }
	| TRUE_P						{ $$ = makeItemBool(true); }
	| FALSE_P						{ $$ = makeItemBool(false); }
	| NUMERIC_P						{ $$ = makeItemNumeric(&$1); }
	| INT_P							{ $$ = makeItemNumeric(&$1); }
	| VARIABLE_P 					{ $$ = makeItemVariable(&$1); }
	;
158 
/* Maps a comparison token to the item type used for the binary node. */
comp_op:
	EQUAL_P							{ $$ = jpiEqual; }
	| NOTEQUAL_P					{ $$ = jpiNotEqual; }
	| LESS_P						{ $$ = jpiLess; }
	| GREATER_P						{ $$ = jpiGreater; }
	| LESSEQUAL_P					{ $$ = jpiLessOrEqual; }
	| GREATEREQUAL_P				{ $$ = jpiGreaterOrEqual; }
	;
167 
/*
 * Predicates with explicit delimiters: a parenthesized predicate or
 * exists(expr).  This is the only form NOT_P may be applied to.
 */
delimited_predicate:
	'(' predicate ')'				{ $$ = $2; }
	| EXISTS_P '(' expr ')'			{ $$ = makeItemUnary(jpiExists, $3); }
	;
172 
/*
 * Boolean-valued constructs: comparisons, AND/OR/NOT combinations,
 * "(...) is unknown", "starts with", and like_regex with an optional flag
 * string.
 */
predicate:
	delimited_predicate				{ $$ = $1; }
	| expr comp_op expr				{ $$ = makeItemBinary($2, $1, $3); }
	| predicate AND_P predicate		{ $$ = makeItemBinary(jpiAnd, $1, $3); }
	| predicate OR_P predicate		{ $$ = makeItemBinary(jpiOr, $1, $3); }
	| NOT_P delimited_predicate 	{ $$ = makeItemUnary(jpiNot, $2); }
	| '(' predicate ')' IS_P UNKNOWN_P
									{ $$ = makeItemUnary(jpiIsUnknown, $2); }
	| expr STARTS_P WITH_P starts_with_initial
									{ $$ = makeItemBinary(jpiStartsWith, $1, $4); }
	/* no FLAG_P clause: flags are passed as NULL */
	| expr LIKE_REGEX_P STRING_P 	{ $$ = makeItemLikeRegex($1, &$3, NULL); }
	| expr LIKE_REGEX_P STRING_P FLAG_P STRING_P
									{ $$ = makeItemLikeRegex($1, &$3, &$5); }
	;
187 
/* Right-hand operand of "starts with": a string literal or a variable. */
starts_with_initial:
	STRING_P						{ $$ = makeItemString(&$1); }
	| VARIABLE_P					{ $$ = makeItemVariable(&$1); }
	;
192 
/* Starting point of an accessor chain: a scalar, '$', '@', or LAST_P. */
path_primary:
	scalar_value					{ $$ = $1; }
	| '$'							{ $$ = makeItemType(jpiRoot); }
	| '@'							{ $$ = makeItemType(jpiCurrent); }
	| LAST_P						{ $$ = makeItemType(jpiLast); }
	;
199 
/*
 * Collects a List of path items: a primary, or a parenthesized expression or
 * predicate followed by an accessor, then any number of further accessors.
 * The "expr" rule turns the list into a linked chain via makeItemList().
 */
accessor_expr:
	path_primary					{ $$ = list_make1($1); }
	| '(' expr ')' accessor_op		{ $$ = list_make2($2, $4); }
	| '(' predicate ')' accessor_op	{ $$ = list_make2($2, $4); }
	| accessor_expr accessor_op		{ $$ = lappend($1, $2); }
	;
206 
/*
 * Value expressions: an accessor chain, a parenthesized expression, or
 * unary/binary arithmetic.  Unary '+' and '-' take the UMINUS precedence.
 */
expr:
	accessor_expr					{ $$ = makeItemList($1); }
	| '(' expr ')'					{ $$ = $2; }
	| '+' expr %prec UMINUS			{ $$ = makeItemUnary(jpiPlus, $2); }
	| '-' expr %prec UMINUS			{ $$ = makeItemUnary(jpiMinus, $2); }
	| expr '+' expr					{ $$ = makeItemBinary(jpiAdd, $1, $3); }
	| expr '-' expr					{ $$ = makeItemBinary(jpiSub, $1, $3); }
	| expr '*' expr					{ $$ = makeItemBinary(jpiMul, $1, $3); }
	| expr '/' expr					{ $$ = makeItemBinary(jpiDiv, $1, $3); }
	| expr '%' expr					{ $$ = makeItemBinary(jpiMod, $1, $3); }
	;
218 
/*
 * One array subscript: a single index (right side NULL) or a "from TO to"
 * range.  Both become a jpiSubscript node that makeIndexArray() unpacks.
 */
index_elem:
	expr							{ $$ = makeItemBinary(jpiSubscript, $1, NULL); }
	| expr TO_P expr				{ $$ = makeItemBinary(jpiSubscript, $1, $3); }
	;
223 
/* Comma-separated, non-empty list of subscripts. */
index_list:
	index_elem						{ $$ = list_make1($1); }
	| index_list ',' index_elem		{ $$ = lappend($1, $3); }
	;
228 
/* "[*]" (jpiAnyArray) or "[subscripts]" (jpiIndexArray). */
array_accessor:
	'[' '*' ']'						{ $$ = makeItemType(jpiAnyArray); }
	| '[' index_list ']'			{ $$ = makeIndexArray($2); }
	;
233 
/*
 * A level bound inside ".**{...}".  LAST_P is encoded as -1, which makeAny()
 * stores as PG_UINT32_MAX.
 */
any_level:
	INT_P							{ $$ = pg_atoi($1.val, 4, 0); }
	| LAST_P						{ $$ = -1; }
	;
238 
/*
 * ANY_P with optional level bounds: none (0 through the -1 "last" marker),
 * a single level, or an explicit "first TO last" range.
 */
any_path:
	ANY_P							{ $$ = makeAny(0, -1); }
	| ANY_P '{' any_level '}'		{ $$ = makeAny($3, $3); }
	| ANY_P '{' any_level TO_P any_level '}'
									{ $$ = makeAny($3, $5); }
	;
245 
/*
 * A single step applied to a path: key access, wildcards, array access,
 * item methods, datetime() with an optional template, or a "?(...)" filter.
 */
accessor_op:
	'.' key							{ $$ = $2; }
	| '.' '*'						{ $$ = makeItemType(jpiAnyKey); }
	| array_accessor				{ $$ = $1; }
	| '.' any_path					{ $$ = $2; }
	| '.' method '(' ')'			{ $$ = makeItemType($2); }
	/* $4 is NULL when no template is given */
	| '.' DATETIME_P '(' opt_datetime_template ')'
									{ $$ = makeItemUnary(jpiDatetime, $4); }
	| '?' '(' predicate ')'			{ $$ = makeItemUnary(jpiFilter, $3); }
	;
256 
/* The datetime() template argument is a string literal. */
datetime_template:
	STRING_P						{ $$ = makeItemString(&$1); }
	;
260 
/* Optional datetime() template; yields NULL when absent. */
opt_datetime_template:
	datetime_template				{ $$ = $1; }
	| /* EMPTY */					{ $$ = NULL; }
	;
265 
/* Wraps a key name into a jpiKey item. */
key:
	key_name						{ $$ = makeItemKey(&$1); }
	;
269 
/*
 * Tokens accepted as a key name after '.': identifiers, string literals, and
 * the keyword tokens listed below.  No alternative has an explicit action,
 * so bison's default ($$ = $1) passes the token's string through.
 */
key_name:
	IDENT_P
	| STRING_P
	| TO_P
	| NULL_P
	| TRUE_P
	| FALSE_P
	| IS_P
	| UNKNOWN_P
	| EXISTS_P
	| STRICT_P
	| LAX_P
	| ABS_P
	| SIZE_P
	| TYPE_P
	| FLOOR_P
	| DOUBLE_P
	| CEILING_P
	| DATETIME_P
	| KEYVALUE_P
	| LAST_P
	| STARTS_P
	| WITH_P
	| LIKE_REGEX_P
	| FLAG_P
	;
296 
/*
 * Argument-less item methods, used by accessor_op as ".method()".  Each
 * keyword maps to the item type passed to makeItemType().
 */
method:
	ABS_P							{ $$ = jpiAbs; }
	| SIZE_P						{ $$ = jpiSize; }
	| TYPE_P						{ $$ = jpiType; }
	| FLOOR_P						{ $$ = jpiFloor; }
	| DOUBLE_P						{ $$ = jpiDouble; }
	| CEILING_P						{ $$ = jpiCeiling; }
	| KEYVALUE_P					{ $$ = jpiKeyValue; }
	;
306 %%
307 
308 /*
309  * The helper functions below allocate and fill JsonPathParseItem's of various
310  * types.
311  */
312 
313 static JsonPathParseItem *
314 makeItemType(JsonPathItemType type)
315 {
316 	JsonPathParseItem  *v = palloc(sizeof(*v));
317 
318 	CHECK_FOR_INTERRUPTS();
319 
320 	v->type = type;
321 	v->next = NULL;
322 
323 	return v;
324 }
325 
326 static JsonPathParseItem *
makeItemString(JsonPathString * s)327 makeItemString(JsonPathString *s)
328 {
329 	JsonPathParseItem  *v;
330 
331 	if (s == NULL)
332 	{
333 		v = makeItemType(jpiNull);
334 	}
335 	else
336 	{
337 		v = makeItemType(jpiString);
338 		v->value.string.val = s->val;
339 		v->value.string.len = s->len;
340 	}
341 
342 	return v;
343 }
344 
345 static JsonPathParseItem *
makeItemVariable(JsonPathString * s)346 makeItemVariable(JsonPathString *s)
347 {
348 	JsonPathParseItem  *v;
349 
350 	v = makeItemType(jpiVariable);
351 	v->value.string.val = s->val;
352 	v->value.string.len = s->len;
353 
354 	return v;
355 }
356 
357 static JsonPathParseItem *
makeItemKey(JsonPathString * s)358 makeItemKey(JsonPathString *s)
359 {
360 	JsonPathParseItem  *v;
361 
362 	v = makeItemString(s);
363 	v->type = jpiKey;
364 
365 	return v;
366 }
367 
368 static JsonPathParseItem *
makeItemNumeric(JsonPathString * s)369 makeItemNumeric(JsonPathString *s)
370 {
371 	JsonPathParseItem  *v;
372 
373 	v = makeItemType(jpiNumeric);
374 	v->value.numeric =
375 		DatumGetNumeric(DirectFunctionCall3(numeric_in,
376 											CStringGetDatum(s->val),
377 											ObjectIdGetDatum(InvalidOid),
378 											Int32GetDatum(-1)));
379 
380 	return v;
381 }
382 
383 static JsonPathParseItem *
makeItemBool(bool val)384 makeItemBool(bool val)
385 {
386 	JsonPathParseItem  *v = makeItemType(jpiBool);
387 
388 	v->value.boolean = val;
389 
390 	return v;
391 }
392 
393 static JsonPathParseItem *
makeItemBinary(JsonPathItemType type,JsonPathParseItem * la,JsonPathParseItem * ra)394 makeItemBinary(JsonPathItemType type, JsonPathParseItem *la, JsonPathParseItem *ra)
395 {
396 	JsonPathParseItem  *v = makeItemType(type);
397 
398 	v->value.args.left = la;
399 	v->value.args.right = ra;
400 
401 	return v;
402 }
403 
404 static JsonPathParseItem *
makeItemUnary(JsonPathItemType type,JsonPathParseItem * a)405 makeItemUnary(JsonPathItemType type, JsonPathParseItem *a)
406 {
407 	JsonPathParseItem  *v;
408 
409 	if (type == jpiPlus && a->type == jpiNumeric && !a->next)
410 		return a;
411 
412 	if (type == jpiMinus && a->type == jpiNumeric && !a->next)
413 	{
414 		v = makeItemType(jpiNumeric);
415 		v->value.numeric =
416 			DatumGetNumeric(DirectFunctionCall1(numeric_uminus,
417 												NumericGetDatum(a->value.numeric)));
418 		return v;
419 	}
420 
421 	v = makeItemType(type);
422 
423 	v->value.arg = a;
424 
425 	return v;
426 }
427 
428 static JsonPathParseItem *
makeItemList(List * list)429 makeItemList(List *list)
430 {
431 	JsonPathParseItem  *head,
432 					   *end;
433 	ListCell		   *cell;
434 
435 	head = end = (JsonPathParseItem *) linitial(list);
436 
437 	if (list_length(list) == 1)
438 		return head;
439 
440 	/* append items to the end of already existing list */
441 	while (end->next)
442 		end = end->next;
443 
444 	for_each_from(cell, list, 1)
445 	{
446 		JsonPathParseItem *c = (JsonPathParseItem *) lfirst(cell);
447 
448 		end->next = c;
449 		end = c;
450 	}
451 
452 	return head;
453 }
454 
455 static JsonPathParseItem *
makeIndexArray(List * list)456 makeIndexArray(List *list)
457 {
458 	JsonPathParseItem  *v = makeItemType(jpiIndexArray);
459 	ListCell		   *cell;
460 	int					i = 0;
461 
462 	Assert(list_length(list) > 0);
463 	v->value.array.nelems = list_length(list);
464 
465 	v->value.array.elems = palloc(sizeof(v->value.array.elems[0]) *
466 								  v->value.array.nelems);
467 
468 	foreach(cell, list)
469 	{
470 		JsonPathParseItem  *jpi = lfirst(cell);
471 
472 		Assert(jpi->type == jpiSubscript);
473 
474 		v->value.array.elems[i].from = jpi->value.args.left;
475 		v->value.array.elems[i++].to = jpi->value.args.right;
476 	}
477 
478 	return v;
479 }
480 
481 static JsonPathParseItem *
makeAny(int first,int last)482 makeAny(int first, int last)
483 {
484 	JsonPathParseItem  *v = makeItemType(jpiAny);
485 
486 	v->value.anybounds.first = (first >= 0) ? first : PG_UINT32_MAX;
487 	v->value.anybounds.last = (last >= 0) ? last : PG_UINT32_MAX;
488 
489 	return v;
490 }
491 
492 static JsonPathParseItem *
makeItemLikeRegex(JsonPathParseItem * expr,JsonPathString * pattern,JsonPathString * flags)493 makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
494 				  JsonPathString *flags)
495 {
496 	JsonPathParseItem  *v = makeItemType(jpiLikeRegex);
497 	int					i;
498 	int					cflags;
499 
500 	v->value.like_regex.expr = expr;
501 	v->value.like_regex.pattern = pattern->val;
502 	v->value.like_regex.patternlen = pattern->len;
503 
504 	/* Parse the flags string, convert to bitmask.  Duplicate flags are OK. */
505 	v->value.like_regex.flags = 0;
506 	for (i = 0; flags && i < flags->len; i++)
507 	{
508 		switch (flags->val[i])
509 		{
510 			case 'i':
511 				v->value.like_regex.flags |= JSP_REGEX_ICASE;
512 				break;
513 			case 's':
514 				v->value.like_regex.flags |= JSP_REGEX_DOTALL;
515 				break;
516 			case 'm':
517 				v->value.like_regex.flags |= JSP_REGEX_MLINE;
518 				break;
519 			case 'x':
520 				v->value.like_regex.flags |= JSP_REGEX_WSPACE;
521 				break;
522 			case 'q':
523 				v->value.like_regex.flags |= JSP_REGEX_QUOTE;
524 				break;
525 			default:
526 				ereport(ERROR,
527 						(errcode(ERRCODE_SYNTAX_ERROR),
528 						 errmsg("invalid input syntax for type %s", "jsonpath"),
529 						 errdetail("unrecognized flag character \"%c\" in LIKE_REGEX predicate",
530 								   flags->val[i])));
531 				break;
532 		}
533 	}
534 
535 	/* Convert flags to what RE_compile_and_cache needs */
536 	cflags = jspConvertRegexFlags(v->value.like_regex.flags);
537 
538 	/* check regex validity */
539 	(void) RE_compile_and_cache(cstring_to_text_with_len(pattern->val,
540 														 pattern->len),
541 								cflags, DEFAULT_COLLATION_OID);
542 
543 	return v;
544 }
545 
546 /*
547  * Convert from XQuery regex flags to those recognized by our regex library.
548  */
549 int
jspConvertRegexFlags(uint32 xflags)550 jspConvertRegexFlags(uint32 xflags)
551 {
552 	/* By default, XQuery is very nearly the same as Spencer's AREs */
553 	int			cflags = REG_ADVANCED;
554 
555 	/* Ignore-case means the same thing, too, modulo locale issues */
556 	if (xflags & JSP_REGEX_ICASE)
557 		cflags |= REG_ICASE;
558 
559 	/* Per XQuery spec, if 'q' is specified then 'm', 's', 'x' are ignored */
560 	if (xflags & JSP_REGEX_QUOTE)
561 	{
562 		cflags &= ~REG_ADVANCED;
563 		cflags |= REG_QUOTE;
564 	}
565 	else
566 	{
567 		/* Note that dotall mode is the default in POSIX */
568 		if (!(xflags & JSP_REGEX_DOTALL))
569 			cflags |= REG_NLSTOP;
570 		if (xflags & JSP_REGEX_MLINE)
571 			cflags |= REG_NLANCH;
572 
573 		/*
574 		 * XQuery's 'x' mode is related to Spencer's expanded mode, but it's
575 		 * not really enough alike to justify treating JSP_REGEX_WSPACE as
576 		 * REG_EXPANDED.  For now we treat 'x' as unimplemented; perhaps in
577 		 * future we'll modify the regex library to have an option for
578 		 * XQuery-style ignore-whitespace mode.
579 		 */
580 		if (xflags & JSP_REGEX_WSPACE)
581 			ereport(ERROR,
582 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
583 					 errmsg("XQuery \"x\" flag (expanded regular expressions) is not implemented")));
584 	}
585 
586 	return cflags;
587 }
588 
589 /*
590  * jsonpath_scan.l is compiled as part of jsonpath_gram.y.  Currently, this is
591  * unavoidable because jsonpath_gram does not create a .h file to export its
592  * token symbols.  If these files ever grow large enough to be worth compiling
593  * separately, that could be fixed; but for now it seems like useless
594  * complication.
595  */
596 
597 #include "jsonpath_scan.c"
598