1 %{
/*-------------------------------------------------------------------------
 *
 * jsonpath_gram.y
 *	 Grammar definitions for jsonpath datatype
 *
 * Transforms tokenized jsonpath into tree of JsonPathParseItem structs.
 *
 * Copyright (c) 2019, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	src/backend/utils/adt/jsonpath_gram.y
 *
 *-------------------------------------------------------------------------
 */
16 
17 #include "postgres.h"
18 
19 #include "catalog/pg_collation.h"
20 #include "fmgr.h"
21 #include "miscadmin.h"
22 #include "nodes/pg_list.h"
23 #include "regex/regex.h"
24 #include "utils/builtins.h"
25 #include "utils/jsonpath.h"
26 
/*
 * struct JsonPathString is shared between scan and gram.
 *
 * It is the semantic value type of every <str> token declared below.
 */
typedef struct JsonPathString
{
	char	   *val;			/* token text; read together with len */
	int			len;			/* number of bytes of val in use */
	int			total;			/* not referenced in this file; NOTE(review):
								 * presumably the allocated size of val --
								 * confirm against the scanner */
}			JsonPathString;
34 
35 union YYSTYPE;
36 
37 /* flex 2.5.4 doesn't bother with a decl for this */
38 int	jsonpath_yylex(union YYSTYPE *yylval_param);
39 int	jsonpath_yyparse(JsonPathParseResult **result);
40 void jsonpath_yyerror(JsonPathParseResult **result, const char *message);
41 
42 static JsonPathParseItem *makeItemType(JsonPathItemType type);
43 static JsonPathParseItem *makeItemString(JsonPathString *s);
44 static JsonPathParseItem *makeItemVariable(JsonPathString *s);
45 static JsonPathParseItem *makeItemKey(JsonPathString *s);
46 static JsonPathParseItem *makeItemNumeric(JsonPathString *s);
47 static JsonPathParseItem *makeItemBool(bool val);
48 static JsonPathParseItem *makeItemBinary(JsonPathItemType type,
49 										 JsonPathParseItem *la,
50 										 JsonPathParseItem *ra);
51 static JsonPathParseItem *makeItemUnary(JsonPathItemType type,
52 										JsonPathParseItem *a);
53 static JsonPathParseItem *makeItemList(List *list);
54 static JsonPathParseItem *makeIndexArray(List *list);
55 static JsonPathParseItem *makeAny(int first, int last);
56 static JsonPathParseItem *makeItemLikeRegex(JsonPathParseItem *expr,
57 											JsonPathString *pattern,
58 											JsonPathString *flags);
59 
/*
 * Bison doesn't allocate anything that needs to live across parser calls,
 * so we can easily have it use palloc instead of malloc.  This prevents
 * memory leaks if we error out during parsing.  Note this only works with
 * bison >= 2.0.  However, in bison 1.875 the default is to use alloca()
 * if possible, so there's not really much problem anyhow, at least if
 * you're building with gcc.
 */
#define YYMALLOC palloc
#define YYFREE   pfree
70 
71 %}
72 
/* BISON Declarations */
%pure-parser
%expect 0
%name-prefix="jsonpath_yy"
%error-verbose
%parse-param {JsonPathParseResult **result}

/* Semantic value types available to tokens and nonterminals */
%union {
	JsonPathString		str;
	List			   *elems;	/* list of JsonPathParseItem */
	List			   *indexs;	/* list of integers */
	JsonPathParseItem  *value;
	JsonPathParseResult *result;
	JsonPathItemType	optype;
	bool				boolean;
	int					integer;
}

/* All tokens carry their text as a JsonPathString */
%token	<str>		TO_P NULL_P TRUE_P FALSE_P IS_P UNKNOWN_P EXISTS_P
%token	<str>		IDENT_P STRING_P NUMERIC_P INT_P VARIABLE_P
%token	<str>		OR_P AND_P NOT_P
%token	<str>		LESS_P LESSEQUAL_P EQUAL_P NOTEQUAL_P GREATEREQUAL_P GREATER_P
%token	<str>		ANY_P STRICT_P LAX_P LAST_P STARTS_P WITH_P LIKE_REGEX_P FLAG_P
%token	<str>		ABS_P SIZE_P TYPE_P FLOOR_P DOUBLE_P CEILING_P KEYVALUE_P

%type	<result>	result

%type	<value>		scalar_value path_primary expr array_accessor
					any_path accessor_op key predicate delimited_predicate
					index_elem starts_with_initial expr_or_predicate

%type	<elems>		accessor_expr

%type	<indexs>	index_list

%type	<optype>	comp_op method

%type	<boolean>	mode

%type	<str>		key_name

%type	<integer>	any_level

/* Operator precedence, listed from lowest to highest */
%left	OR_P
%left	AND_P
%right	NOT_P
%left	'+' '-'
%left	'*' '/' '%'
%left	UMINUS
%nonassoc '(' ')'
123 
124 /* Grammar follows */
125 %%
126 
/*
 * Start symbol: an optional mode followed by an expression or predicate.
 * The parse tree is returned through the "result" parse-param; empty input
 * sets *result to NULL.
 */
result:
	mode expr_or_predicate			{
										*result = palloc(sizeof(JsonPathParseResult));
										(*result)->expr = $2;
										(*result)->lax = $1;
									}
	| /* EMPTY */					{ *result = NULL; }
	;
135 
/* The top level of a path may be either a value expression or a predicate */
expr_or_predicate:
	expr							{ $$ = $1; }
	| predicate						{ $$ = $1; }
	;
140 
/* Yields true for lax mode, false for strict; lax is used when omitted */
mode:
	STRICT_P						{ $$ = false; }
	| LAX_P							{ $$ = true; }
	| /* EMPTY */					{ $$ = true; }
	;
146 
/* Literals and variable references; null is built via makeItemString(NULL) */
scalar_value:
	STRING_P						{ $$ = makeItemString(&$1); }
	| NULL_P						{ $$ = makeItemString(NULL); }
	| TRUE_P						{ $$ = makeItemBool(true); }
	| FALSE_P						{ $$ = makeItemBool(false); }
	| NUMERIC_P						{ $$ = makeItemNumeric(&$1); }
	| INT_P							{ $$ = makeItemNumeric(&$1); }
	| VARIABLE_P					{ $$ = makeItemVariable(&$1); }
	;
156 
/* Maps each comparison token to the item type used for the binary node */
comp_op:
	EQUAL_P							{ $$ = jpiEqual; }
	| NOTEQUAL_P					{ $$ = jpiNotEqual; }
	| LESS_P						{ $$ = jpiLess; }
	| GREATER_P						{ $$ = jpiGreater; }
	| LESSEQUAL_P					{ $$ = jpiLessOrEqual; }
	| GREATEREQUAL_P				{ $$ = jpiGreaterOrEqual; }
	;
165 
/*
 * Predicates with explicit delimiters: a parenthesized predicate or an
 * exists(...) test.  This is the only form NOT may be applied to.
 */
delimited_predicate:
	'(' predicate ')'				{ $$ = $2; }
	| EXISTS_P '(' expr ')'			{ $$ = makeItemUnary(jpiExists, $3); }
	;
170 
/*
 * Boolean-valued constructs: comparisons, AND/OR/NOT, "is unknown",
 * "starts with", and like_regex with or without a flag string.
 */
predicate:
	delimited_predicate				{ $$ = $1; }
	| expr comp_op expr				{ $$ = makeItemBinary($2, $1, $3); }
	| predicate AND_P predicate		{ $$ = makeItemBinary(jpiAnd, $1, $3); }
	| predicate OR_P predicate		{ $$ = makeItemBinary(jpiOr, $1, $3); }
	| NOT_P delimited_predicate		{ $$ = makeItemUnary(jpiNot, $2); }
	| '(' predicate ')' IS_P UNKNOWN_P
									{ $$ = makeItemUnary(jpiIsUnknown, $2); }
	| expr STARTS_P WITH_P starts_with_initial
									{ $$ = makeItemBinary(jpiStartsWith, $1, $4); }
	| expr LIKE_REGEX_P STRING_P	{ $$ = makeItemLikeRegex($1, &$3, NULL); }
	| expr LIKE_REGEX_P STRING_P FLAG_P STRING_P
									{ $$ = makeItemLikeRegex($1, &$3, &$5); }
	;
185 
/* Right-hand operand of "starts with": a string literal or a variable */
starts_with_initial:
	STRING_P						{ $$ = makeItemString(&$1); }
	| VARIABLE_P					{ $$ = makeItemVariable(&$1); }
	;
190 
/* Items that can begin an accessor chain: a scalar, '$', '@', or LAST */
path_primary:
	scalar_value					{ $$ = $1; }
	| '$'							{ $$ = makeItemType(jpiRoot); }
	| '@'							{ $$ = makeItemType(jpiCurrent); }
	| LAST_P						{ $$ = makeItemType(jpiLast); }
	;
197 
/*
 * Builds a List of parse items: a starting item (a primary, or a
 * parenthesized expr/predicate followed by one accessor) with any further
 * accessors appended in order.  The "expr" rule links the list via
 * makeItemList().
 */
accessor_expr:
	path_primary					{ $$ = list_make1($1); }
	| '(' expr ')' accessor_op		{ $$ = list_make2($2, $4); }
	| '(' predicate ')' accessor_op	{ $$ = list_make2($2, $4); }
	| accessor_expr accessor_op		{ $$ = lappend($1, $2); }
	;
204 
/*
 * Value expressions: an accessor chain, a parenthesized expression, or
 * unary/binary arithmetic.  Unary '+' and '-' take the UMINUS precedence.
 */
expr:
	accessor_expr					{ $$ = makeItemList($1); }
	| '(' expr ')'					{ $$ = $2; }
	| '+' expr %prec UMINUS			{ $$ = makeItemUnary(jpiPlus, $2); }
	| '-' expr %prec UMINUS			{ $$ = makeItemUnary(jpiMinus, $2); }
	| expr '+' expr					{ $$ = makeItemBinary(jpiAdd, $1, $3); }
	| expr '-' expr					{ $$ = makeItemBinary(jpiSub, $1, $3); }
	| expr '*' expr					{ $$ = makeItemBinary(jpiMul, $1, $3); }
	| expr '/' expr					{ $$ = makeItemBinary(jpiDiv, $1, $3); }
	| expr '%' expr					{ $$ = makeItemBinary(jpiMod, $1, $3); }
	;
216 
/*
 * One array subscript: a single index (right operand NULL) or a
 * "from TO to" range.  Either way it becomes a jpiSubscript binary node,
 * which makeIndexArray() later unpacks.
 */
index_elem:
	expr							{ $$ = makeItemBinary(jpiSubscript, $1, NULL); }
	| expr TO_P expr				{ $$ = makeItemBinary(jpiSubscript, $1, $3); }
	;
221 
/* Comma-separated, non-empty list of subscripts, kept in source order */
index_list:
	index_elem						{ $$ = list_make1($1); }
	| index_list ',' index_elem		{ $$ = lappend($1, $3); }
	;
226 
/* Array access: the "[*]" wildcard or an explicit list of subscripts */
array_accessor:
	'[' '*' ']'						{ $$ = makeItemType(jpiAnyArray); }
	| '[' index_list ']'			{ $$ = makeIndexArray($2); }
	;
231 
/*
 * A level bound for the ".**{...}" accessor.  LAST is encoded as -1, which
 * makeAny() turns into PG_UINT32_MAX.  The integer token's text is converted
 * with pg_atoi(), which is defined elsewhere.
 */
any_level:
	INT_P							{ $$ = pg_atoi($1.val, 4, 0); }
	| LAST_P						{ $$ = -1; }
	;
236 
/*
 * The ANY accessor: with no bounds it spans levels 0 through "last" (-1);
 * with one bound that value is both first and last; with two it is a range.
 */
any_path:
	ANY_P							{ $$ = makeAny(0, -1); }
	| ANY_P '{' any_level '}'		{ $$ = makeAny($3, $3); }
	| ANY_P '{' any_level TO_P any_level '}'
									{ $$ = makeAny($3, $5); }
	;
243 
/*
 * A single step applied to the preceding item: member key, ".*" wildcard,
 * array accessor, ANY path, zero-argument method call, or "?(...)" filter.
 */
accessor_op:
	'.' key							{ $$ = $2; }
	| '.' '*'						{ $$ = makeItemType(jpiAnyKey); }
	| array_accessor				{ $$ = $1; }
	| '.' any_path					{ $$ = $2; }
	| '.' method '(' ')'			{ $$ = makeItemType($2); }
	| '?' '(' predicate ')'			{ $$ = makeItemUnary(jpiFilter, $3); }
	;
252 
/* Wraps a key name's text in a jpiKey parse item */
key:
	key_name						{ $$ = makeItemKey(&$1); }
	;
256 
/*
 * Tokens accepted as a member key name: identifiers, strings, and the
 * keywords listed here.  There are no explicit actions, so Bison's default
 * action ($$ = $1) passes the token's string through.
 */
key_name:
	IDENT_P
	| STRING_P
	| TO_P
	| NULL_P
	| TRUE_P
	| FALSE_P
	| IS_P
	| UNKNOWN_P
	| EXISTS_P
	| STRICT_P
	| LAX_P
	| ABS_P
	| SIZE_P
	| TYPE_P
	| FLOOR_P
	| DOUBLE_P
	| CEILING_P
	| KEYVALUE_P
	| LAST_P
	| STARTS_P
	| WITH_P
	| LIKE_REGEX_P
	| FLAG_P
	;
282 
/* Maps each item-method keyword to its parse item type */
method:
	ABS_P							{ $$ = jpiAbs; }
	| SIZE_P						{ $$ = jpiSize; }
	| TYPE_P						{ $$ = jpiType; }
	| FLOOR_P						{ $$ = jpiFloor; }
	| DOUBLE_P						{ $$ = jpiDouble; }
	| CEILING_P						{ $$ = jpiCeiling; }
	| KEYVALUE_P					{ $$ = jpiKeyValue; }
	;
292 %%
293 
/*
 * The helper functions below allocate and fill JsonPathParseItem's of various
 * types.
 */
298 
299 static JsonPathParseItem *
300 makeItemType(JsonPathItemType type)
301 {
302 	JsonPathParseItem  *v = palloc(sizeof(*v));
303 
304 	CHECK_FOR_INTERRUPTS();
305 
306 	v->type = type;
307 	v->next = NULL;
308 
309 	return v;
310 }
311 
312 static JsonPathParseItem *
makeItemString(JsonPathString * s)313 makeItemString(JsonPathString *s)
314 {
315 	JsonPathParseItem  *v;
316 
317 	if (s == NULL)
318 	{
319 		v = makeItemType(jpiNull);
320 	}
321 	else
322 	{
323 		v = makeItemType(jpiString);
324 		v->value.string.val = s->val;
325 		v->value.string.len = s->len;
326 	}
327 
328 	return v;
329 }
330 
331 static JsonPathParseItem *
makeItemVariable(JsonPathString * s)332 makeItemVariable(JsonPathString *s)
333 {
334 	JsonPathParseItem  *v;
335 
336 	v = makeItemType(jpiVariable);
337 	v->value.string.val = s->val;
338 	v->value.string.len = s->len;
339 
340 	return v;
341 }
342 
343 static JsonPathParseItem *
makeItemKey(JsonPathString * s)344 makeItemKey(JsonPathString *s)
345 {
346 	JsonPathParseItem  *v;
347 
348 	v = makeItemString(s);
349 	v->type = jpiKey;
350 
351 	return v;
352 }
353 
354 static JsonPathParseItem *
makeItemNumeric(JsonPathString * s)355 makeItemNumeric(JsonPathString *s)
356 {
357 	JsonPathParseItem  *v;
358 
359 	v = makeItemType(jpiNumeric);
360 	v->value.numeric =
361 		DatumGetNumeric(DirectFunctionCall3(numeric_in,
362 											CStringGetDatum(s->val),
363 											ObjectIdGetDatum(InvalidOid),
364 											Int32GetDatum(-1)));
365 
366 	return v;
367 }
368 
369 static JsonPathParseItem *
makeItemBool(bool val)370 makeItemBool(bool val)
371 {
372 	JsonPathParseItem  *v = makeItemType(jpiBool);
373 
374 	v->value.boolean = val;
375 
376 	return v;
377 }
378 
379 static JsonPathParseItem *
makeItemBinary(JsonPathItemType type,JsonPathParseItem * la,JsonPathParseItem * ra)380 makeItemBinary(JsonPathItemType type, JsonPathParseItem *la, JsonPathParseItem *ra)
381 {
382 	JsonPathParseItem  *v = makeItemType(type);
383 
384 	v->value.args.left = la;
385 	v->value.args.right = ra;
386 
387 	return v;
388 }
389 
390 static JsonPathParseItem *
makeItemUnary(JsonPathItemType type,JsonPathParseItem * a)391 makeItemUnary(JsonPathItemType type, JsonPathParseItem *a)
392 {
393 	JsonPathParseItem  *v;
394 
395 	if (type == jpiPlus && a->type == jpiNumeric && !a->next)
396 		return a;
397 
398 	if (type == jpiMinus && a->type == jpiNumeric && !a->next)
399 	{
400 		v = makeItemType(jpiNumeric);
401 		v->value.numeric =
402 			DatumGetNumeric(DirectFunctionCall1(numeric_uminus,
403 												NumericGetDatum(a->value.numeric)));
404 		return v;
405 	}
406 
407 	v = makeItemType(type);
408 
409 	v->value.arg = a;
410 
411 	return v;
412 }
413 
414 static JsonPathParseItem *
makeItemList(List * list)415 makeItemList(List *list)
416 {
417 	JsonPathParseItem  *head,
418 					   *end;
419 	ListCell		   *cell = list_head(list);
420 
421 	head = end = (JsonPathParseItem *) lfirst(cell);
422 
423 	if (!lnext(cell))
424 		return head;
425 
426 	/* append items to the end of already existing list */
427 	while (end->next)
428 		end = end->next;
429 
430 	for_each_cell(cell, lnext(cell))
431 	{
432 		JsonPathParseItem *c = (JsonPathParseItem *) lfirst(cell);
433 
434 		end->next = c;
435 		end = c;
436 	}
437 
438 	return head;
439 }
440 
441 static JsonPathParseItem *
makeIndexArray(List * list)442 makeIndexArray(List *list)
443 {
444 	JsonPathParseItem  *v = makeItemType(jpiIndexArray);
445 	ListCell		   *cell;
446 	int					i = 0;
447 
448 	Assert(list_length(list) > 0);
449 	v->value.array.nelems = list_length(list);
450 
451 	v->value.array.elems = palloc(sizeof(v->value.array.elems[0]) *
452 								  v->value.array.nelems);
453 
454 	foreach(cell, list)
455 	{
456 		JsonPathParseItem  *jpi = lfirst(cell);
457 
458 		Assert(jpi->type == jpiSubscript);
459 
460 		v->value.array.elems[i].from = jpi->value.args.left;
461 		v->value.array.elems[i++].to = jpi->value.args.right;
462 	}
463 
464 	return v;
465 }
466 
467 static JsonPathParseItem *
makeAny(int first,int last)468 makeAny(int first, int last)
469 {
470 	JsonPathParseItem  *v = makeItemType(jpiAny);
471 
472 	v->value.anybounds.first = (first >= 0) ? first : PG_UINT32_MAX;
473 	v->value.anybounds.last = (last >= 0) ? last : PG_UINT32_MAX;
474 
475 	return v;
476 }
477 
478 static JsonPathParseItem *
makeItemLikeRegex(JsonPathParseItem * expr,JsonPathString * pattern,JsonPathString * flags)479 makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
480 				  JsonPathString *flags)
481 {
482 	JsonPathParseItem  *v = makeItemType(jpiLikeRegex);
483 	int					i;
484 	int					cflags;
485 
486 	v->value.like_regex.expr = expr;
487 	v->value.like_regex.pattern = pattern->val;
488 	v->value.like_regex.patternlen = pattern->len;
489 
490 	/* Parse the flags string, convert to bitmask.  Duplicate flags are OK. */
491 	v->value.like_regex.flags = 0;
492 	for (i = 0; flags && i < flags->len; i++)
493 	{
494 		switch (flags->val[i])
495 		{
496 			case 'i':
497 				v->value.like_regex.flags |= JSP_REGEX_ICASE;
498 				break;
499 			case 's':
500 				v->value.like_regex.flags |= JSP_REGEX_DOTALL;
501 				break;
502 			case 'm':
503 				v->value.like_regex.flags |= JSP_REGEX_MLINE;
504 				break;
505 			case 'x':
506 				v->value.like_regex.flags |= JSP_REGEX_WSPACE;
507 				break;
508 			case 'q':
509 				v->value.like_regex.flags |= JSP_REGEX_QUOTE;
510 				break;
511 			default:
512 				ereport(ERROR,
513 						(errcode(ERRCODE_SYNTAX_ERROR),
514 						 errmsg("invalid input syntax for type %s", "jsonpath"),
515 						 errdetail("unrecognized flag character \"%c\" in LIKE_REGEX predicate",
516 								   flags->val[i])));
517 				break;
518 		}
519 	}
520 
521 	/* Convert flags to what RE_compile_and_cache needs */
522 	cflags = jspConvertRegexFlags(v->value.like_regex.flags);
523 
524 	/* check regex validity */
525 	(void) RE_compile_and_cache(cstring_to_text_with_len(pattern->val,
526 														 pattern->len),
527 								cflags, DEFAULT_COLLATION_OID);
528 
529 	return v;
530 }
531 
532 /*
533  * Convert from XQuery regex flags to those recognized by our regex library.
534  */
535 int
jspConvertRegexFlags(uint32 xflags)536 jspConvertRegexFlags(uint32 xflags)
537 {
538 	/* By default, XQuery is very nearly the same as Spencer's AREs */
539 	int			cflags = REG_ADVANCED;
540 
541 	/* Ignore-case means the same thing, too, modulo locale issues */
542 	if (xflags & JSP_REGEX_ICASE)
543 		cflags |= REG_ICASE;
544 
545 	/* Per XQuery spec, if 'q' is specified then 'm', 's', 'x' are ignored */
546 	if (xflags & JSP_REGEX_QUOTE)
547 	{
548 		cflags &= ~REG_ADVANCED;
549 		cflags |= REG_QUOTE;
550 	}
551 	else
552 	{
553 		/* Note that dotall mode is the default in POSIX */
554 		if (!(xflags & JSP_REGEX_DOTALL))
555 			cflags |= REG_NLSTOP;
556 		if (xflags & JSP_REGEX_MLINE)
557 			cflags |= REG_NLANCH;
558 
559 		/*
560 		 * XQuery's 'x' mode is related to Spencer's expanded mode, but it's
561 		 * not really enough alike to justify treating JSP_REGEX_WSPACE as
562 		 * REG_EXPANDED.  For now we treat 'x' as unimplemented; perhaps in
563 		 * future we'll modify the regex library to have an option for
564 		 * XQuery-style ignore-whitespace mode.
565 		 */
566 		if (xflags & JSP_REGEX_WSPACE)
567 			ereport(ERROR,
568 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
569 					 errmsg("XQuery \"x\" flag (expanded regular expressions) is not implemented")));
570 	}
571 
572 	return cflags;
573 }
574 
/*
 * jsonpath_scan.l is compiled as part of jsonpath_gram.y.  Currently, this is
 * unavoidable because jsonpath_gram does not create a .h file to export its
 * token symbols.  If these files ever grow large enough to be worth compiling
 * separately, that could be fixed; but for now it seems like useless
 * complication.
 */
582 
583 #include "jsonpath_scan.c"
584