1 %{
2 /*-------------------------------------------------------------------------
3 *
4 * jsonpath_gram.y
5 * Grammar definitions for jsonpath datatype
6 *
7 * Transforms tokenized jsonpath into tree of JsonPathParseItem structs.
8 *
9 * Copyright (c) 2019-2020, PostgreSQL Global Development Group
10 *
11 * IDENTIFICATION
12 * src/backend/utils/adt/jsonpath_gram.y
13 *
14 *-------------------------------------------------------------------------
15 */
16
17 #include "postgres.h"
18
19 #include "catalog/pg_collation.h"
20 #include "fmgr.h"
21 #include "miscadmin.h"
22 #include "nodes/pg_list.h"
23 #include "regex/regex.h"
24 #include "utils/builtins.h"
25 #include "utils/jsonpath.h"
26
/*
 * JsonPathString is the semantic value type (<str>) of every token declared
 * in this grammar, and the argument type of the string-based item
 * constructors defined after the grammar.
 */
27 /* struct JsonPathString is shared between scan and gram */
28 typedef struct JsonPathString
29 {
30 char *val; /* string data; passed to numeric_in as a C string by makeItemNumeric */
31 int len; /* length of val, copied into string/variable/key items */
32 int total; /* not referenced in this file; presumably allocated size of val -- TODO confirm in scanner */
33 } JsonPathString;
34
/* Forward declaration so that jsonpath_yylex() can be declared here */
35 union YYSTYPE;
36
/*
 * Scanner and parser entry points.  Their "jsonpath_yy" prefix matches the
 * %name-prefix directive, and the JsonPathParseResult ** argument matches
 * the %parse-param directive in the bison declarations of this file.
 */
37 /* flex 2.5.4 doesn't bother with a decl for this */
38 int jsonpath_yylex(union YYSTYPE *yylval_param);
39 int jsonpath_yyparse(JsonPathParseResult **result);
40 void jsonpath_yyerror(JsonPathParseResult **result, const char *message);
41
/*
 * Constructors for JsonPathParseItem nodes.  They are used by the grammar
 * actions and defined after the grammar rules.
 */
42 static JsonPathParseItem *makeItemType(JsonPathItemType type);
43 static JsonPathParseItem *makeItemString(JsonPathString *s);
44 static JsonPathParseItem *makeItemVariable(JsonPathString *s);
45 static JsonPathParseItem *makeItemKey(JsonPathString *s);
46 static JsonPathParseItem *makeItemNumeric(JsonPathString *s);
47 static JsonPathParseItem *makeItemBool(bool val);
48 static JsonPathParseItem *makeItemBinary(JsonPathItemType type,
49 JsonPathParseItem *la,
50 JsonPathParseItem *ra);
51 static JsonPathParseItem *makeItemUnary(JsonPathItemType type,
52 JsonPathParseItem *a);
53 static JsonPathParseItem *makeItemList(List *list);
54 static JsonPathParseItem *makeIndexArray(List *list);
55 static JsonPathParseItem *makeAny(int first, int last);
56 static JsonPathParseItem *makeItemLikeRegex(JsonPathParseItem *expr,
57 JsonPathString *pattern,
58 JsonPathString *flags);
59
60 /*
61 * Bison doesn't allocate anything that needs to live across parser calls,
62 * so we can easily have it use palloc instead of malloc. This prevents
63 * memory leaks if we error out during parsing. Note this only works with
64 * bison >= 2.0. However, in bison 1.875 the default is to use alloca()
65 * if possible, so there's not really much problem anyhow, at least if
66 * you're building with gcc.
67 */
/* Route the generated parser's own allocations through palloc/pfree */
68 #define YYMALLOC palloc
69 #define YYFREE pfree
70
71 %}
72
73 /* BISON Declarations */
/*
 * %pure-parser: the semantic value is handed to the lexer as an argument
 *		(compare the jsonpath_yylex prototype in the prologue) instead of
 *		through a global variable.
 * %expect 0: the grammar is expected to produce no parser conflicts.
 * %name-prefix: generated external symbols are named jsonpath_yy* rather
 *		than yy*.
 * %error-verbose: request more detailed syntax error messages from bison.
 * %parse-param: extra argument of jsonpath_yyparse() and jsonpath_yyerror();
 *		the "result" rule stores the parse result through it.
 */
74 %pure-parser
75 %expect 0
76 %name-prefix="jsonpath_yy"
77 %error-verbose
78 %parse-param {JsonPathParseResult **result}
79
/*
 * Semantic value types.  The member names are the <tag> names used by the
 * %token and %type declarations that follow.
 */
80 %union {
81 JsonPathString str; /* token text; also the value of key_name */
82 List *elems; /* list of JsonPathParseItem */
83 List *indexs; /* list of JsonPathParseItem (jpiSubscript items built by index_elem) */
84 JsonPathParseItem *value; /* a single parse tree item */
85 JsonPathParseResult *result; /* declared for "result", whose actions assign *result rather than $$ */
86 JsonPathItemType optype; /* item type selected by comp_op or method */
87 bool boolean; /* lax flag produced by mode */
88 int integer; /* bound produced by any_level */
89 }
90
/* Every token carries a JsonPathString (<str>) semantic value */
91 %token <str> TO_P NULL_P TRUE_P FALSE_P IS_P UNKNOWN_P EXISTS_P
92 %token <str> IDENT_P STRING_P NUMERIC_P INT_P VARIABLE_P
93 %token <str> OR_P AND_P NOT_P
94 %token <str> LESS_P LESSEQUAL_P EQUAL_P NOTEQUAL_P GREATEREQUAL_P GREATER_P
95 %token <str> ANY_P STRICT_P LAX_P LAST_P STARTS_P WITH_P LIKE_REGEX_P FLAG_P
96 %token <str> ABS_P SIZE_P TYPE_P FLOOR_P DOUBLE_P CEILING_P KEYVALUE_P
97 %token <str> DATETIME_P
98
/* Semantic value type of each nonterminal */
99 %type <result> result
100
/* Nonterminals that yield a single JsonPathParseItem */
101 %type <value> scalar_value path_primary expr array_accessor
102 any_path accessor_op key predicate delimited_predicate
103 index_elem starts_with_initial expr_or_predicate
104 datetime_template opt_datetime_template
105
/* Nonterminals that yield a List of JsonPathParseItem */
106 %type <elems> accessor_expr
107
108 %type <indexs> index_list
109
/* Nonterminals that yield a JsonPathItemType */
110 %type <optype> comp_op method
111
112 %type <boolean> mode
113
114 %type <str> key_name
115
116 %type <integer> any_level
117
/*
 * Operator precedence and associativity; later lines bind more tightly.
 * UMINUS is not a real token: it is only named by the %prec annotations on
 * the unary '+' and '-' rules of "expr".
 */
118 %left OR_P
119 %left AND_P
120 %right NOT_P
121 %left '+' '-'
122 %left '*' '/' '%'
123 %left UMINUS
124 %nonassoc '(' ')'
125
126 /* Grammar follows */
127 %%
128
/*
 * Start rule.  For non-empty input, allocates the JsonPathParseResult that
 * is returned through the "result" parse parameter and stores the parsed
 * item ($2) and the lax flag ($1) in it.  Empty input sets *result to NULL.
 */
129 result:
130 mode expr_or_predicate {
131 *result = palloc(sizeof(JsonPathParseResult));
132 (*result)->expr = $2;
133 (*result)->lax = $1;
134 }
135 | /* EMPTY */ { *result = NULL; }
136 ;
137
/* The top-level item may be either a value expression or a predicate */
138 expr_or_predicate:
139 expr { $$ = $1; }
140 | predicate { $$ = $1; }
141 ;
142
/*
 * Optional mode keyword.  The value is the "lax" flag: false for STRICT_P,
 * true for LAX_P, and true when no keyword is given.
 */
143 mode:
144 STRICT_P { $$ = false; }
145 | LAX_P { $$ = true; }
146 | /* EMPTY */ { $$ = true; }
147 ;
148
/*
 * Literals and variables.  NULL_P passes NULL to makeItemString(), which
 * then builds a jpiNull item.  NUMERIC_P and INT_P are both converted by
 * makeItemNumeric().
 */
149 scalar_value:
150 STRING_P { $$ = makeItemString(&$1); }
151 | NULL_P { $$ = makeItemString(NULL); }
152 | TRUE_P { $$ = makeItemBool(true); }
153 | FALSE_P { $$ = makeItemBool(false); }
154 | NUMERIC_P { $$ = makeItemNumeric(&$1); }
155 | INT_P { $$ = makeItemNumeric(&$1); }
156 | VARIABLE_P { $$ = makeItemVariable(&$1); }
157 ;
158
/* Map each comparison operator token to its JsonPathItemType */
159 comp_op:
160 EQUAL_P { $$ = jpiEqual; }
161 | NOTEQUAL_P { $$ = jpiNotEqual; }
162 | LESS_P { $$ = jpiLess; }
163 | GREATER_P { $$ = jpiGreater; }
164 | LESSEQUAL_P { $$ = jpiLessOrEqual; }
165 | GREATEREQUAL_P { $$ = jpiGreaterOrEqual; }
166 ;
167
/*
 * Predicates with explicit delimiters: a parenthesized predicate, or
 * EXISTS_P applied to a parenthesized expression.  This is the only form
 * that NOT_P accepts in "predicate".
 */
168 delimited_predicate:
169 '(' predicate ')' { $$ = $2; }
170 | EXISTS_P '(' expr ')' { $$ = makeItemUnary(jpiExists, $3); }
171 ;
172
/*
 * Boolean predicates.
 *
 * NOT_P applies only to a delimited_predicate, and IS_P UNKNOWN_P only to a
 * parenthesized predicate.  LIKE_REGEX_P takes a string literal pattern and
 * optionally FLAG_P followed by a string of flag characters;
 * makeItemLikeRegex() validates the flags and the pattern during parsing.
 */
173 predicate:
174 delimited_predicate { $$ = $1; }
175 | expr comp_op expr { $$ = makeItemBinary($2, $1, $3); }
176 | predicate AND_P predicate { $$ = makeItemBinary(jpiAnd, $1, $3); }
177 | predicate OR_P predicate { $$ = makeItemBinary(jpiOr, $1, $3); }
178 | NOT_P delimited_predicate { $$ = makeItemUnary(jpiNot, $2); }
179 | '(' predicate ')' IS_P UNKNOWN_P
180 { $$ = makeItemUnary(jpiIsUnknown, $2); }
181 | expr STARTS_P WITH_P starts_with_initial
182 { $$ = makeItemBinary(jpiStartsWith, $1, $4); }
183 | expr LIKE_REGEX_P STRING_P { $$ = makeItemLikeRegex($1, &$3, NULL); }
184 | expr LIKE_REGEX_P STRING_P FLAG_P STRING_P
185 { $$ = makeItemLikeRegex($1, &$3, &$5); }
186 ;
187
/* Right-hand operand of STARTS_P WITH_P: a string literal or a variable */
188 starts_with_initial:
189 STRING_P { $$ = makeItemString(&$1); }
190 | VARIABLE_P { $$ = makeItemVariable(&$1); }
191 ;
192
/*
 * First item of a path: a scalar value, '$' (jpiRoot), '@' (jpiCurrent),
 * or LAST_P (jpiLast).
 */
193 path_primary:
194 scalar_value { $$ = $1; }
195 | '$' { $$ = makeItemType(jpiRoot); }
196 | '@' { $$ = makeItemType(jpiCurrent); }
197 | LAST_P { $$ = makeItemType(jpiLast); }
198 ;
199
/*
 * Collects a path as a List of items: a primary followed by any number of
 * accessors.  A parenthesized expr or predicate is accepted here only when
 * at least one accessor_op follows it; the bare parenthesized forms are
 * handled by "expr" and "delimited_predicate".
 */
200 accessor_expr:
201 path_primary { $$ = list_make1($1); }
202 | '(' expr ')' accessor_op { $$ = list_make2($2, $4); }
203 | '(' predicate ')' accessor_op { $$ = list_make2($2, $4); }
204 | accessor_expr accessor_op { $$ = lappend($1, $2); }
205 ;
206
/*
 * Value expressions.  The List built by accessor_expr is linked into a
 * single item chain by makeItemList().  Unary '+' and '-' take the UMINUS
 * precedence; makeItemUnary() may fold them into a numeric literal.
 */
207 expr:
208 accessor_expr { $$ = makeItemList($1); }
209 | '(' expr ')' { $$ = $2; }
210 | '+' expr %prec UMINUS { $$ = makeItemUnary(jpiPlus, $2); }
211 | '-' expr %prec UMINUS { $$ = makeItemUnary(jpiMinus, $2); }
212 | expr '+' expr { $$ = makeItemBinary(jpiAdd, $1, $3); }
213 | expr '-' expr { $$ = makeItemBinary(jpiSub, $1, $3); }
214 | expr '*' expr { $$ = makeItemBinary(jpiMul, $1, $3); }
215 | expr '/' expr { $$ = makeItemBinary(jpiDiv, $1, $3); }
216 | expr '%' expr { $$ = makeItemBinary(jpiMod, $1, $3); }
217 ;
218
/*
 * One array subscript: a single expression, or a range "expr TO_P expr".
 * Both are represented as a jpiSubscript item whose right argument is NULL
 * for the single-expression form.
 */
219 index_elem:
220 expr { $$ = makeItemBinary(jpiSubscript, $1, NULL); }
221 | expr TO_P expr { $$ = makeItemBinary(jpiSubscript, $1, $3); }
222 ;
223
/* Comma-separated, non-empty list of subscripts (List of jpiSubscript items) */
224 index_list:
225 index_elem { $$ = list_make1($1); }
226 | index_list ',' index_elem { $$ = lappend($1, $3); }
227 ;
228
/* '[' '*' ']' yields jpiAnyArray; otherwise makeIndexArray() packs the subscripts */
229 array_accessor:
230 '[' '*' ']' { $$ = makeItemType(jpiAnyArray); }
231 | '[' index_list ']' { $$ = makeIndexArray($2); }
232 ;
233
/*
 * A level bound for ANY_P: an integer parsed with pg_atoi(), or LAST_P,
 * which is encoded as -1.  makeAny() stores negative bounds as
 * PG_UINT32_MAX.
 */
234 any_level:
235 INT_P { $$ = pg_atoi($1.val, 4, 0); }
236 | LAST_P { $$ = -1; }
237 ;
238
/*
 * ANY_P with optional level bounds in braces.  Without braces the bounds
 * are (0, -1); a single level is used as both first and last bound.
 */
239 any_path:
240 ANY_P { $$ = makeAny(0, -1); }
241 | ANY_P '{' any_level '}' { $$ = makeAny($3, $3); }
242 | ANY_P '{' any_level TO_P any_level '}'
243 { $$ = makeAny($3, $5); }
244 ;
245
/*
 * A single path step: member access by key, '.' '*' (jpiAnyKey), an array
 * accessor, an ANY_P step, an argument-less item method, DATETIME_P with an
 * optional template argument, or a '?' filter over a predicate.
 */
246 accessor_op:
247 '.' key { $$ = $2; }
248 | '.' '*' { $$ = makeItemType(jpiAnyKey); }
249 | array_accessor { $$ = $1; }
250 | '.' any_path { $$ = $2; }
251 | '.' method '(' ')' { $$ = makeItemType($2); }
252 | '.' DATETIME_P '(' opt_datetime_template ')'
253 { $$ = makeItemUnary(jpiDatetime, $4); }
254 | '?' '(' predicate ')' { $$ = makeItemUnary(jpiFilter, $3); }
255 ;
256
/* Template argument of DATETIME_P: a string literal */
257 datetime_template:
258 STRING_P { $$ = makeItemString(&$1); }
259 ;
260
/* The template is optional; NULL is passed on when it is omitted */
261 opt_datetime_template:
262 datetime_template { $$ = $1; }
263 | /* EMPTY */ { $$ = NULL; }
264 ;
265
/* Member accessor name, turned into a jpiKey item */
266 key:
267 key_name { $$ = makeItemKey(&$1); }
268 ;
269
/*
 * Tokens accepted as a member name after '.'.  Besides IDENT_P and STRING_P
 * this lists keyword tokens, including the method-name tokens and
 * DATETIME_P that accessor_op also uses.  The alternatives have no explicit
 * action, so bison's default action ($$ = $1) passes the token's
 * JsonPathString through.
 */
270 key_name:
271 IDENT_P
272 | STRING_P
273 | TO_P
274 | NULL_P
275 | TRUE_P
276 | FALSE_P
277 | IS_P
278 | UNKNOWN_P
279 | EXISTS_P
280 | STRICT_P
281 | LAX_P
282 | ABS_P
283 | SIZE_P
284 | TYPE_P
285 | FLOOR_P
286 | DOUBLE_P
287 | CEILING_P
288 | DATETIME_P
289 | KEYVALUE_P
290 | LAST_P
291 | STARTS_P
292 | WITH_P
293 | LIKE_REGEX_P
294 | FLAG_P
295 ;
296
/*
 * Map each argument-less item method token to its JsonPathItemType; used by
 * the "'.' method '(' ')'" alternative of accessor_op.
 */
297 method:
298 ABS_P { $$ = jpiAbs; }
299 | SIZE_P { $$ = jpiSize; }
300 | TYPE_P { $$ = jpiType; }
301 | FLOOR_P { $$ = jpiFloor; }
302 | DOUBLE_P { $$ = jpiDouble; }
303 | CEILING_P { $$ = jpiCeiling; }
304 | KEYVALUE_P { $$ = jpiKeyValue; }
305 ;
306 %%
307
308 /*
309 * The helper functions below allocate and fill JsonPathParseItem's of various
310 * types.
311 */
312
313 static JsonPathParseItem *
314 makeItemType(JsonPathItemType type)
315 {
316 JsonPathParseItem *v = palloc(sizeof(*v));
317
318 CHECK_FOR_INTERRUPTS();
319
320 v->type = type;
321 v->next = NULL;
322
323 return v;
324 }
325
326 static JsonPathParseItem *
makeItemString(JsonPathString * s)327 makeItemString(JsonPathString *s)
328 {
329 JsonPathParseItem *v;
330
331 if (s == NULL)
332 {
333 v = makeItemType(jpiNull);
334 }
335 else
336 {
337 v = makeItemType(jpiString);
338 v->value.string.val = s->val;
339 v->value.string.len = s->len;
340 }
341
342 return v;
343 }
344
345 static JsonPathParseItem *
makeItemVariable(JsonPathString * s)346 makeItemVariable(JsonPathString *s)
347 {
348 JsonPathParseItem *v;
349
350 v = makeItemType(jpiVariable);
351 v->value.string.val = s->val;
352 v->value.string.len = s->len;
353
354 return v;
355 }
356
357 static JsonPathParseItem *
makeItemKey(JsonPathString * s)358 makeItemKey(JsonPathString *s)
359 {
360 JsonPathParseItem *v;
361
362 v = makeItemString(s);
363 v->type = jpiKey;
364
365 return v;
366 }
367
368 static JsonPathParseItem *
makeItemNumeric(JsonPathString * s)369 makeItemNumeric(JsonPathString *s)
370 {
371 JsonPathParseItem *v;
372
373 v = makeItemType(jpiNumeric);
374 v->value.numeric =
375 DatumGetNumeric(DirectFunctionCall3(numeric_in,
376 CStringGetDatum(s->val),
377 ObjectIdGetDatum(InvalidOid),
378 Int32GetDatum(-1)));
379
380 return v;
381 }
382
383 static JsonPathParseItem *
makeItemBool(bool val)384 makeItemBool(bool val)
385 {
386 JsonPathParseItem *v = makeItemType(jpiBool);
387
388 v->value.boolean = val;
389
390 return v;
391 }
392
393 static JsonPathParseItem *
makeItemBinary(JsonPathItemType type,JsonPathParseItem * la,JsonPathParseItem * ra)394 makeItemBinary(JsonPathItemType type, JsonPathParseItem *la, JsonPathParseItem *ra)
395 {
396 JsonPathParseItem *v = makeItemType(type);
397
398 v->value.args.left = la;
399 v->value.args.right = ra;
400
401 return v;
402 }
403
404 static JsonPathParseItem *
makeItemUnary(JsonPathItemType type,JsonPathParseItem * a)405 makeItemUnary(JsonPathItemType type, JsonPathParseItem *a)
406 {
407 JsonPathParseItem *v;
408
409 if (type == jpiPlus && a->type == jpiNumeric && !a->next)
410 return a;
411
412 if (type == jpiMinus && a->type == jpiNumeric && !a->next)
413 {
414 v = makeItemType(jpiNumeric);
415 v->value.numeric =
416 DatumGetNumeric(DirectFunctionCall1(numeric_uminus,
417 NumericGetDatum(a->value.numeric)));
418 return v;
419 }
420
421 v = makeItemType(type);
422
423 v->value.arg = a;
424
425 return v;
426 }
427
428 static JsonPathParseItem *
makeItemList(List * list)429 makeItemList(List *list)
430 {
431 JsonPathParseItem *head,
432 *end;
433 ListCell *cell;
434
435 head = end = (JsonPathParseItem *) linitial(list);
436
437 if (list_length(list) == 1)
438 return head;
439
440 /* append items to the end of already existing list */
441 while (end->next)
442 end = end->next;
443
444 for_each_from(cell, list, 1)
445 {
446 JsonPathParseItem *c = (JsonPathParseItem *) lfirst(cell);
447
448 end->next = c;
449 end = c;
450 }
451
452 return head;
453 }
454
455 static JsonPathParseItem *
makeIndexArray(List * list)456 makeIndexArray(List *list)
457 {
458 JsonPathParseItem *v = makeItemType(jpiIndexArray);
459 ListCell *cell;
460 int i = 0;
461
462 Assert(list_length(list) > 0);
463 v->value.array.nelems = list_length(list);
464
465 v->value.array.elems = palloc(sizeof(v->value.array.elems[0]) *
466 v->value.array.nelems);
467
468 foreach(cell, list)
469 {
470 JsonPathParseItem *jpi = lfirst(cell);
471
472 Assert(jpi->type == jpiSubscript);
473
474 v->value.array.elems[i].from = jpi->value.args.left;
475 v->value.array.elems[i++].to = jpi->value.args.right;
476 }
477
478 return v;
479 }
480
481 static JsonPathParseItem *
makeAny(int first,int last)482 makeAny(int first, int last)
483 {
484 JsonPathParseItem *v = makeItemType(jpiAny);
485
486 v->value.anybounds.first = (first >= 0) ? first : PG_UINT32_MAX;
487 v->value.anybounds.last = (last >= 0) ? last : PG_UINT32_MAX;
488
489 return v;
490 }
491
492 static JsonPathParseItem *
makeItemLikeRegex(JsonPathParseItem * expr,JsonPathString * pattern,JsonPathString * flags)493 makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
494 JsonPathString *flags)
495 {
496 JsonPathParseItem *v = makeItemType(jpiLikeRegex);
497 int i;
498 int cflags;
499
500 v->value.like_regex.expr = expr;
501 v->value.like_regex.pattern = pattern->val;
502 v->value.like_regex.patternlen = pattern->len;
503
504 /* Parse the flags string, convert to bitmask. Duplicate flags are OK. */
505 v->value.like_regex.flags = 0;
506 for (i = 0; flags && i < flags->len; i++)
507 {
508 switch (flags->val[i])
509 {
510 case 'i':
511 v->value.like_regex.flags |= JSP_REGEX_ICASE;
512 break;
513 case 's':
514 v->value.like_regex.flags |= JSP_REGEX_DOTALL;
515 break;
516 case 'm':
517 v->value.like_regex.flags |= JSP_REGEX_MLINE;
518 break;
519 case 'x':
520 v->value.like_regex.flags |= JSP_REGEX_WSPACE;
521 break;
522 case 'q':
523 v->value.like_regex.flags |= JSP_REGEX_QUOTE;
524 break;
525 default:
526 ereport(ERROR,
527 (errcode(ERRCODE_SYNTAX_ERROR),
528 errmsg("invalid input syntax for type %s", "jsonpath"),
529 errdetail("unrecognized flag character \"%c\" in LIKE_REGEX predicate",
530 flags->val[i])));
531 break;
532 }
533 }
534
535 /* Convert flags to what RE_compile_and_cache needs */
536 cflags = jspConvertRegexFlags(v->value.like_regex.flags);
537
538 /* check regex validity */
539 (void) RE_compile_and_cache(cstring_to_text_with_len(pattern->val,
540 pattern->len),
541 cflags, DEFAULT_COLLATION_OID);
542
543 return v;
544 }
545
546 /*
547 * Convert from XQuery regex flags to those recognized by our regex library.
548 */
549 int
jspConvertRegexFlags(uint32 xflags)550 jspConvertRegexFlags(uint32 xflags)
551 {
552 /* By default, XQuery is very nearly the same as Spencer's AREs */
553 int cflags = REG_ADVANCED;
554
555 /* Ignore-case means the same thing, too, modulo locale issues */
556 if (xflags & JSP_REGEX_ICASE)
557 cflags |= REG_ICASE;
558
559 /* Per XQuery spec, if 'q' is specified then 'm', 's', 'x' are ignored */
560 if (xflags & JSP_REGEX_QUOTE)
561 {
562 cflags &= ~REG_ADVANCED;
563 cflags |= REG_QUOTE;
564 }
565 else
566 {
567 /* Note that dotall mode is the default in POSIX */
568 if (!(xflags & JSP_REGEX_DOTALL))
569 cflags |= REG_NLSTOP;
570 if (xflags & JSP_REGEX_MLINE)
571 cflags |= REG_NLANCH;
572
573 /*
574 * XQuery's 'x' mode is related to Spencer's expanded mode, but it's
575 * not really enough alike to justify treating JSP_REGEX_WSPACE as
576 * REG_EXPANDED. For now we treat 'x' as unimplemented; perhaps in
577 * future we'll modify the regex library to have an option for
578 * XQuery-style ignore-whitespace mode.
579 */
580 if (xflags & JSP_REGEX_WSPACE)
581 ereport(ERROR,
582 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
583 errmsg("XQuery \"x\" flag (expanded regular expressions) is not implemented")));
584 }
585
586 return cflags;
587 }
588
589 /*
590 * jsonpath_scan.l is compiled as part of jsonpath_gram.y. Currently, this is
591 * unavoidable because jsonpath_gram does not create a .h file to export its
592 * token symbols. If these files ever grow large enough to be worth compiling
593 * separately, that could be fixed; but for now it seems like useless
594 * complication.
595 */
596
597 #include "jsonpath_scan.c"
598