1 %{
2 /*-------------------------------------------------------------------------
3 *
4 * jsonpath_gram.y
5 * Grammar definitions for jsonpath datatype
6 *
7 * Transforms tokenized jsonpath into tree of JsonPathParseItem structs.
8 *
9 * Copyright (c) 2019, PostgreSQL Global Development Group
10 *
11 * IDENTIFICATION
12 * src/backend/utils/adt/jsonpath_gram.y
13 *
14 *-------------------------------------------------------------------------
15 */
16
#include "postgres.h"

#include "catalog/pg_collation.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "nodes/pg_list.h"
#include "regex/regex.h"
#include "utils/builtins.h"
#include "utils/jsonpath.h"
26
/*
 * JsonPathString is the string representation shared between the scanner
 * and this grammar.  Every <str> token and the key_name nonterminal carry
 * one of these as their semantic value.
 */
typedef struct JsonPathString
{
	char	   *val;			/* the characters of the string */
	int			len;			/* number of characters of val in use */
	int			total;			/* NOTE(review): presumably the allocated
								 * size of val; not used in this file,
								 * confirm against the scanner */
} JsonPathString;
34
35 union YYSTYPE;
36
37 /* flex 2.5.4 doesn't bother with a decl for this */
38 int jsonpath_yylex(union YYSTYPE *yylval_param);
39 int jsonpath_yyparse(JsonPathParseResult **result);
40 void jsonpath_yyerror(JsonPathParseResult **result, const char *message);
41
42 static JsonPathParseItem *makeItemType(JsonPathItemType type);
43 static JsonPathParseItem *makeItemString(JsonPathString *s);
44 static JsonPathParseItem *makeItemVariable(JsonPathString *s);
45 static JsonPathParseItem *makeItemKey(JsonPathString *s);
46 static JsonPathParseItem *makeItemNumeric(JsonPathString *s);
47 static JsonPathParseItem *makeItemBool(bool val);
48 static JsonPathParseItem *makeItemBinary(JsonPathItemType type,
49 JsonPathParseItem *la,
50 JsonPathParseItem *ra);
51 static JsonPathParseItem *makeItemUnary(JsonPathItemType type,
52 JsonPathParseItem *a);
53 static JsonPathParseItem *makeItemList(List *list);
54 static JsonPathParseItem *makeIndexArray(List *list);
55 static JsonPathParseItem *makeAny(int first, int last);
56 static JsonPathParseItem *makeItemLikeRegex(JsonPathParseItem *expr,
57 JsonPathString *pattern,
58 JsonPathString *flags);
59
/*
 * Make Bison allocate its working storage with palloc/pfree rather than
 * malloc/free.  Nothing Bison allocates has to survive across parser calls,
 * so this is safe, and it prevents memory leaks if we error out during
 * parsing.
 *
 * Note this only works with bison >= 2.0.  However, in bison 1.875 the
 * default is to use alloca() if possible, so there's not really much
 * problem anyhow, at least if you're building with gcc.
 */
#define YYMALLOC	palloc
#define YYFREE		pfree
70
71 %}
72
/* BISON Declarations */

/* pure parser: the scanner is handed yylval as an argument */
%pure-parser

/* the grammar must build with no unresolved conflicts */
%expect 0

/* all generated symbols are named jsonpath_yy... */
%name-prefix="jsonpath_yy"
%error-verbose

/* extra argument of jsonpath_yyparse()/jsonpath_yyerror(): where the result goes */
%parse-param {JsonPathParseResult **result}
79
/* Semantic value types of tokens and nonterminals */
%union {
	JsonPathString		str;		/* token text, also key_name */
	List			   *elems;		/* list of JsonPathParseItem (accessor_expr) */
	List			   *indexs;		/* list of jpiSubscript JsonPathParseItem (index_list) */
	JsonPathParseItem  *value;		/* a parse tree item */
	JsonPathParseResult *result;	/* type of the top-level "result" rule */
	JsonPathItemType	optype;		/* item type chosen by comp_op / method */
	bool				boolean;	/* lax flag produced by "mode" */
	int					integer;	/* bound produced by any_level */
}
90
/* Tokens; each one carries a JsonPathString as its semantic value */
%token	<str>	TO_P NULL_P TRUE_P FALSE_P IS_P UNKNOWN_P EXISTS_P
%token	<str>	IDENT_P STRING_P NUMERIC_P INT_P VARIABLE_P
%token	<str>	OR_P AND_P NOT_P
%token	<str>	LESS_P LESSEQUAL_P EQUAL_P NOTEQUAL_P GREATEREQUAL_P GREATER_P
%token	<str>	ANY_P STRICT_P LAX_P LAST_P STARTS_P WITH_P LIKE_REGEX_P FLAG_P
%token	<str>	ABS_P SIZE_P TYPE_P FLOOR_P DOUBLE_P CEILING_P KEYVALUE_P

/* Nonterminals, grouped by semantic value type */
%type	<result>	result

%type	<value>		scalar_value path_primary expr array_accessor
					any_path accessor_op key predicate delimited_predicate
					index_elem starts_with_initial expr_or_predicate

%type	<elems>		accessor_expr

%type	<indexs>	index_list

%type	<optype>	comp_op method

%type	<boolean>	mode

%type	<str>		key_name

%type	<integer>	any_level

/* Operator precedence, listed from lowest to highest */
%left	OR_P
%left	AND_P
%right	NOT_P
%left	'+' '-'
%left	'*' '/' '%'
%left	UMINUS
%nonassoc '(' ')'
123
124 /* Grammar follows */
125 %%
126
/*
 * Start symbol.  On success the parse tree and the lax/strict flag are
 * handed back through the "result" parse parameter; empty input yields a
 * NULL result.
 */
result:
	mode expr_or_predicate
		{
			*result = palloc(sizeof(JsonPathParseResult));
			(*result)->expr = $2;
			(*result)->lax = $1;
		}
	| /* EMPTY */
		{
			*result = NULL;
		}
	;
135
/* The body of a jsonpath is either a value expression or a predicate */
expr_or_predicate:
	expr
		{ $$ = $1; }
	| predicate
		{ $$ = $1; }
	;
140
/* Optional mode prefix; the value is true for lax mode, which is the default */
mode:
	STRICT_P
		{ $$ = false; }
	| LAX_P
		{ $$ = true; }
	| /* EMPTY */
		{ $$ = true; }
	;
146
/*
 * Literals and variables.  NULL_P is built by passing NULL to
 * makeItemString(), which produces a jpiNull item.
 */
scalar_value:
	STRING_P
		{ $$ = makeItemString(&$1); }
	| NULL_P
		{ $$ = makeItemString(NULL); }
	| TRUE_P
		{ $$ = makeItemBool(true); }
	| FALSE_P
		{ $$ = makeItemBool(false); }
	| NUMERIC_P
		{ $$ = makeItemNumeric(&$1); }
	| INT_P
		{ $$ = makeItemNumeric(&$1); }
	| VARIABLE_P
		{ $$ = makeItemVariable(&$1); }
	;
156
/* Comparison operator tokens, mapped to the matching item type */
comp_op:
	EQUAL_P
		{ $$ = jpiEqual; }
	| NOTEQUAL_P
		{ $$ = jpiNotEqual; }
	| LESS_P
		{ $$ = jpiLess; }
	| GREATER_P
		{ $$ = jpiGreater; }
	| LESSEQUAL_P
		{ $$ = jpiLessOrEqual; }
	| GREATEREQUAL_P
		{ $$ = jpiGreaterOrEqual; }
	;
165
/* A predicate that is self-delimiting: parenthesized, or an EXISTS test */
delimited_predicate:
	'(' predicate ')'
		{ $$ = $2; }
	| EXISTS_P '(' expr ')'
		{ $$ = makeItemUnary(jpiExists, $3); }
	;
170
/*
 * Boolean-valued constructs: comparisons, AND/OR/NOT combinations,
 * IS UNKNOWN, STARTS WITH, and LIKE_REGEX with or without a flag string.
 */
predicate:
	delimited_predicate
		{ $$ = $1; }
	| expr comp_op expr
		{ $$ = makeItemBinary($2, $1, $3); }
	| predicate AND_P predicate
		{ $$ = makeItemBinary(jpiAnd, $1, $3); }
	| predicate OR_P predicate
		{ $$ = makeItemBinary(jpiOr, $1, $3); }
	| NOT_P delimited_predicate
		{ $$ = makeItemUnary(jpiNot, $2); }
	| '(' predicate ')' IS_P UNKNOWN_P
		{ $$ = makeItemUnary(jpiIsUnknown, $2); }
	| expr STARTS_P WITH_P starts_with_initial
		{ $$ = makeItemBinary(jpiStartsWith, $1, $4); }
	| expr LIKE_REGEX_P STRING_P
		{ $$ = makeItemLikeRegex($1, &$3, NULL); }
	| expr LIKE_REGEX_P STRING_P FLAG_P STRING_P
		{ $$ = makeItemLikeRegex($1, &$3, &$5); }
	;
185
/* Right-hand operand of STARTS WITH: a string literal or a variable */
starts_with_initial:
	STRING_P
		{ $$ = makeItemString(&$1); }
	| VARIABLE_P
		{ $$ = makeItemVariable(&$1); }
	;
190
/* Things a path can start with: a scalar, '$' (root), '@' (current) or LAST */
path_primary:
	scalar_value
		{ $$ = $1; }
	| '$'
		{ $$ = makeItemType(jpiRoot); }
	| '@'
		{ $$ = makeItemType(jpiCurrent); }
	| LAST_P
		{ $$ = makeItemType(jpiLast); }
	;
197
/*
 * A starting item followed by any number of accessors, collected as a List.
 * The "expr" rule turns the list into a chain of items via makeItemList().
 */
accessor_expr:
	path_primary
		{ $$ = list_make1($1); }
	| '(' expr ')' accessor_op
		{ $$ = list_make2($2, $4); }
	| '(' predicate ')' accessor_op
		{ $$ = list_make2($2, $4); }
	| accessor_expr accessor_op
		{ $$ = lappend($1, $2); }
	;
204
/*
 * Value expressions: accessor chains, parenthesized expressions, and unary
 * and binary arithmetic.  Both unary operators take the UMINUS precedence.
 */
expr:
	accessor_expr
		{ $$ = makeItemList($1); }
	| '(' expr ')'
		{ $$ = $2; }
	| '+' expr %prec UMINUS
		{ $$ = makeItemUnary(jpiPlus, $2); }
	| '-' expr %prec UMINUS
		{ $$ = makeItemUnary(jpiMinus, $2); }
	| expr '+' expr
		{ $$ = makeItemBinary(jpiAdd, $1, $3); }
	| expr '-' expr
		{ $$ = makeItemBinary(jpiSub, $1, $3); }
	| expr '*' expr
		{ $$ = makeItemBinary(jpiMul, $1, $3); }
	| expr '/' expr
		{ $$ = makeItemBinary(jpiDiv, $1, $3); }
	| expr '%' expr
		{ $$ = makeItemBinary(jpiMod, $1, $3); }
	;
216
/*
 * One array subscript: a single index (right side NULL) or a "from TO to"
 * range.  Both forms are wrapped in a jpiSubscript item.
 */
index_elem:
	expr
		{ $$ = makeItemBinary(jpiSubscript, $1, NULL); }
	| expr TO_P expr
		{ $$ = makeItemBinary(jpiSubscript, $1, $3); }
	;
221
/* Comma-separated, non-empty list of subscripts */
index_list:
	index_elem
		{ $$ = list_make1($1); }
	| index_list ',' index_elem
		{ $$ = lappend($1, $3); }
	;
226
/* Array access: "[*]" for every element, or an explicit subscript list */
array_accessor:
	'[' '*' ']'
		{ $$ = makeItemType(jpiAnyArray); }
	| '[' index_list ']'
		{ $$ = makeIndexArray($2); }
	;
231
/*
 * A bound used inside ANY_P '{' ... '}'.  LAST is represented as -1, which
 * makeAny() converts to PG_UINT32_MAX.
 */
any_level:
	INT_P
		{ $$ = pg_atoi($1.val, 4, 0); }
	| LAST_P
		{ $$ = -1; }
	;
236
/*
 * ANY_P accessor with optional bounds: unbounded (0 to LAST), a single
 * level, or an explicit "first TO last" range.
 */
any_path:
	ANY_P
		{ $$ = makeAny(0, -1); }
	| ANY_P '{' any_level '}'
		{ $$ = makeAny($3, $3); }
	| ANY_P '{' any_level TO_P any_level '}'
		{ $$ = makeAny($3, $5); }
	;
243
/*
 * A single step applied to the preceding item: member access, wildcard
 * member, array access, ANY_P path, item method call, or filter.
 */
accessor_op:
	'.' key
		{ $$ = $2; }
	| '.' '*'
		{ $$ = makeItemType(jpiAnyKey); }
	| array_accessor
		{ $$ = $1; }
	| '.' any_path
		{ $$ = $2; }
	| '.' method '(' ')'
		{ $$ = makeItemType($2); }
	| '?' '(' predicate ')'
		{ $$ = makeItemUnary(jpiFilter, $3); }
	;
252
/* Member name following '.', turned into a jpiKey item */
key:
	key_name
		{ $$ = makeItemKey(&$1); }
	;
256
/*
 * Tokens accepted as a member name.  Besides identifiers and strings, the
 * keyword tokens listed here are allowed too.  There are no actions: the
 * token's JsonPathString is passed through by Bison's default action.
 */
key_name:
	/* plain names */
	IDENT_P
	| STRING_P
	/* keyword tokens */
	| TO_P
	| NULL_P
	| TRUE_P
	| FALSE_P
	| IS_P
	| UNKNOWN_P
	| EXISTS_P
	| STRICT_P
	| LAX_P
	/* item method names */
	| ABS_P
	| SIZE_P
	| TYPE_P
	| FLOOR_P
	| DOUBLE_P
	| CEILING_P
	| KEYVALUE_P
	/* remaining keyword tokens */
	| LAST_P
	| STARTS_P
	| WITH_P
	| LIKE_REGEX_P
	| FLAG_P
	;
282
/* Item method names, mapped to the matching item type */
method:
	ABS_P
		{ $$ = jpiAbs; }
	| SIZE_P
		{ $$ = jpiSize; }
	| TYPE_P
		{ $$ = jpiType; }
	| FLOOR_P
		{ $$ = jpiFloor; }
	| DOUBLE_P
		{ $$ = jpiDouble; }
	| CEILING_P
		{ $$ = jpiCeiling; }
	| KEYVALUE_P
		{ $$ = jpiKeyValue; }
	;
292 %%
293
294 /*
295 * The helper functions below allocate and fill JsonPathParseItem's of various
296 * types.
297 */
298
299 static JsonPathParseItem *
300 makeItemType(JsonPathItemType type)
301 {
302 JsonPathParseItem *v = palloc(sizeof(*v));
303
304 CHECK_FOR_INTERRUPTS();
305
306 v->type = type;
307 v->next = NULL;
308
309 return v;
310 }
311
312 static JsonPathParseItem *
makeItemString(JsonPathString * s)313 makeItemString(JsonPathString *s)
314 {
315 JsonPathParseItem *v;
316
317 if (s == NULL)
318 {
319 v = makeItemType(jpiNull);
320 }
321 else
322 {
323 v = makeItemType(jpiString);
324 v->value.string.val = s->val;
325 v->value.string.len = s->len;
326 }
327
328 return v;
329 }
330
331 static JsonPathParseItem *
makeItemVariable(JsonPathString * s)332 makeItemVariable(JsonPathString *s)
333 {
334 JsonPathParseItem *v;
335
336 v = makeItemType(jpiVariable);
337 v->value.string.val = s->val;
338 v->value.string.len = s->len;
339
340 return v;
341 }
342
343 static JsonPathParseItem *
makeItemKey(JsonPathString * s)344 makeItemKey(JsonPathString *s)
345 {
346 JsonPathParseItem *v;
347
348 v = makeItemString(s);
349 v->type = jpiKey;
350
351 return v;
352 }
353
354 static JsonPathParseItem *
makeItemNumeric(JsonPathString * s)355 makeItemNumeric(JsonPathString *s)
356 {
357 JsonPathParseItem *v;
358
359 v = makeItemType(jpiNumeric);
360 v->value.numeric =
361 DatumGetNumeric(DirectFunctionCall3(numeric_in,
362 CStringGetDatum(s->val),
363 ObjectIdGetDatum(InvalidOid),
364 Int32GetDatum(-1)));
365
366 return v;
367 }
368
369 static JsonPathParseItem *
makeItemBool(bool val)370 makeItemBool(bool val)
371 {
372 JsonPathParseItem *v = makeItemType(jpiBool);
373
374 v->value.boolean = val;
375
376 return v;
377 }
378
379 static JsonPathParseItem *
makeItemBinary(JsonPathItemType type,JsonPathParseItem * la,JsonPathParseItem * ra)380 makeItemBinary(JsonPathItemType type, JsonPathParseItem *la, JsonPathParseItem *ra)
381 {
382 JsonPathParseItem *v = makeItemType(type);
383
384 v->value.args.left = la;
385 v->value.args.right = ra;
386
387 return v;
388 }
389
390 static JsonPathParseItem *
makeItemUnary(JsonPathItemType type,JsonPathParseItem * a)391 makeItemUnary(JsonPathItemType type, JsonPathParseItem *a)
392 {
393 JsonPathParseItem *v;
394
395 if (type == jpiPlus && a->type == jpiNumeric && !a->next)
396 return a;
397
398 if (type == jpiMinus && a->type == jpiNumeric && !a->next)
399 {
400 v = makeItemType(jpiNumeric);
401 v->value.numeric =
402 DatumGetNumeric(DirectFunctionCall1(numeric_uminus,
403 NumericGetDatum(a->value.numeric)));
404 return v;
405 }
406
407 v = makeItemType(type);
408
409 v->value.arg = a;
410
411 return v;
412 }
413
414 static JsonPathParseItem *
makeItemList(List * list)415 makeItemList(List *list)
416 {
417 JsonPathParseItem *head,
418 *end;
419 ListCell *cell = list_head(list);
420
421 head = end = (JsonPathParseItem *) lfirst(cell);
422
423 if (!lnext(cell))
424 return head;
425
426 /* append items to the end of already existing list */
427 while (end->next)
428 end = end->next;
429
430 for_each_cell(cell, lnext(cell))
431 {
432 JsonPathParseItem *c = (JsonPathParseItem *) lfirst(cell);
433
434 end->next = c;
435 end = c;
436 }
437
438 return head;
439 }
440
441 static JsonPathParseItem *
makeIndexArray(List * list)442 makeIndexArray(List *list)
443 {
444 JsonPathParseItem *v = makeItemType(jpiIndexArray);
445 ListCell *cell;
446 int i = 0;
447
448 Assert(list_length(list) > 0);
449 v->value.array.nelems = list_length(list);
450
451 v->value.array.elems = palloc(sizeof(v->value.array.elems[0]) *
452 v->value.array.nelems);
453
454 foreach(cell, list)
455 {
456 JsonPathParseItem *jpi = lfirst(cell);
457
458 Assert(jpi->type == jpiSubscript);
459
460 v->value.array.elems[i].from = jpi->value.args.left;
461 v->value.array.elems[i++].to = jpi->value.args.right;
462 }
463
464 return v;
465 }
466
467 static JsonPathParseItem *
makeAny(int first,int last)468 makeAny(int first, int last)
469 {
470 JsonPathParseItem *v = makeItemType(jpiAny);
471
472 v->value.anybounds.first = (first >= 0) ? first : PG_UINT32_MAX;
473 v->value.anybounds.last = (last >= 0) ? last : PG_UINT32_MAX;
474
475 return v;
476 }
477
478 static JsonPathParseItem *
makeItemLikeRegex(JsonPathParseItem * expr,JsonPathString * pattern,JsonPathString * flags)479 makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
480 JsonPathString *flags)
481 {
482 JsonPathParseItem *v = makeItemType(jpiLikeRegex);
483 int i;
484 int cflags;
485
486 v->value.like_regex.expr = expr;
487 v->value.like_regex.pattern = pattern->val;
488 v->value.like_regex.patternlen = pattern->len;
489
490 /* Parse the flags string, convert to bitmask. Duplicate flags are OK. */
491 v->value.like_regex.flags = 0;
492 for (i = 0; flags && i < flags->len; i++)
493 {
494 switch (flags->val[i])
495 {
496 case 'i':
497 v->value.like_regex.flags |= JSP_REGEX_ICASE;
498 break;
499 case 's':
500 v->value.like_regex.flags |= JSP_REGEX_DOTALL;
501 break;
502 case 'm':
503 v->value.like_regex.flags |= JSP_REGEX_MLINE;
504 break;
505 case 'x':
506 v->value.like_regex.flags |= JSP_REGEX_WSPACE;
507 break;
508 case 'q':
509 v->value.like_regex.flags |= JSP_REGEX_QUOTE;
510 break;
511 default:
512 ereport(ERROR,
513 (errcode(ERRCODE_SYNTAX_ERROR),
514 errmsg("invalid input syntax for type %s", "jsonpath"),
515 errdetail("unrecognized flag character \"%c\" in LIKE_REGEX predicate",
516 flags->val[i])));
517 break;
518 }
519 }
520
521 /* Convert flags to what RE_compile_and_cache needs */
522 cflags = jspConvertRegexFlags(v->value.like_regex.flags);
523
524 /* check regex validity */
525 (void) RE_compile_and_cache(cstring_to_text_with_len(pattern->val,
526 pattern->len),
527 cflags, DEFAULT_COLLATION_OID);
528
529 return v;
530 }
531
532 /*
533 * Convert from XQuery regex flags to those recognized by our regex library.
534 */
535 int
jspConvertRegexFlags(uint32 xflags)536 jspConvertRegexFlags(uint32 xflags)
537 {
538 /* By default, XQuery is very nearly the same as Spencer's AREs */
539 int cflags = REG_ADVANCED;
540
541 /* Ignore-case means the same thing, too, modulo locale issues */
542 if (xflags & JSP_REGEX_ICASE)
543 cflags |= REG_ICASE;
544
545 /* Per XQuery spec, if 'q' is specified then 'm', 's', 'x' are ignored */
546 if (xflags & JSP_REGEX_QUOTE)
547 {
548 cflags &= ~REG_ADVANCED;
549 cflags |= REG_QUOTE;
550 }
551 else
552 {
553 /* Note that dotall mode is the default in POSIX */
554 if (!(xflags & JSP_REGEX_DOTALL))
555 cflags |= REG_NLSTOP;
556 if (xflags & JSP_REGEX_MLINE)
557 cflags |= REG_NLANCH;
558
559 /*
560 * XQuery's 'x' mode is related to Spencer's expanded mode, but it's
561 * not really enough alike to justify treating JSP_REGEX_WSPACE as
562 * REG_EXPANDED. For now we treat 'x' as unimplemented; perhaps in
563 * future we'll modify the regex library to have an option for
564 * XQuery-style ignore-whitespace mode.
565 */
566 if (xflags & JSP_REGEX_WSPACE)
567 ereport(ERROR,
568 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
569 errmsg("XQuery \"x\" flag (expanded regular expressions) is not implemented")));
570 }
571
572 return cflags;
573 }
574
575 /*
576 * jsonpath_scan.l is compiled as part of jsonpath_gram.y. Currently, this is
577 * unavoidable because jsonpath_gram does not create a .h file to export its
578 * token symbols. If these files ever grow large enough to be worth compiling
579 * separately, that could be fixed; but for now it seems like useless
580 * complication.
581 */
582
583 #include "jsonpath_scan.c"
584