1 %left LOWEST.
2 %left TILDE.
3 %left TAGLIST.
4 %left QUOTE.
5 %left COLON.
6 %left MINUS.
7 %left NUMBER.
8 %left STOPWORD.
9
10 %left TERMLIST.
11 %left TERM.
12 %left PREFIX.
13 %left PERCENT.
14 %left ATTRIBUTE.
15 %right LP.
16 %left RP.
17 // needs to be above lp/rp
18 %left MODIFIER.
19 %left AND.
20 %left OR.
21 %left ORX.
22 %left ARROW.
23
24 %token_type {QueryToken}
25
26 %name RSQueryParser_
27
28 %syntax_error {
29 QueryError_SetErrorFmt(ctx->status, QUERY_ESYNTAX,
30 "Syntax error at offset %d near %.*s",
31 TOKEN.pos, TOKEN.len, TOKEN.s);
32 }
33
34 %include {
35
36 #include <stdlib.h>
37 #include <string.h>
38 #include <strings.h>
39 #include <assert.h>
40 #include "parse.h"
41 #include "../util/arr.h"
42 #include "../rmutil/vector.h"
43 #include "../query_node.h"
44
45 // strndup + lowercase in one pass!
strdupcase(const char * s,size_t len)46 char *strdupcase(const char *s, size_t len) {
47 char *ret = rm_strndup(s, len);
48 char *dst = ret;
49 char *src = dst;
50 while (*src) {
51 // unescape
52 if (*src == '\\' && (ispunct(*(src+1)) || isspace(*(src+1)))) {
53 ++src;
54 continue;
55 }
56 *dst = tolower(*src);
57 ++dst;
58 ++src;
59
60 }
61 *dst = '\0';
62
63 return ret;
64 }
65
66 // unescape a string (non null terminated) and return the new length (may be shorter than the original. This manipulates the string itself
unescapen(char * s,size_t sz)67 size_t unescapen(char *s, size_t sz) {
68
69 char *dst = s;
70 char *src = dst;
71 char *end = s + sz;
72 while (src < end) {
73 // unescape
74 if (*src == '\\' && src + 1 < end &&
75 (ispunct(*(src+1)) || isspace(*(src+1)))) {
76 ++src;
77 continue;
78 }
79 *dst++ = *src++;
80 }
81
82 return (size_t)(dst - s);
83 }
84
85 #define NODENN_BOTH_VALID 0
86 #define NODENN_BOTH_INVALID -1
87 #define NODENN_ONE_NULL 1
88 // Returns:
89 // 0 if a && b
90 // -1 if !a && !b
91 // 1 if a ^ b (i.e. !(a&&b||!a||!b)). The result is stored in `out`
one_not_null(void * a,void * b,void * out)92 static int one_not_null(void *a, void *b, void *out) {
93 if (a && b) {
94 return NODENN_BOTH_VALID;
95 } else if (a == NULL && b == NULL) {
96 return NODENN_BOTH_INVALID;
97 } if (a) {
98 *(void **)out = a;
99 return NODENN_ONE_NULL;
100 } else {
101 *(void **)out = b;
102 return NODENN_ONE_NULL;
103 }
104 }
105
106 } // END %include
107
108 %extra_argument { QueryParseCtx *ctx }
109 %default_type { QueryToken }
110 %default_destructor { }
111
112 %type expr { QueryNode * }
113 %destructor expr { QueryNode_Free($$); }
114
115 %type attribute { QueryAttribute }
116 %destructor attribute { rm_free((char*)$$.value); }
117
118 %type attribute_list {QueryAttribute *}
119 %destructor attribute_list { array_free_ex($$, rm_free((char*)((QueryAttribute*)ptr )->value)); }
120
121 %type prefix { QueryNode * }
122 %destructor prefix { QueryNode_Free($$); }
123
124 %type termlist { QueryNode * }
125 %destructor termlist { QueryNode_Free($$); }
126
127 %type union { QueryNode *}
128 %destructor union { QueryNode_Free($$); }
129
130 %type fuzzy { QueryNode *}
131 %destructor fuzzy { QueryNode_Free($$); }
132
133 %type tag_list { QueryNode *}
134 %destructor tag_list { QueryNode_Free($$); }
135
136 //%type
137 %type geo_filter { GeoFilter *}
138 %destructor geo_filter { GeoFilter_Free($$); }
139
140 %type modifierlist { Vector* }
141 %destructor modifierlist {
142 for (size_t i = 0; i < Vector_Size($$); i++) {
143 char *s;
144 Vector_Get($$, i, &s);
145 rm_free(s);
146 }
147 Vector_Free($$);
148 }
149
150 %type num { RangeNumber }
151
152 %type numeric_range { NumericFilter * }
153 %destructor numeric_range {
154 NumericFilter_Free($$);
155 }
156
expr(A)157 query ::= expr(A) . {
158 /* If the root is a negative node, we intersect it with a wildcard node */
159
160 ctx->root = A;
161
162 }
163 query ::= . {
164 ctx->root = NULL;
165 }
166
167 query ::= STAR . {
168 ctx->root = NewWildcardNode();
169 }
170
171 /////////////////////////////////////////////////////////////////
172 // AND Clause / Phrase
173 /////////////////////////////////////////////////////////////////
174
expr(A)175 expr(A) ::= expr(B) expr(C) . [AND] {
176 int rv = one_not_null(B, C, (void**)&A);
177 if (rv == NODENN_BOTH_INVALID) {
178 A = NULL;
179 } else if (rv == NODENN_ONE_NULL) {
180 // Nothing- `out` is already assigned
181 } else {
182 if (B && B->type == QN_PHRASE && B->pn.exact == 0 &&
183 B->opts.fieldMask == RS_FIELDMASK_ALL ) {
184 A = B;
185 } else {
186 A = NewPhraseNode(0);
187 QueryNode_AddChild(A, B);
188 }
189 QueryNode_AddChild(A, C);
190 }
191 }
192
193
194 /////////////////////////////////////////////////////////////////
195 // Unions
196 /////////////////////////////////////////////////////////////////
197
expr(A)198 expr(A) ::= union(B) . [ORX] {
199 A = B;
200 }
201
expr(B)202 union(A) ::= expr(B) OR expr(C) . [OR] {
203 int rv = one_not_null(B, C, (void**)&A);
204 if (rv == NODENN_BOTH_INVALID) {
205 A = NULL;
206 } else if (rv == NODENN_ONE_NULL) {
207 // Nothing- already assigned
208 } else {
209 if (B->type == QN_UNION && B->opts.fieldMask == RS_FIELDMASK_ALL) {
210 A = B;
211 } else {
212 A = NewUnionNode();
213 QueryNode_AddChild(A, B);
214 A->opts.fieldMask |= B->opts.fieldMask;
215 }
216
217 // Handle C
218 QueryNode_AddChild(A, C);
219 A->opts.fieldMask |= C->opts.fieldMask;
220 QueryNode_SetFieldMask(A, A->opts.fieldMask);
221 }
222
223 }
224
expr(C)225 union(A) ::= union(B) OR expr(C). [ORX] {
226 A = B;
227 if (C) {
228 QueryNode_AddChild(A, C);
229 A->opts.fieldMask |= C->opts.fieldMask;
230 QueryNode_SetFieldMask(C, A->opts.fieldMask);
231 }
232 }
233
234 /////////////////////////////////////////////////////////////////
235 // Text Field Filters
236 /////////////////////////////////////////////////////////////////
237
expr(A)238 expr(A) ::= modifier(B) COLON expr(C) . [MODIFIER] {
239 if (C == NULL) {
240 A = NULL;
241 } else {
242 if (ctx->sctx->spec) {
243 QueryNode_SetFieldMask(C, IndexSpec_GetFieldBit(ctx->sctx->spec, B.s, B.len));
244 }
245 A = C;
246 }
247 }
248
249
expr(A)250 expr(A) ::= modifierlist(B) COLON expr(C) . [MODIFIER] {
251
252 if (C == NULL) {
253 A = NULL;
254 } else {
255 //C->opts.fieldMask = 0;
256 t_fieldMask mask = 0;
257 if (ctx->sctx->spec) {
258 for (int i = 0; i < Vector_Size(B); i++) {
259 char *p;
260 Vector_Get(B, i, &p);
261 mask |= IndexSpec_GetFieldBit(ctx->sctx->spec, p, strlen(p));
262 rm_free(p);
263 }
264 }
265 QueryNode_SetFieldMask(C, mask);
266 Vector_Free(B);
267 A=C;
268 }
269 }
270
expr(A)271 expr(A) ::= LP expr(B) RP . {
272 A = B;
273 }
274
275 /////////////////////////////////////////////////////////////////
276 // Attributes
277 /////////////////////////////////////////////////////////////////
278
attribute(A)279 attribute(A) ::= ATTRIBUTE(B) COLON term(C). {
280
281 A = (QueryAttribute){ .name = B.s, .namelen = B.len, .value = rm_strndup(C.s, C.len), .vallen = C.len };
282 }
283
attribute_list(A)284 attribute_list(A) ::= attribute(B) . {
285 A = array_new(QueryAttribute, 2);
286 A = array_append(A, B);
287 }
288
attribute_list(A)289 attribute_list(A) ::= attribute_list(B) SEMICOLON attribute(C) . {
290 A = array_append(B, C);
291 }
292
attribute_list(A)293 attribute_list(A) ::= attribute_list(B) SEMICOLON . {
294 A = B;
295 }
296
attribute_list(A)297 attribute_list(A) ::= . {
298 A = NULL;
299 }
300
expr(A)301 expr(A) ::= expr(B) ARROW LB attribute_list(C) RB . {
302
303 if (B && C) {
304 QueryNode_ApplyAttributes(B, C, array_len(C), ctx->status);
305 }
306 array_free_ex(C, rm_free((char*)((QueryAttribute*)ptr )->value));
307 A = B;
308 }
309
310 /////////////////////////////////////////////////////////////////
311 // Term Lists
312 /////////////////////////////////////////////////////////////////
313
expr(A)314 expr(A) ::= QUOTE termlist(B) QUOTE. [TERMLIST] {
315 B->pn.exact =1;
316 B->opts.flags |= QueryNode_Verbatim;
317
318 A = B;
319 }
320
expr(A)321 expr(A) ::= QUOTE term(B) QUOTE. [TERMLIST] {
322 A = NewTokenNode(ctx, strdupcase(B.s, B.len), -1);
323 A->opts.flags |= QueryNode_Verbatim;
324
325 }
326
expr(A)327 expr(A) ::= term(B) . [LOWEST] {
328 A = NewTokenNode(ctx, strdupcase(B.s, B.len), -1);
329 }
330
expr(A)331 expr(A) ::= prefix(B) . [PREFIX] {
332 A= B;
333 }
334
expr(A)335 expr(A) ::= termlist(B) . [TERMLIST] {
336 A = B;
337 }
338
expr(A)339 expr(A) ::= STOPWORD . [STOPWORD] {
340 A = NULL;
341 }
342
termlist(A)343 termlist(A) ::= term(B) term(C). [TERMLIST] {
344 A = NewPhraseNode(0);
345 QueryNode_AddChild(A, NewTokenNode(ctx, strdupcase(B.s, B.len), -1));
346 QueryNode_AddChild(A, NewTokenNode(ctx, strdupcase(C.s, C.len), -1));
347 }
348
termlist(A)349 termlist(A) ::= termlist(B) term(C) . [TERMLIST] {
350 A = B;
351 QueryNode_AddChild(A, NewTokenNode(ctx, strdupcase(C.s, C.len), -1));
352 }
353
termlist(A)354 termlist(A) ::= termlist(B) STOPWORD . [TERMLIST] {
355 A = B;
356 }
357
358 /////////////////////////////////////////////////////////////////
359 // Negative Clause
360 /////////////////////////////////////////////////////////////////
361
expr(A)362 expr(A) ::= MINUS expr(B) . {
363 if (B) {
364 A = NewNotNode(B);
365 } else {
366 A = NULL;
367 }
368 }
369
370 /////////////////////////////////////////////////////////////////
371 // Optional Clause
372 /////////////////////////////////////////////////////////////////
373
expr(A)374 expr(A) ::= TILDE expr(B) . {
375 if (B) {
376 A = NewOptionalNode(B);
377 } else {
378 A = NULL;
379 }
380 }
381
382 /////////////////////////////////////////////////////////////////
383 // Prefix experessions
384 /////////////////////////////////////////////////////////////////
385
prefix(A)386 prefix(A) ::= PREFIX(B) . [PREFIX] {
387 B.s = strdupcase(B.s, B.len);
388 A = NewPrefixNode(ctx, B.s, strlen(B.s));
389 }
390
391 /////////////////////////////////////////////////////////////////
392 // Fuzzy terms
393 /////////////////////////////////////////////////////////////////
394
expr(A)395 expr(A) ::= PERCENT term(B) PERCENT. [PREFIX] {
396 B.s = strdupcase(B.s, B.len);
397 A = NewFuzzyNode(ctx, B.s, strlen(B.s), 1);
398 }
399
expr(A)400 expr(A) ::= PERCENT PERCENT term(B) PERCENT PERCENT. [PREFIX] {
401 B.s = strdupcase(B.s, B.len);
402 A = NewFuzzyNode(ctx, B.s, strlen(B.s), 2);
403 }
404
expr(A)405 expr(A) ::= PERCENT PERCENT PERCENT term(B) PERCENT PERCENT PERCENT. [PREFIX] {
406 B.s = strdupcase(B.s, B.len);
407 A = NewFuzzyNode(ctx, B.s, strlen(B.s), 3);
408 }
409
expr(A)410 expr(A) ::= PERCENT STOPWORD(B) PERCENT. [PREFIX] {
411 B.s = strdupcase(B.s, B.len);
412 A = NewFuzzyNode(ctx, B.s, strlen(B.s), 1);
413 }
414
expr(A)415 expr(A) ::= PERCENT PERCENT STOPWORD(B) PERCENT PERCENT. [PREFIX] {
416 B.s = strdupcase(B.s, B.len);
417 A = NewFuzzyNode(ctx, B.s, strlen(B.s), 2);
418 }
419
expr(A)420 expr(A) ::= PERCENT PERCENT PERCENT STOPWORD(B) PERCENT PERCENT PERCENT. [PREFIX] {
421 B.s = strdupcase(B.s, B.len);
422 A = NewFuzzyNode(ctx, B.s, strlen(B.s), 3);
423 }
424
425
426 /////////////////////////////////////////////////////////////////
427 // Field Modidiers
428 /////////////////////////////////////////////////////////////////
429
modifier(A)430 modifier(A) ::= MODIFIER(B) . {
431 B.len = unescapen((char*)B.s, B.len);
432 A = B;
433 }
434
modifierlist(A)435 modifierlist(A) ::= modifier(B) OR term(C). {
436 A = NewVector(char *, 2);
437 char *s = strdupcase(B.s, B.len);
438 Vector_Push(A, s);
439 s = strdupcase(C.s, C.len);
440 Vector_Push(A, s);
441 }
442
modifierlist(A)443 modifierlist(A) ::= modifierlist(B) OR term(C). {
444 char *s = strdupcase(C.s, C.len);
445 Vector_Push(B, s);
446 A = B;
447 }
448
449
450 /////////////////////////////////////////////////////////////////
451 // Tag Lists - curly braces separated lists of words
452 /////////////////////////////////////////////////////////////////
expr(A)453 expr(A) ::= modifier(B) COLON tag_list(C) . {
454 if (!C) {
455 A= NULL;
456 } else {
457 // Tag field names must be case sensitive, we we can't do strdupcase
458 char *s = rm_strndup(B.s, B.len);
459 size_t slen = unescapen((char*)s, B.len);
460
461 A = NewTagNode(s, slen);
462 QueryNode_AddChildren(A, C->children, QueryNode_NumChildren(C));
463
464 // Set the children count on C to 0 so they won't get recursively free'd
465 QueryNode_ClearChildren(C, 0);
466 QueryNode_Free(C);
467 }
468 }
469
tag_list(A)470 tag_list(A) ::= LB term(B) . [TAGLIST] {
471 A = NewPhraseNode(0);
472 QueryNode_AddChild(A, NewTokenNode(ctx, strdupcase(B.s, B.len), -1));
473 }
474
tag_list(A)475 tag_list(A) ::= LB STOPWORD(B) . [TAGLIST] {
476 A = NewPhraseNode(0);
477 QueryNode_AddChild(A, NewTokenNode(ctx, strdupcase(B.s, B.len), -1));
478 }
479
tag_list(A)480 tag_list(A) ::= LB prefix(B) . [TAGLIST] {
481 A = NewPhraseNode(0);
482 QueryNode_AddChild(A, B);
483 }
484
tag_list(A)485 tag_list(A) ::= LB termlist(B) . [TAGLIST] {
486 A = NewPhraseNode(0);
487 QueryNode_AddChild(A, B);
488 }
489
tag_list(A)490 tag_list(A) ::= tag_list(B) OR term(C) . [TAGLIST] {
491 QueryNode_AddChild(B, NewTokenNode(ctx, strdupcase(C.s, C.len), -1));
492 A = B;
493 }
494
tag_list(A)495 tag_list(A) ::= tag_list(B) OR STOPWORD(C) . [TAGLIST] {
496 QueryNode_AddChild(B, NewTokenNode(ctx, strdupcase(C.s, C.len), -1));
497 A = B;
498 }
499
tag_list(A)500 tag_list(A) ::= tag_list(B) OR prefix(C) . [TAGLIST] {
501 QueryNode_AddChild(B, C);
502 A = B;
503 }
504
tag_list(A)505 tag_list(A) ::= tag_list(B) OR termlist(C) . [TAGLIST] {
506 QueryNode_AddChild(B, C);
507 A = B;
508 }
509
510
tag_list(A)511 tag_list(A) ::= tag_list(B) RB . [TAGLIST] {
512 A = B;
513 }
514
515
516 /////////////////////////////////////////////////////////////////
517 // Numeric Ranges
518 /////////////////////////////////////////////////////////////////
expr(A)519 expr(A) ::= modifier(B) COLON numeric_range(C). {
520 // we keep the capitalization as is
521 C->fieldName = rm_strndup(B.s, B.len);
522 A = NewNumericNode(C);
523 }
524
numeric_range(A)525 numeric_range(A) ::= LSQB num(B) num(C) RSQB. [NUMBER] {
526 A = NewNumericFilter(B.num, C.num, B.inclusive, C.inclusive);
527 }
528
529 /////////////////////////////////////////////////////////////////
530 // Geo Filters
531 /////////////////////////////////////////////////////////////////
532
expr(A)533 expr(A) ::= modifier(B) COLON geo_filter(C). {
534 // we keep the capitalization as is
535 C->property = rm_strndup(B.s, B.len);
536 A = NewGeofilterNode(C);
537 }
538
geo_filter(A)539 geo_filter(A) ::= LSQB num(B) num(C) num(D) TERM(E) RSQB. [NUMBER] {
540 char buf[16] = {0};
541 if (E.len < 16) {
542 memcpy(buf, E.s, E.len);
543 } else {
544 strcpy(buf, "INVALID");
545 }
546 A = NewGeoFilter(B.num, C.num, D.num, buf);
547 GeoFilter_Validate(A, ctx->status);
548 }
549
550
551
552
553 /////////////////////////////////////////////////////////////////
554 // Primitives - numbers and strings
555 /////////////////////////////////////////////////////////////////
num(A)556 num(A) ::= NUMBER(B). {
557 A.num = B.numval;
558 A.inclusive = 1;
559 }
560
num(A)561 num(A) ::= LP num(B). {
562 A=B;
563 A.inclusive = 0;
564 }
565
num(A)566 num(A) ::= MINUS num(B). {
567 B.num = -B.num;
568 A = B;
569 }
570
term(A)571 term(A) ::= TERM(B) . {
572 A = B;
573 }
574
term(A)575 term(A) ::= NUMBER(B) . {
576 A = B;
577 }
578
579