1 /*-------------------------------------------------------------------------
2 *
3 * tsquery.c
4 * I/O functions for tsquery
5 *
6 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
7 *
8 *
9 * IDENTIFICATION
10 * src/backend/utils/adt/tsquery.c
11 *
12 *-------------------------------------------------------------------------
13 */
14
15 #include "postgres.h"
16
17 #include "libpq/pqformat.h"
18 #include "miscadmin.h"
19 #include "tsearch/ts_type.h"
20 #include "tsearch/ts_locale.h"
21 #include "tsearch/ts_utils.h"
22 #include "utils/builtins.h"
23 #include "utils/memutils.h"
24 #include "utils/pg_crc.h"
25
26 /* FTS operator priorities, see ts_type.h */
27 const int tsearch_op_priority[OP_COUNT] =
28 {
29 4, /* OP_NOT */
30 2, /* OP_AND */
31 1, /* OP_OR */
32 3 /* OP_PHRASE */
33 };
34
35 struct TSQueryParserStateData
36 {
37 /* State for gettoken_query */
38 char *buffer; /* entire string we are scanning */
39 char *buf; /* current scan point */
40 int state;
41 int count; /* nesting count, incremented by (,
42 * decremented by ) */
43
44 /* polish (prefix) notation in list, filled in by push* functions */
45 List *polstr;
46
47 /*
48 * Strings from operands are collected in op. curop is a pointer to the
49 * end of used space of op.
50 */
51 char *op;
52 char *curop;
53 int lenop; /* allocated size of op */
54 int sumlen; /* used size of op */
55
56 /* state for value's parser */
57 TSVectorParseState valstate;
58 };
59
60 /* parser's states */
61 #define WAITOPERAND 1
62 #define WAITOPERATOR 2
63 #define WAITFIRSTOPERAND 3
64 #define WAITSINGLEOPERAND 4
65
66 /*
67 * subroutine to parse the modifiers (weight and prefix flag currently)
68 * part, like ':AB*' of a query.
69 */
70 static char *
get_modifiers(char * buf,int16 * weight,bool * prefix)71 get_modifiers(char *buf, int16 *weight, bool *prefix)
72 {
73 *weight = 0;
74 *prefix = false;
75
76 if (!t_iseq(buf, ':'))
77 return buf;
78
79 buf++;
80 while (*buf && pg_mblen(buf) == 1)
81 {
82 switch (*buf)
83 {
84 case 'a':
85 case 'A':
86 *weight |= 1 << 3;
87 break;
88 case 'b':
89 case 'B':
90 *weight |= 1 << 2;
91 break;
92 case 'c':
93 case 'C':
94 *weight |= 1 << 1;
95 break;
96 case 'd':
97 case 'D':
98 *weight |= 1;
99 break;
100 case '*':
101 *prefix = true;
102 break;
103 default:
104 return buf;
105 }
106 buf++;
107 }
108
109 return buf;
110 }
111
112 /*
113 * Parse phrase operator. The operator
114 * may take the following forms:
115 *
116 * a <N> b (distance is exactly N lexemes)
117 * a <-> b (default distance = 1)
118 *
119 * The buffer should begin with '<' char
120 */
121 static char *
parse_phrase_operator(char * buf,int16 * distance)122 parse_phrase_operator(char *buf, int16 *distance)
123 {
124 enum
125 {
126 PHRASE_OPEN = 0,
127 PHRASE_DIST,
128 PHRASE_CLOSE,
129 PHRASE_ERR,
130 PHRASE_FINISH
131 } state = PHRASE_OPEN;
132 char *ptr = buf;
133 char *endptr;
134 long l = 1; /* default distance */
135
136 while (*ptr)
137 {
138 switch (state)
139 {
140 case PHRASE_OPEN:
141 Assert(t_iseq(ptr, '<'));
142 state = PHRASE_DIST;
143 ptr++;
144 break;
145
146 case PHRASE_DIST:
147 if (t_iseq(ptr, '-'))
148 {
149 state = PHRASE_CLOSE;
150 ptr++;
151 break;
152 }
153 if (!t_isdigit(ptr))
154 {
155 state = PHRASE_ERR;
156 break;
157 }
158
159 errno = 0;
160 l = strtol(ptr, &endptr, 10);
161 if (ptr == endptr)
162 state = PHRASE_ERR;
163 else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS)
164 ereport(ERROR,
165 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
166 errmsg("distance in phrase operator must be an integer value between zero and %d inclusive",
167 MAXENTRYPOS)));
168 else
169 {
170 state = PHRASE_CLOSE;
171 ptr = endptr;
172 }
173 break;
174
175 case PHRASE_CLOSE:
176 if (t_iseq(ptr, '>'))
177 {
178 state = PHRASE_FINISH;
179 ptr++;
180 }
181 else
182 state = PHRASE_ERR;
183 break;
184
185 case PHRASE_FINISH:
186 *distance = (int16) l;
187 return ptr;
188
189 case PHRASE_ERR:
190 default:
191 goto err;
192 }
193 }
194
195 err:
196 *distance = -1;
197 return buf;
198 }
199
200 /*
201 * token types for parsing
202 */
203 typedef enum
204 {
205 PT_END = 0,
206 PT_ERR = 1,
207 PT_VAL = 2,
208 PT_OPR = 3,
209 PT_OPEN = 4,
210 PT_CLOSE = 5
211 } ts_tokentype;
212
213 /*
214 * get token from query string
215 *
216 * *operator is filled in with OP_* when return values is PT_OPR,
217 * but *weight could contain a distance value in case of phrase operator.
218 * *strval, *lenval and *weight are filled in when return value is PT_VAL
219 *
220 */
221 static ts_tokentype
gettoken_query(TSQueryParserState state,int8 * operator,int * lenval,char ** strval,int16 * weight,bool * prefix)222 gettoken_query(TSQueryParserState state,
223 int8 *operator,
224 int *lenval, char **strval, int16 *weight, bool *prefix)
225 {
226 *weight = 0;
227 *prefix = false;
228
229 while (1)
230 {
231 switch (state->state)
232 {
233 case WAITFIRSTOPERAND:
234 case WAITOPERAND:
235 if (t_iseq(state->buf, '!'))
236 {
237 (state->buf)++; /* can safely ++, t_iseq guarantee
238 * that pg_mblen()==1 */
239 *operator = OP_NOT;
240 state->state = WAITOPERAND;
241 return PT_OPR;
242 }
243 else if (t_iseq(state->buf, '('))
244 {
245 state->count++;
246 (state->buf)++;
247 state->state = WAITOPERAND;
248 return PT_OPEN;
249 }
250 else if (t_iseq(state->buf, ':'))
251 {
252 ereport(ERROR,
253 (errcode(ERRCODE_SYNTAX_ERROR),
254 errmsg("syntax error in tsquery: \"%s\"",
255 state->buffer)));
256 }
257 else if (!t_isspace(state->buf))
258 {
259 /*
260 * We rely on the tsvector parser to parse the value for
261 * us
262 */
263 reset_tsvector_parser(state->valstate, state->buf);
264 if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
265 {
266 state->buf = get_modifiers(state->buf, weight, prefix);
267 state->state = WAITOPERATOR;
268 return PT_VAL;
269 }
270 else if (state->state == WAITFIRSTOPERAND)
271 return PT_END;
272 else
273 ereport(ERROR,
274 (errcode(ERRCODE_SYNTAX_ERROR),
275 errmsg("no operand in tsquery: \"%s\"",
276 state->buffer)));
277 }
278 break;
279 case WAITOPERATOR:
280 if (t_iseq(state->buf, '&'))
281 {
282 state->state = WAITOPERAND;
283 *operator = OP_AND;
284 (state->buf)++;
285 return PT_OPR;
286 }
287 else if (t_iseq(state->buf, '|'))
288 {
289 state->state = WAITOPERAND;
290 *operator = OP_OR;
291 (state->buf)++;
292 return PT_OPR;
293 }
294 else if (t_iseq(state->buf, '<'))
295 {
296 state->state = WAITOPERAND;
297 *operator = OP_PHRASE;
298 /* weight var is used as storage for distance */
299 state->buf = parse_phrase_operator(state->buf, weight);
300 if (*weight < 0)
301 return PT_ERR;
302 return PT_OPR;
303 }
304 else if (t_iseq(state->buf, ')'))
305 {
306 (state->buf)++;
307 state->count--;
308 return (state->count < 0) ? PT_ERR : PT_CLOSE;
309 }
310 else if (*(state->buf) == '\0')
311 return (state->count) ? PT_ERR : PT_END;
312 else if (!t_isspace(state->buf))
313 return PT_ERR;
314 break;
315 case WAITSINGLEOPERAND:
316 if (*(state->buf) == '\0')
317 return PT_END;
318 *strval = state->buf;
319 *lenval = strlen(state->buf);
320 state->buf += strlen(state->buf);
321 state->count++;
322 return PT_VAL;
323 default:
324 return PT_ERR;
325 break;
326 }
327 state->buf += pg_mblen(state->buf);
328 }
329 }
330
331 /*
332 * Push an operator to state->polstr
333 */
334 void
pushOperator(TSQueryParserState state,int8 oper,int16 distance)335 pushOperator(TSQueryParserState state, int8 oper, int16 distance)
336 {
337 QueryOperator *tmp;
338
339 Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR || oper == OP_PHRASE);
340
341 tmp = (QueryOperator *) palloc0(sizeof(QueryOperator));
342 tmp->type = QI_OPR;
343 tmp->oper = oper;
344 tmp->distance = (oper == OP_PHRASE) ? distance : 0;
345 /* left is filled in later with findoprnd */
346
347 state->polstr = lcons(tmp, state->polstr);
348 }
349
350 static void
pushValue_internal(TSQueryParserState state,pg_crc32 valcrc,int distance,int lenval,int weight,bool prefix)351 pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix)
352 {
353 QueryOperand *tmp;
354
355 if (distance >= MAXSTRPOS)
356 ereport(ERROR,
357 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
358 errmsg("value is too big in tsquery: \"%s\"",
359 state->buffer)));
360 if (lenval >= MAXSTRLEN)
361 ereport(ERROR,
362 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
363 errmsg("operand is too long in tsquery: \"%s\"",
364 state->buffer)));
365
366 tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
367 tmp->type = QI_VAL;
368 tmp->weight = weight;
369 tmp->prefix = prefix;
370 tmp->valcrc = (int32) valcrc;
371 tmp->length = lenval;
372 tmp->distance = distance;
373
374 state->polstr = lcons(tmp, state->polstr);
375 }
376
377 /*
378 * Push an operand to state->polstr.
379 *
380 * strval must point to a string equal to state->curop. lenval is the length
381 * of the string.
382 */
383 void
pushValue(TSQueryParserState state,char * strval,int lenval,int16 weight,bool prefix)384 pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
385 {
386 pg_crc32 valcrc;
387
388 if (lenval >= MAXSTRLEN)
389 ereport(ERROR,
390 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
391 errmsg("word is too long in tsquery: \"%s\"",
392 state->buffer)));
393
394 INIT_LEGACY_CRC32(valcrc);
395 COMP_LEGACY_CRC32(valcrc, strval, lenval);
396 FIN_LEGACY_CRC32(valcrc);
397 pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix);
398
399 /* append the value string to state.op, enlarging buffer if needed first */
400 while (state->curop - state->op + lenval + 1 >= state->lenop)
401 {
402 int used = state->curop - state->op;
403
404 state->lenop *= 2;
405 state->op = (char *) repalloc((void *) state->op, state->lenop);
406 state->curop = state->op + used;
407 }
408 memcpy((void *) state->curop, (void *) strval, lenval);
409 state->curop += lenval;
410 *(state->curop) = '\0';
411 state->curop++;
412 state->sumlen += lenval + 1 /* \0 */ ;
413 }
414
415
416 /*
417 * Push a stopword placeholder to state->polstr
418 */
419 void
pushStop(TSQueryParserState state)420 pushStop(TSQueryParserState state)
421 {
422 QueryOperand *tmp;
423
424 tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
425 tmp->type = QI_VALSTOP;
426
427 state->polstr = lcons(tmp, state->polstr);
428 }
429
430
431 #define STACKDEPTH 32
432
433 typedef struct OperatorElement
434 {
435 int8 op;
436 int16 distance;
437 } OperatorElement;
438
439 static void
pushOpStack(OperatorElement * stack,int * lenstack,int8 op,int16 distance)440 pushOpStack(OperatorElement *stack, int *lenstack, int8 op, int16 distance)
441 {
442 if (*lenstack == STACKDEPTH) /* internal error */
443 elog(ERROR, "tsquery stack too small");
444
445 stack[*lenstack].op = op;
446 stack[*lenstack].distance = distance;
447
448 (*lenstack)++;
449 }
450
451 static void
cleanOpStack(TSQueryParserState state,OperatorElement * stack,int * lenstack,int8 op)452 cleanOpStack(TSQueryParserState state,
453 OperatorElement *stack, int *lenstack, int8 op)
454 {
455 int opPriority = OP_PRIORITY(op);
456
457 while (*lenstack)
458 {
459 /* NOT is right associative unlike to others */
460 if ((op != OP_NOT && opPriority > OP_PRIORITY(stack[*lenstack - 1].op)) ||
461 (op == OP_NOT && opPriority >= OP_PRIORITY(stack[*lenstack - 1].op)))
462 break;
463
464 (*lenstack)--;
465 pushOperator(state, stack[*lenstack].op,
466 stack[*lenstack].distance);
467 }
468 }
469
470 /*
471 * Make polish (prefix) notation of query.
472 *
473 * See parse_tsquery for explanation of pushval.
474 */
475 static void
makepol(TSQueryParserState state,PushFunction pushval,Datum opaque)476 makepol(TSQueryParserState state,
477 PushFunction pushval,
478 Datum opaque)
479 {
480 int8 operator = 0;
481 ts_tokentype type;
482 int lenval = 0;
483 char *strval = NULL;
484 OperatorElement opstack[STACKDEPTH];
485 int lenstack = 0;
486 int16 weight = 0;
487 bool prefix;
488
489 /* since this function recurses, it could be driven to stack overflow */
490 check_stack_depth();
491
492 while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, &prefix)) != PT_END)
493 {
494 switch (type)
495 {
496 case PT_VAL:
497 pushval(opaque, state, strval, lenval, weight, prefix);
498 break;
499 case PT_OPR:
500 cleanOpStack(state, opstack, &lenstack, operator);
501 pushOpStack(opstack, &lenstack, operator, weight);
502 break;
503 case PT_OPEN:
504 makepol(state, pushval, opaque);
505 break;
506 case PT_CLOSE:
507 cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
508 return;
509 case PT_ERR:
510 default:
511 ereport(ERROR,
512 (errcode(ERRCODE_SYNTAX_ERROR),
513 errmsg("syntax error in tsquery: \"%s\"",
514 state->buffer)));
515 }
516 }
517
518 cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
519 }
520
521 static void
findoprnd_recurse(QueryItem * ptr,uint32 * pos,int nnodes,bool * needcleanup)522 findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup)
523 {
524 /* since this function recurses, it could be driven to stack overflow. */
525 check_stack_depth();
526
527 if (*pos >= nnodes)
528 elog(ERROR, "malformed tsquery: operand not found");
529
530 if (ptr[*pos].type == QI_VAL)
531 {
532 (*pos)++;
533 }
534 else if (ptr[*pos].type == QI_VALSTOP)
535 {
536 *needcleanup = true; /* we'll have to remove stop words */
537 (*pos)++;
538 }
539 else
540 {
541 Assert(ptr[*pos].type == QI_OPR);
542
543 if (ptr[*pos].qoperator.oper == OP_NOT)
544 {
545 ptr[*pos].qoperator.left = 1; /* fixed offset */
546 (*pos)++;
547
548 /* process the only argument */
549 findoprnd_recurse(ptr, pos, nnodes, needcleanup);
550 }
551 else
552 {
553 QueryOperator *curitem = &ptr[*pos].qoperator;
554 int tmp = *pos; /* save current position */
555
556 Assert(curitem->oper == OP_AND ||
557 curitem->oper == OP_OR ||
558 curitem->oper == OP_PHRASE);
559
560 (*pos)++;
561
562 /* process RIGHT argument */
563 findoprnd_recurse(ptr, pos, nnodes, needcleanup);
564
565 curitem->left = *pos - tmp; /* set LEFT arg's offset */
566
567 /* process LEFT argument */
568 findoprnd_recurse(ptr, pos, nnodes, needcleanup);
569 }
570 }
571 }
572
573
574 /*
575 * Fill in the left-fields previously left unfilled.
576 * The input QueryItems must be in polish (prefix) notation.
577 * Also, set *needcleanup to true if there are any QI_VALSTOP nodes.
578 */
579 static void
findoprnd(QueryItem * ptr,int size,bool * needcleanup)580 findoprnd(QueryItem *ptr, int size, bool *needcleanup)
581 {
582 uint32 pos;
583
584 *needcleanup = false;
585 pos = 0;
586 findoprnd_recurse(ptr, &pos, size, needcleanup);
587
588 if (pos != size)
589 elog(ERROR, "malformed tsquery: extra nodes");
590 }
591
592
593 /*
594 * Each value (operand) in the query is passed to pushval. pushval can
595 * transform the simple value to an arbitrarily complex expression using
596 * pushValue and pushOperator. It must push a single value with pushValue,
597 * a complete expression with all operands, or a stopword placeholder
598 * with pushStop, otherwise the prefix notation representation will be broken,
599 * having an operator with no operand.
600 *
601 * opaque is passed on to pushval as is, pushval can use it to store its
602 * private state.
603 */
604 TSQuery
parse_tsquery(char * buf,PushFunction pushval,Datum opaque,bool isplain)605 parse_tsquery(char *buf,
606 PushFunction pushval,
607 Datum opaque,
608 bool isplain)
609 {
610 struct TSQueryParserStateData state;
611 int i;
612 TSQuery query;
613 int commonlen;
614 QueryItem *ptr;
615 ListCell *cell;
616 bool needcleanup;
617
618 /* init state */
619 state.buffer = buf;
620 state.buf = buf;
621 state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
622 state.count = 0;
623 state.polstr = NIL;
624
625 /* init value parser's state */
626 state.valstate = init_tsvector_parser(state.buffer, true, true);
627
628 /* init list of operand */
629 state.sumlen = 0;
630 state.lenop = 64;
631 state.curop = state.op = (char *) palloc(state.lenop);
632 *(state.curop) = '\0';
633
634 /* parse query & make polish notation (postfix, but in reverse order) */
635 makepol(&state, pushval, opaque);
636
637 close_tsvector_parser(state.valstate);
638
639 if (list_length(state.polstr) == 0)
640 {
641 ereport(NOTICE,
642 (errmsg("text-search query doesn't contain lexemes: \"%s\"",
643 state.buffer)));
644 query = (TSQuery) palloc(HDRSIZETQ);
645 SET_VARSIZE(query, HDRSIZETQ);
646 query->size = 0;
647 return query;
648 }
649
650 if (TSQUERY_TOO_BIG(list_length(state.polstr), state.sumlen))
651 ereport(ERROR,
652 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
653 errmsg("tsquery is too large")));
654 commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
655
656 /* Pack the QueryItems in the final TSQuery struct to return to caller */
657 query = (TSQuery) palloc0(commonlen);
658 SET_VARSIZE(query, commonlen);
659 query->size = list_length(state.polstr);
660 ptr = GETQUERY(query);
661
662 /* Copy QueryItems to TSQuery */
663 i = 0;
664 foreach(cell, state.polstr)
665 {
666 QueryItem *item = (QueryItem *) lfirst(cell);
667
668 switch (item->type)
669 {
670 case QI_VAL:
671 memcpy(&ptr[i], item, sizeof(QueryOperand));
672 break;
673 case QI_VALSTOP:
674 ptr[i].type = QI_VALSTOP;
675 break;
676 case QI_OPR:
677 memcpy(&ptr[i], item, sizeof(QueryOperator));
678 break;
679 default:
680 elog(ERROR, "unrecognized QueryItem type: %d", item->type);
681 }
682 i++;
683 }
684
685 /* Copy all the operand strings to TSQuery */
686 memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
687 pfree(state.op);
688
689 /*
690 * Set left operand pointers for every operator. While we're at it,
691 * detect whether there are any QI_VALSTOP nodes.
692 */
693 findoprnd(ptr, query->size, &needcleanup);
694
695 /*
696 * If there are QI_VALSTOP nodes, delete them and simplify the tree.
697 */
698 if (needcleanup)
699 query = cleanup_tsquery_stopwords(query);
700
701 return query;
702 }
703
704 static void
pushval_asis(Datum opaque,TSQueryParserState state,char * strval,int lenval,int16 weight,bool prefix)705 pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
706 int16 weight, bool prefix)
707 {
708 pushValue(state, strval, lenval, weight, prefix);
709 }
710
711 /*
712 * in without morphology
713 */
714 Datum
tsqueryin(PG_FUNCTION_ARGS)715 tsqueryin(PG_FUNCTION_ARGS)
716 {
717 char *in = PG_GETARG_CSTRING(0);
718
719 PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), false));
720 }
721
722 /*
723 * out function
724 */
725 typedef struct
726 {
727 QueryItem *curpol;
728 char *buf;
729 char *cur;
730 char *op;
731 int buflen;
732 } INFIX;
733
734 /* Makes sure inf->buf is large enough for adding 'addsize' bytes */
735 #define RESIZEBUF(inf, addsize) \
736 while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
737 { \
738 int len = (inf)->cur - (inf)->buf; \
739 (inf)->buflen *= 2; \
740 (inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
741 (inf)->cur = (inf)->buf + len; \
742 }
743
744 /*
745 * recursively traverse the tree and
746 * print it in infix (human-readable) form
747 */
748 static void
infix(INFIX * in,int parentPriority,bool rightPhraseOp)749 infix(INFIX *in, int parentPriority, bool rightPhraseOp)
750 {
751 /* since this function recurses, it could be driven to stack overflow. */
752 check_stack_depth();
753
754 if (in->curpol->type == QI_VAL)
755 {
756 QueryOperand *curpol = &in->curpol->qoperand;
757 char *op = in->op + curpol->distance;
758 int clen;
759
760 RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6);
761 *(in->cur) = '\'';
762 in->cur++;
763 while (*op)
764 {
765 if (t_iseq(op, '\''))
766 {
767 *(in->cur) = '\'';
768 in->cur++;
769 }
770 else if (t_iseq(op, '\\'))
771 {
772 *(in->cur) = '\\';
773 in->cur++;
774 }
775 COPYCHAR(in->cur, op);
776
777 clen = pg_mblen(op);
778 op += clen;
779 in->cur += clen;
780 }
781 *(in->cur) = '\'';
782 in->cur++;
783 if (curpol->weight || curpol->prefix)
784 {
785 *(in->cur) = ':';
786 in->cur++;
787 if (curpol->prefix)
788 {
789 *(in->cur) = '*';
790 in->cur++;
791 }
792 if (curpol->weight & (1 << 3))
793 {
794 *(in->cur) = 'A';
795 in->cur++;
796 }
797 if (curpol->weight & (1 << 2))
798 {
799 *(in->cur) = 'B';
800 in->cur++;
801 }
802 if (curpol->weight & (1 << 1))
803 {
804 *(in->cur) = 'C';
805 in->cur++;
806 }
807 if (curpol->weight & 1)
808 {
809 *(in->cur) = 'D';
810 in->cur++;
811 }
812 }
813 *(in->cur) = '\0';
814 in->curpol++;
815 }
816 else if (in->curpol->qoperator.oper == OP_NOT)
817 {
818 int priority = QO_PRIORITY(in->curpol);
819
820 if (priority < parentPriority)
821 {
822 RESIZEBUF(in, 2);
823 sprintf(in->cur, "( ");
824 in->cur = strchr(in->cur, '\0');
825 }
826 RESIZEBUF(in, 1);
827 *(in->cur) = '!';
828 in->cur++;
829 *(in->cur) = '\0';
830 in->curpol++;
831
832 infix(in, priority, false);
833 if (priority < parentPriority)
834 {
835 RESIZEBUF(in, 2);
836 sprintf(in->cur, " )");
837 in->cur = strchr(in->cur, '\0');
838 }
839 }
840 else
841 {
842 int8 op = in->curpol->qoperator.oper;
843 int priority = QO_PRIORITY(in->curpol);
844 int16 distance = in->curpol->qoperator.distance;
845 INFIX nrm;
846 bool needParenthesis = false;
847
848 in->curpol++;
849 if (priority < parentPriority ||
850 /* phrase operator depends on order */
851 (op == OP_PHRASE && rightPhraseOp))
852 {
853 needParenthesis = true;
854 RESIZEBUF(in, 2);
855 sprintf(in->cur, "( ");
856 in->cur = strchr(in->cur, '\0');
857 }
858
859 nrm.curpol = in->curpol;
860 nrm.op = in->op;
861 nrm.buflen = 16;
862 nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
863
864 /* get right operand */
865 infix(&nrm, priority, (op == OP_PHRASE));
866
867 /* get & print left operand */
868 in->curpol = nrm.curpol;
869 infix(in, priority, false);
870
871 /* print operator & right operand */
872 RESIZEBUF(in, 3 + (2 + 10 /* distance */ ) + (nrm.cur - nrm.buf));
873 switch (op)
874 {
875 case OP_OR:
876 sprintf(in->cur, " | %s", nrm.buf);
877 break;
878 case OP_AND:
879 sprintf(in->cur, " & %s", nrm.buf);
880 break;
881 case OP_PHRASE:
882 if (distance != 1)
883 sprintf(in->cur, " <%d> %s", distance, nrm.buf);
884 else
885 sprintf(in->cur, " <-> %s", nrm.buf);
886 break;
887 default:
888 /* OP_NOT is handled in above if-branch */
889 elog(ERROR, "unrecognized operator type: %d", op);
890 }
891 in->cur = strchr(in->cur, '\0');
892 pfree(nrm.buf);
893
894 if (needParenthesis)
895 {
896 RESIZEBUF(in, 2);
897 sprintf(in->cur, " )");
898 in->cur = strchr(in->cur, '\0');
899 }
900 }
901 }
902
903 Datum
tsqueryout(PG_FUNCTION_ARGS)904 tsqueryout(PG_FUNCTION_ARGS)
905 {
906 TSQuery query = PG_GETARG_TSQUERY(0);
907 INFIX nrm;
908
909 if (query->size == 0)
910 {
911 char *b = palloc(1);
912
913 *b = '\0';
914 PG_RETURN_POINTER(b);
915 }
916 nrm.curpol = GETQUERY(query);
917 nrm.buflen = 32;
918 nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
919 *(nrm.cur) = '\0';
920 nrm.op = GETOPERAND(query);
921 infix(&nrm, -1 /* lowest priority */ , false);
922
923 PG_FREE_IF_COPY(query, 0);
924 PG_RETURN_CSTRING(nrm.buf);
925 }
926
927 /*
928 * Binary Input / Output functions. The binary format is as follows:
929 *
930 * uint32 number of operators/operands in the query
931 *
932 * Followed by the operators and operands, in prefix notation. For each
933 * operand:
934 *
935 * uint8 type, QI_VAL
936 * uint8 weight
937 * operand text in client encoding, null-terminated
938 * uint8 prefix
939 *
940 * For each operator:
941 * uint8 type, QI_OPR
942 * uint8 operator, one of OP_AND, OP_PHRASE OP_OR, OP_NOT.
943 * uint16 distance (only for OP_PHRASE)
944 */
945 Datum
tsquerysend(PG_FUNCTION_ARGS)946 tsquerysend(PG_FUNCTION_ARGS)
947 {
948 TSQuery query = PG_GETARG_TSQUERY(0);
949 StringInfoData buf;
950 int i;
951 QueryItem *item = GETQUERY(query);
952
953 pq_begintypsend(&buf);
954
955 pq_sendint(&buf, query->size, sizeof(uint32));
956 for (i = 0; i < query->size; i++)
957 {
958 pq_sendint(&buf, item->type, sizeof(item->type));
959
960 switch (item->type)
961 {
962 case QI_VAL:
963 pq_sendint(&buf, item->qoperand.weight, sizeof(uint8));
964 pq_sendint(&buf, item->qoperand.prefix, sizeof(uint8));
965 pq_sendstring(&buf, GETOPERAND(query) + item->qoperand.distance);
966 break;
967 case QI_OPR:
968 pq_sendint(&buf, item->qoperator.oper, sizeof(item->qoperator.oper));
969 if (item->qoperator.oper == OP_PHRASE)
970 pq_sendint(&buf, item->qoperator.distance,
971 sizeof(item->qoperator.distance));
972 break;
973 default:
974 elog(ERROR, "unrecognized tsquery node type: %d", item->type);
975 }
976 item++;
977 }
978
979 PG_FREE_IF_COPY(query, 0);
980
981 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
982 }
983
984 Datum
tsqueryrecv(PG_FUNCTION_ARGS)985 tsqueryrecv(PG_FUNCTION_ARGS)
986 {
987 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
988 TSQuery query;
989 int i,
990 len;
991 QueryItem *item;
992 int datalen;
993 char *ptr;
994 uint32 size;
995 const char **operands;
996 bool needcleanup;
997
998 size = pq_getmsgint(buf, sizeof(uint32));
999 if (size > (MaxAllocSize / sizeof(QueryItem)))
1000 elog(ERROR, "invalid size of tsquery");
1001
1002 /* Allocate space to temporarily hold operand strings */
1003 operands = palloc(size * sizeof(char *));
1004
1005 /* Allocate space for all the QueryItems. */
1006 len = HDRSIZETQ + sizeof(QueryItem) * size;
1007 query = (TSQuery) palloc0(len);
1008 query->size = size;
1009 item = GETQUERY(query);
1010
1011 datalen = 0;
1012 for (i = 0; i < size; i++)
1013 {
1014 item->type = (int8) pq_getmsgint(buf, sizeof(int8));
1015
1016 if (item->type == QI_VAL)
1017 {
1018 size_t val_len; /* length after recoding to server encoding */
1019 uint8 weight;
1020 uint8 prefix;
1021 const char *val;
1022 pg_crc32 valcrc;
1023
1024 weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
1025 prefix = (uint8) pq_getmsgint(buf, sizeof(uint8));
1026 val = pq_getmsgstring(buf);
1027 val_len = strlen(val);
1028
1029 /* Sanity checks */
1030
1031 if (weight > 0xF)
1032 elog(ERROR, "invalid tsquery: invalid weight bitmap");
1033
1034 if (val_len > MAXSTRLEN)
1035 elog(ERROR, "invalid tsquery: operand too long");
1036
1037 if (datalen > MAXSTRPOS)
1038 elog(ERROR, "invalid tsquery: total operand length exceeded");
1039
1040 /* Looks valid. */
1041
1042 INIT_LEGACY_CRC32(valcrc);
1043 COMP_LEGACY_CRC32(valcrc, val, val_len);
1044 FIN_LEGACY_CRC32(valcrc);
1045
1046 item->qoperand.weight = weight;
1047 item->qoperand.prefix = (prefix) ? true : false;
1048 item->qoperand.valcrc = (int32) valcrc;
1049 item->qoperand.length = val_len;
1050 item->qoperand.distance = datalen;
1051
1052 /*
1053 * Operand strings are copied to the final struct after this loop;
1054 * here we just collect them to an array
1055 */
1056 operands[i] = val;
1057
1058 datalen += val_len + 1; /* + 1 for the '\0' terminator */
1059 }
1060 else if (item->type == QI_OPR)
1061 {
1062 int8 oper;
1063
1064 oper = (int8) pq_getmsgint(buf, sizeof(int8));
1065 if (oper != OP_NOT && oper != OP_OR && oper != OP_AND && oper != OP_PHRASE)
1066 elog(ERROR, "invalid tsquery: unrecognized operator type %d",
1067 (int) oper);
1068 if (i == size - 1)
1069 elog(ERROR, "invalid pointer to right operand");
1070
1071 item->qoperator.oper = oper;
1072 if (oper == OP_PHRASE)
1073 item->qoperator.distance = (int16) pq_getmsgint(buf, sizeof(int16));
1074 }
1075 else
1076 elog(ERROR, "unrecognized tsquery node type: %d", item->type);
1077
1078 item++;
1079 }
1080
1081 /* Enlarge buffer to make room for the operand values. */
1082 query = (TSQuery) repalloc(query, len + datalen);
1083 item = GETQUERY(query);
1084 ptr = GETOPERAND(query);
1085
1086 /*
1087 * Fill in the left-pointers. Checks that the tree is well-formed as a
1088 * side-effect.
1089 */
1090 findoprnd(item, size, &needcleanup);
1091
1092 /* Can't have found any QI_VALSTOP nodes */
1093 Assert(!needcleanup);
1094
1095 /* Copy operands to output struct */
1096 for (i = 0; i < size; i++)
1097 {
1098 if (item->type == QI_VAL)
1099 {
1100 memcpy(ptr, operands[i], item->qoperand.length + 1);
1101 ptr += item->qoperand.length + 1;
1102 }
1103 item++;
1104 }
1105
1106 pfree(operands);
1107
1108 Assert(ptr - GETOPERAND(query) == datalen);
1109
1110 SET_VARSIZE(query, len + datalen);
1111
1112 PG_RETURN_TSQUERY(query);
1113 }
1114
1115 /*
1116 * debug function, used only for view query
1117 * which will be executed in non-leaf pages in index
1118 */
1119 Datum
tsquerytree(PG_FUNCTION_ARGS)1120 tsquerytree(PG_FUNCTION_ARGS)
1121 {
1122 TSQuery query = PG_GETARG_TSQUERY(0);
1123 INFIX nrm;
1124 text *res;
1125 QueryItem *q;
1126 int len;
1127
1128 if (query->size == 0)
1129 {
1130 res = (text *) palloc(VARHDRSZ);
1131 SET_VARSIZE(res, VARHDRSZ);
1132 PG_RETURN_POINTER(res);
1133 }
1134
1135 q = clean_NOT(GETQUERY(query), &len);
1136
1137 if (!q)
1138 {
1139 res = cstring_to_text("T");
1140 }
1141 else
1142 {
1143 nrm.curpol = q;
1144 nrm.buflen = 32;
1145 nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
1146 *(nrm.cur) = '\0';
1147 nrm.op = GETOPERAND(query);
1148 infix(&nrm, -1, false);
1149 res = cstring_to_text_with_len(nrm.buf, nrm.cur - nrm.buf);
1150 pfree(q);
1151 }
1152
1153 PG_FREE_IF_COPY(query, 0);
1154
1155 PG_RETURN_TEXT_P(res);
1156 }
1157