1 /*-------------------------------------------------------------------------
2  *
3  * tsquery.c
4  *	  I/O functions for tsquery
5  *
6  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *	  src/backend/utils/adt/tsquery.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include "libpq/pqformat.h"
18 #include "miscadmin.h"
19 #include "tsearch/ts_type.h"
20 #include "tsearch/ts_locale.h"
21 #include "tsearch/ts_utils.h"
22 #include "utils/builtins.h"
23 #include "utils/memutils.h"
24 #include "utils/pg_crc.h"
25 
26 /* FTS operator priorities, see ts_type.h */
27 const int	tsearch_op_priority[OP_COUNT] =
28 {
29 	4,							/* OP_NOT */
30 	2,							/* OP_AND */
31 	1,							/* OP_OR */
32 	3							/* OP_PHRASE */
33 };
34 
35 struct TSQueryParserStateData
36 {
37 	/* State for gettoken_query */
38 	char	   *buffer;			/* entire string we are scanning */
39 	char	   *buf;			/* current scan point */
40 	int			state;
41 	int			count;			/* nesting count, incremented by (,
42 								 * decremented by ) */
43 
44 	/* polish (prefix) notation in list, filled in by push* functions */
45 	List	   *polstr;
46 
47 	/*
48 	 * Strings from operands are collected in op. curop is a pointer to the
49 	 * end of used space of op.
50 	 */
51 	char	   *op;
52 	char	   *curop;
53 	int			lenop;			/* allocated size of op */
54 	int			sumlen;			/* used size of op */
55 
56 	/* state for value's parser */
57 	TSVectorParseState valstate;
58 };
59 
60 /* parser's states */
61 #define WAITOPERAND 1
62 #define WAITOPERATOR	2
63 #define WAITFIRSTOPERAND 3
64 #define WAITSINGLEOPERAND 4
65 
66 /*
67  * subroutine to parse the modifiers (weight and prefix flag currently)
68  * part, like ':AB*' of a query.
69  */
70 static char *
get_modifiers(char * buf,int16 * weight,bool * prefix)71 get_modifiers(char *buf, int16 *weight, bool *prefix)
72 {
73 	*weight = 0;
74 	*prefix = false;
75 
76 	if (!t_iseq(buf, ':'))
77 		return buf;
78 
79 	buf++;
80 	while (*buf && pg_mblen(buf) == 1)
81 	{
82 		switch (*buf)
83 		{
84 			case 'a':
85 			case 'A':
86 				*weight |= 1 << 3;
87 				break;
88 			case 'b':
89 			case 'B':
90 				*weight |= 1 << 2;
91 				break;
92 			case 'c':
93 			case 'C':
94 				*weight |= 1 << 1;
95 				break;
96 			case 'd':
97 			case 'D':
98 				*weight |= 1;
99 				break;
100 			case '*':
101 				*prefix = true;
102 				break;
103 			default:
104 				return buf;
105 		}
106 		buf++;
107 	}
108 
109 	return buf;
110 }
111 
112 /*
113  * Parse phrase operator. The operator
114  * may take the following forms:
115  *
116  *		a <N> b (distance is exactly N lexemes)
117  *		a <-> b (default distance = 1)
118  *
119  * The buffer should begin with '<' char
120  */
121 static char *
parse_phrase_operator(char * buf,int16 * distance)122 parse_phrase_operator(char *buf, int16 *distance)
123 {
124 	enum
125 	{
126 		PHRASE_OPEN = 0,
127 		PHRASE_DIST,
128 		PHRASE_CLOSE,
129 		PHRASE_ERR,
130 		PHRASE_FINISH
131 	}			state = PHRASE_OPEN;
132 	char	   *ptr = buf;
133 	char	   *endptr;
134 	long		l = 1;			/* default distance */
135 
136 	while (*ptr)
137 	{
138 		switch (state)
139 		{
140 			case PHRASE_OPEN:
141 				Assert(t_iseq(ptr, '<'));
142 				state = PHRASE_DIST;
143 				ptr++;
144 				break;
145 
146 			case PHRASE_DIST:
147 				if (t_iseq(ptr, '-'))
148 				{
149 					state = PHRASE_CLOSE;
150 					ptr++;
151 					break;
152 				}
153 				if (!t_isdigit(ptr))
154 				{
155 					state = PHRASE_ERR;
156 					break;
157 				}
158 
159 				errno = 0;
160 				l = strtol(ptr, &endptr, 10);
161 				if (ptr == endptr)
162 					state = PHRASE_ERR;
163 				else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS)
164 					ereport(ERROR,
165 							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
166 							 errmsg("distance in phrase operator must be an integer value between zero and %d inclusive",
167 									MAXENTRYPOS)));
168 				else
169 				{
170 					state = PHRASE_CLOSE;
171 					ptr = endptr;
172 				}
173 				break;
174 
175 			case PHRASE_CLOSE:
176 				if (t_iseq(ptr, '>'))
177 				{
178 					state = PHRASE_FINISH;
179 					ptr++;
180 				}
181 				else
182 					state = PHRASE_ERR;
183 				break;
184 
185 			case PHRASE_FINISH:
186 				*distance = (int16) l;
187 				return ptr;
188 
189 			case PHRASE_ERR:
190 			default:
191 				goto err;
192 		}
193 	}
194 
195 err:
196 	*distance = -1;
197 	return buf;
198 }
199 
200 /*
201  * token types for parsing
202  */
203 typedef enum
204 {
205 	PT_END = 0,
206 	PT_ERR = 1,
207 	PT_VAL = 2,
208 	PT_OPR = 3,
209 	PT_OPEN = 4,
210 	PT_CLOSE = 5
211 } ts_tokentype;
212 
213 /*
214  * get token from query string
215  *
216  * *operator is filled in with OP_* when return values is PT_OPR,
217  * but *weight could contain a distance value in case of phrase operator.
218  * *strval, *lenval and *weight are filled in when return value is PT_VAL
219  *
220  */
221 static ts_tokentype
gettoken_query(TSQueryParserState state,int8 * operator,int * lenval,char ** strval,int16 * weight,bool * prefix)222 gettoken_query(TSQueryParserState state,
223 			   int8 *operator,
224 			   int *lenval, char **strval, int16 *weight, bool *prefix)
225 {
226 	*weight = 0;
227 	*prefix = false;
228 
229 	while (1)
230 	{
231 		switch (state->state)
232 		{
233 			case WAITFIRSTOPERAND:
234 			case WAITOPERAND:
235 				if (t_iseq(state->buf, '!'))
236 				{
237 					(state->buf)++;		/* can safely ++, t_iseq guarantee
238 										 * that pg_mblen()==1 */
239 					*operator = OP_NOT;
240 					state->state = WAITOPERAND;
241 					return PT_OPR;
242 				}
243 				else if (t_iseq(state->buf, '('))
244 				{
245 					state->count++;
246 					(state->buf)++;
247 					state->state = WAITOPERAND;
248 					return PT_OPEN;
249 				}
250 				else if (t_iseq(state->buf, ':'))
251 				{
252 					ereport(ERROR,
253 							(errcode(ERRCODE_SYNTAX_ERROR),
254 							 errmsg("syntax error in tsquery: \"%s\"",
255 									state->buffer)));
256 				}
257 				else if (!t_isspace(state->buf))
258 				{
259 					/*
260 					 * We rely on the tsvector parser to parse the value for
261 					 * us
262 					 */
263 					reset_tsvector_parser(state->valstate, state->buf);
264 					if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
265 					{
266 						state->buf = get_modifiers(state->buf, weight, prefix);
267 						state->state = WAITOPERATOR;
268 						return PT_VAL;
269 					}
270 					else if (state->state == WAITFIRSTOPERAND)
271 						return PT_END;
272 					else
273 						ereport(ERROR,
274 								(errcode(ERRCODE_SYNTAX_ERROR),
275 								 errmsg("no operand in tsquery: \"%s\"",
276 										state->buffer)));
277 				}
278 				break;
279 			case WAITOPERATOR:
280 				if (t_iseq(state->buf, '&'))
281 				{
282 					state->state = WAITOPERAND;
283 					*operator = OP_AND;
284 					(state->buf)++;
285 					return PT_OPR;
286 				}
287 				else if (t_iseq(state->buf, '|'))
288 				{
289 					state->state = WAITOPERAND;
290 					*operator = OP_OR;
291 					(state->buf)++;
292 					return PT_OPR;
293 				}
294 				else if (t_iseq(state->buf, '<'))
295 				{
296 					state->state = WAITOPERAND;
297 					*operator = OP_PHRASE;
298 					/* weight var is used as storage for distance */
299 					state->buf = parse_phrase_operator(state->buf, weight);
300 					if (*weight < 0)
301 						return PT_ERR;
302 					return PT_OPR;
303 				}
304 				else if (t_iseq(state->buf, ')'))
305 				{
306 					(state->buf)++;
307 					state->count--;
308 					return (state->count < 0) ? PT_ERR : PT_CLOSE;
309 				}
310 				else if (*(state->buf) == '\0')
311 					return (state->count) ? PT_ERR : PT_END;
312 				else if (!t_isspace(state->buf))
313 					return PT_ERR;
314 				break;
315 			case WAITSINGLEOPERAND:
316 				if (*(state->buf) == '\0')
317 					return PT_END;
318 				*strval = state->buf;
319 				*lenval = strlen(state->buf);
320 				state->buf += strlen(state->buf);
321 				state->count++;
322 				return PT_VAL;
323 			default:
324 				return PT_ERR;
325 				break;
326 		}
327 		state->buf += pg_mblen(state->buf);
328 	}
329 }
330 
331 /*
332  * Push an operator to state->polstr
333  */
334 void
pushOperator(TSQueryParserState state,int8 oper,int16 distance)335 pushOperator(TSQueryParserState state, int8 oper, int16 distance)
336 {
337 	QueryOperator *tmp;
338 
339 	Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR || oper == OP_PHRASE);
340 
341 	tmp = (QueryOperator *) palloc0(sizeof(QueryOperator));
342 	tmp->type = QI_OPR;
343 	tmp->oper = oper;
344 	tmp->distance = (oper == OP_PHRASE) ? distance : 0;
345 	/* left is filled in later with findoprnd */
346 
347 	state->polstr = lcons(tmp, state->polstr);
348 }
349 
350 static void
pushValue_internal(TSQueryParserState state,pg_crc32 valcrc,int distance,int lenval,int weight,bool prefix)351 pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix)
352 {
353 	QueryOperand *tmp;
354 
355 	if (distance >= MAXSTRPOS)
356 		ereport(ERROR,
357 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
358 				 errmsg("value is too big in tsquery: \"%s\"",
359 						state->buffer)));
360 	if (lenval >= MAXSTRLEN)
361 		ereport(ERROR,
362 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
363 				 errmsg("operand is too long in tsquery: \"%s\"",
364 						state->buffer)));
365 
366 	tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
367 	tmp->type = QI_VAL;
368 	tmp->weight = weight;
369 	tmp->prefix = prefix;
370 	tmp->valcrc = (int32) valcrc;
371 	tmp->length = lenval;
372 	tmp->distance = distance;
373 
374 	state->polstr = lcons(tmp, state->polstr);
375 }
376 
377 /*
378  * Push an operand to state->polstr.
379  *
380  * strval must point to a string equal to state->curop. lenval is the length
381  * of the string.
382  */
383 void
pushValue(TSQueryParserState state,char * strval,int lenval,int16 weight,bool prefix)384 pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
385 {
386 	pg_crc32	valcrc;
387 
388 	if (lenval >= MAXSTRLEN)
389 		ereport(ERROR,
390 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
391 				 errmsg("word is too long in tsquery: \"%s\"",
392 						state->buffer)));
393 
394 	INIT_LEGACY_CRC32(valcrc);
395 	COMP_LEGACY_CRC32(valcrc, strval, lenval);
396 	FIN_LEGACY_CRC32(valcrc);
397 	pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix);
398 
399 	/* append the value string to state.op, enlarging buffer if needed first */
400 	while (state->curop - state->op + lenval + 1 >= state->lenop)
401 	{
402 		int			used = state->curop - state->op;
403 
404 		state->lenop *= 2;
405 		state->op = (char *) repalloc((void *) state->op, state->lenop);
406 		state->curop = state->op + used;
407 	}
408 	memcpy((void *) state->curop, (void *) strval, lenval);
409 	state->curop += lenval;
410 	*(state->curop) = '\0';
411 	state->curop++;
412 	state->sumlen += lenval + 1 /* \0 */ ;
413 }
414 
415 
416 /*
417  * Push a stopword placeholder to state->polstr
418  */
419 void
pushStop(TSQueryParserState state)420 pushStop(TSQueryParserState state)
421 {
422 	QueryOperand *tmp;
423 
424 	tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
425 	tmp->type = QI_VALSTOP;
426 
427 	state->polstr = lcons(tmp, state->polstr);
428 }
429 
430 
431 #define STACKDEPTH	32
432 
433 typedef struct OperatorElement
434 {
435 	int8		op;
436 	int16		distance;
437 } OperatorElement;
438 
439 static void
pushOpStack(OperatorElement * stack,int * lenstack,int8 op,int16 distance)440 pushOpStack(OperatorElement *stack, int *lenstack, int8 op, int16 distance)
441 {
442 	if (*lenstack == STACKDEPTH)	/* internal error */
443 		elog(ERROR, "tsquery stack too small");
444 
445 	stack[*lenstack].op = op;
446 	stack[*lenstack].distance = distance;
447 
448 	(*lenstack)++;
449 }
450 
451 static void
cleanOpStack(TSQueryParserState state,OperatorElement * stack,int * lenstack,int8 op)452 cleanOpStack(TSQueryParserState state,
453 			 OperatorElement *stack, int *lenstack, int8 op)
454 {
455 	int			opPriority = OP_PRIORITY(op);
456 
457 	while (*lenstack)
458 	{
459 		/* NOT is right associative unlike to others */
460 		if ((op != OP_NOT && opPriority > OP_PRIORITY(stack[*lenstack - 1].op)) ||
461 		(op == OP_NOT && opPriority >= OP_PRIORITY(stack[*lenstack - 1].op)))
462 			break;
463 
464 		(*lenstack)--;
465 		pushOperator(state, stack[*lenstack].op,
466 					 stack[*lenstack].distance);
467 	}
468 }
469 
470 /*
471  * Make polish (prefix) notation of query.
472  *
473  * See parse_tsquery for explanation of pushval.
474  */
475 static void
makepol(TSQueryParserState state,PushFunction pushval,Datum opaque)476 makepol(TSQueryParserState state,
477 		PushFunction pushval,
478 		Datum opaque)
479 {
480 	int8		operator = 0;
481 	ts_tokentype type;
482 	int			lenval = 0;
483 	char	   *strval = NULL;
484 	OperatorElement opstack[STACKDEPTH];
485 	int			lenstack = 0;
486 	int16		weight = 0;
487 	bool		prefix;
488 
489 	/* since this function recurses, it could be driven to stack overflow */
490 	check_stack_depth();
491 
492 	while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, &prefix)) != PT_END)
493 	{
494 		switch (type)
495 		{
496 			case PT_VAL:
497 				pushval(opaque, state, strval, lenval, weight, prefix);
498 				break;
499 			case PT_OPR:
500 				cleanOpStack(state, opstack, &lenstack, operator);
501 				pushOpStack(opstack, &lenstack, operator, weight);
502 				break;
503 			case PT_OPEN:
504 				makepol(state, pushval, opaque);
505 				break;
506 			case PT_CLOSE:
507 				cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
508 				return;
509 			case PT_ERR:
510 			default:
511 				ereport(ERROR,
512 						(errcode(ERRCODE_SYNTAX_ERROR),
513 						 errmsg("syntax error in tsquery: \"%s\"",
514 								state->buffer)));
515 		}
516 	}
517 
518 	cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
519 }
520 
521 static void
findoprnd_recurse(QueryItem * ptr,uint32 * pos,int nnodes,bool * needcleanup)522 findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup)
523 {
524 	/* since this function recurses, it could be driven to stack overflow. */
525 	check_stack_depth();
526 
527 	if (*pos >= nnodes)
528 		elog(ERROR, "malformed tsquery: operand not found");
529 
530 	if (ptr[*pos].type == QI_VAL)
531 	{
532 		(*pos)++;
533 	}
534 	else if (ptr[*pos].type == QI_VALSTOP)
535 	{
536 		*needcleanup = true;	/* we'll have to remove stop words */
537 		(*pos)++;
538 	}
539 	else
540 	{
541 		Assert(ptr[*pos].type == QI_OPR);
542 
543 		if (ptr[*pos].qoperator.oper == OP_NOT)
544 		{
545 			ptr[*pos].qoperator.left = 1;		/* fixed offset */
546 			(*pos)++;
547 
548 			/* process the only argument */
549 			findoprnd_recurse(ptr, pos, nnodes, needcleanup);
550 		}
551 		else
552 		{
553 			QueryOperator *curitem = &ptr[*pos].qoperator;
554 			int			tmp = *pos;		/* save current position */
555 
556 			Assert(curitem->oper == OP_AND ||
557 				   curitem->oper == OP_OR ||
558 				   curitem->oper == OP_PHRASE);
559 
560 			(*pos)++;
561 
562 			/* process RIGHT argument */
563 			findoprnd_recurse(ptr, pos, nnodes, needcleanup);
564 
565 			curitem->left = *pos - tmp; /* set LEFT arg's offset */
566 
567 			/* process LEFT argument */
568 			findoprnd_recurse(ptr, pos, nnodes, needcleanup);
569 		}
570 	}
571 }
572 
573 
574 /*
575  * Fill in the left-fields previously left unfilled.
576  * The input QueryItems must be in polish (prefix) notation.
577  * Also, set *needcleanup to true if there are any QI_VALSTOP nodes.
578  */
579 static void
findoprnd(QueryItem * ptr,int size,bool * needcleanup)580 findoprnd(QueryItem *ptr, int size, bool *needcleanup)
581 {
582 	uint32		pos;
583 
584 	*needcleanup = false;
585 	pos = 0;
586 	findoprnd_recurse(ptr, &pos, size, needcleanup);
587 
588 	if (pos != size)
589 		elog(ERROR, "malformed tsquery: extra nodes");
590 }
591 
592 
593 /*
594  * Each value (operand) in the query is passed to pushval. pushval can
595  * transform the simple value to an arbitrarily complex expression using
596  * pushValue and pushOperator. It must push a single value with pushValue,
597  * a complete expression with all operands, or a stopword placeholder
598  * with pushStop, otherwise the prefix notation representation will be broken,
599  * having an operator with no operand.
600  *
601  * opaque is passed on to pushval as is, pushval can use it to store its
602  * private state.
603  */
604 TSQuery
parse_tsquery(char * buf,PushFunction pushval,Datum opaque,bool isplain)605 parse_tsquery(char *buf,
606 			  PushFunction pushval,
607 			  Datum opaque,
608 			  bool isplain)
609 {
610 	struct TSQueryParserStateData state;
611 	int			i;
612 	TSQuery		query;
613 	int			commonlen;
614 	QueryItem  *ptr;
615 	ListCell   *cell;
616 	bool		needcleanup;
617 
618 	/* init state */
619 	state.buffer = buf;
620 	state.buf = buf;
621 	state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
622 	state.count = 0;
623 	state.polstr = NIL;
624 
625 	/* init value parser's state */
626 	state.valstate = init_tsvector_parser(state.buffer, true, true);
627 
628 	/* init list of operand */
629 	state.sumlen = 0;
630 	state.lenop = 64;
631 	state.curop = state.op = (char *) palloc(state.lenop);
632 	*(state.curop) = '\0';
633 
634 	/* parse query & make polish notation (postfix, but in reverse order) */
635 	makepol(&state, pushval, opaque);
636 
637 	close_tsvector_parser(state.valstate);
638 
639 	if (list_length(state.polstr) == 0)
640 	{
641 		ereport(NOTICE,
642 				(errmsg("text-search query doesn't contain lexemes: \"%s\"",
643 						state.buffer)));
644 		query = (TSQuery) palloc(HDRSIZETQ);
645 		SET_VARSIZE(query, HDRSIZETQ);
646 		query->size = 0;
647 		return query;
648 	}
649 
650 	if (TSQUERY_TOO_BIG(list_length(state.polstr), state.sumlen))
651 		ereport(ERROR,
652 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
653 				 errmsg("tsquery is too large")));
654 	commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
655 
656 	/* Pack the QueryItems in the final TSQuery struct to return to caller */
657 	query = (TSQuery) palloc0(commonlen);
658 	SET_VARSIZE(query, commonlen);
659 	query->size = list_length(state.polstr);
660 	ptr = GETQUERY(query);
661 
662 	/* Copy QueryItems to TSQuery */
663 	i = 0;
664 	foreach(cell, state.polstr)
665 	{
666 		QueryItem  *item = (QueryItem *) lfirst(cell);
667 
668 		switch (item->type)
669 		{
670 			case QI_VAL:
671 				memcpy(&ptr[i], item, sizeof(QueryOperand));
672 				break;
673 			case QI_VALSTOP:
674 				ptr[i].type = QI_VALSTOP;
675 				break;
676 			case QI_OPR:
677 				memcpy(&ptr[i], item, sizeof(QueryOperator));
678 				break;
679 			default:
680 				elog(ERROR, "unrecognized QueryItem type: %d", item->type);
681 		}
682 		i++;
683 	}
684 
685 	/* Copy all the operand strings to TSQuery */
686 	memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
687 	pfree(state.op);
688 
689 	/*
690 	 * Set left operand pointers for every operator.  While we're at it,
691 	 * detect whether there are any QI_VALSTOP nodes.
692 	 */
693 	findoprnd(ptr, query->size, &needcleanup);
694 
695 	/*
696 	 * If there are QI_VALSTOP nodes, delete them and simplify the tree.
697 	 */
698 	if (needcleanup)
699 		query = cleanup_tsquery_stopwords(query);
700 
701 	return query;
702 }
703 
704 static void
pushval_asis(Datum opaque,TSQueryParserState state,char * strval,int lenval,int16 weight,bool prefix)705 pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
706 			 int16 weight, bool prefix)
707 {
708 	pushValue(state, strval, lenval, weight, prefix);
709 }
710 
711 /*
712  * in without morphology
713  */
714 Datum
tsqueryin(PG_FUNCTION_ARGS)715 tsqueryin(PG_FUNCTION_ARGS)
716 {
717 	char	   *in = PG_GETARG_CSTRING(0);
718 
719 	PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), false));
720 }
721 
722 /*
723  * out function
724  */
725 typedef struct
726 {
727 	QueryItem  *curpol;
728 	char	   *buf;
729 	char	   *cur;
730 	char	   *op;
731 	int			buflen;
732 } INFIX;
733 
734 /* Makes sure inf->buf is large enough for adding 'addsize' bytes */
735 #define RESIZEBUF(inf, addsize) \
736 while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
737 { \
738 	int len = (inf)->cur - (inf)->buf; \
739 	(inf)->buflen *= 2; \
740 	(inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
741 	(inf)->cur = (inf)->buf + len; \
742 }
743 
744 /*
745  * recursively traverse the tree and
746  * print it in infix (human-readable) form
747  */
748 static void
infix(INFIX * in,int parentPriority,bool rightPhraseOp)749 infix(INFIX *in, int parentPriority, bool rightPhraseOp)
750 {
751 	/* since this function recurses, it could be driven to stack overflow. */
752 	check_stack_depth();
753 
754 	if (in->curpol->type == QI_VAL)
755 	{
756 		QueryOperand *curpol = &in->curpol->qoperand;
757 		char	   *op = in->op + curpol->distance;
758 		int			clen;
759 
760 		RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6);
761 		*(in->cur) = '\'';
762 		in->cur++;
763 		while (*op)
764 		{
765 			if (t_iseq(op, '\''))
766 			{
767 				*(in->cur) = '\'';
768 				in->cur++;
769 			}
770 			else if (t_iseq(op, '\\'))
771 			{
772 				*(in->cur) = '\\';
773 				in->cur++;
774 			}
775 			COPYCHAR(in->cur, op);
776 
777 			clen = pg_mblen(op);
778 			op += clen;
779 			in->cur += clen;
780 		}
781 		*(in->cur) = '\'';
782 		in->cur++;
783 		if (curpol->weight || curpol->prefix)
784 		{
785 			*(in->cur) = ':';
786 			in->cur++;
787 			if (curpol->prefix)
788 			{
789 				*(in->cur) = '*';
790 				in->cur++;
791 			}
792 			if (curpol->weight & (1 << 3))
793 			{
794 				*(in->cur) = 'A';
795 				in->cur++;
796 			}
797 			if (curpol->weight & (1 << 2))
798 			{
799 				*(in->cur) = 'B';
800 				in->cur++;
801 			}
802 			if (curpol->weight & (1 << 1))
803 			{
804 				*(in->cur) = 'C';
805 				in->cur++;
806 			}
807 			if (curpol->weight & 1)
808 			{
809 				*(in->cur) = 'D';
810 				in->cur++;
811 			}
812 		}
813 		*(in->cur) = '\0';
814 		in->curpol++;
815 	}
816 	else if (in->curpol->qoperator.oper == OP_NOT)
817 	{
818 		int			priority = QO_PRIORITY(in->curpol);
819 
820 		if (priority < parentPriority)
821 		{
822 			RESIZEBUF(in, 2);
823 			sprintf(in->cur, "( ");
824 			in->cur = strchr(in->cur, '\0');
825 		}
826 		RESIZEBUF(in, 1);
827 		*(in->cur) = '!';
828 		in->cur++;
829 		*(in->cur) = '\0';
830 		in->curpol++;
831 
832 		infix(in, priority, false);
833 		if (priority < parentPriority)
834 		{
835 			RESIZEBUF(in, 2);
836 			sprintf(in->cur, " )");
837 			in->cur = strchr(in->cur, '\0');
838 		}
839 	}
840 	else
841 	{
842 		int8		op = in->curpol->qoperator.oper;
843 		int			priority = QO_PRIORITY(in->curpol);
844 		int16		distance = in->curpol->qoperator.distance;
845 		INFIX		nrm;
846 		bool		needParenthesis = false;
847 
848 		in->curpol++;
849 		if (priority < parentPriority ||
850 		/* phrase operator depends on order */
851 			(op == OP_PHRASE && rightPhraseOp))
852 		{
853 			needParenthesis = true;
854 			RESIZEBUF(in, 2);
855 			sprintf(in->cur, "( ");
856 			in->cur = strchr(in->cur, '\0');
857 		}
858 
859 		nrm.curpol = in->curpol;
860 		nrm.op = in->op;
861 		nrm.buflen = 16;
862 		nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
863 
864 		/* get right operand */
865 		infix(&nrm, priority, (op == OP_PHRASE));
866 
867 		/* get & print left operand */
868 		in->curpol = nrm.curpol;
869 		infix(in, priority, false);
870 
871 		/* print operator & right operand */
872 		RESIZEBUF(in, 3 + (2 + 10 /* distance */ ) + (nrm.cur - nrm.buf));
873 		switch (op)
874 		{
875 			case OP_OR:
876 				sprintf(in->cur, " | %s", nrm.buf);
877 				break;
878 			case OP_AND:
879 				sprintf(in->cur, " & %s", nrm.buf);
880 				break;
881 			case OP_PHRASE:
882 				if (distance != 1)
883 					sprintf(in->cur, " <%d> %s", distance, nrm.buf);
884 				else
885 					sprintf(in->cur, " <-> %s", nrm.buf);
886 				break;
887 			default:
888 				/* OP_NOT is handled in above if-branch */
889 				elog(ERROR, "unrecognized operator type: %d", op);
890 		}
891 		in->cur = strchr(in->cur, '\0');
892 		pfree(nrm.buf);
893 
894 		if (needParenthesis)
895 		{
896 			RESIZEBUF(in, 2);
897 			sprintf(in->cur, " )");
898 			in->cur = strchr(in->cur, '\0');
899 		}
900 	}
901 }
902 
903 Datum
tsqueryout(PG_FUNCTION_ARGS)904 tsqueryout(PG_FUNCTION_ARGS)
905 {
906 	TSQuery		query = PG_GETARG_TSQUERY(0);
907 	INFIX		nrm;
908 
909 	if (query->size == 0)
910 	{
911 		char	   *b = palloc(1);
912 
913 		*b = '\0';
914 		PG_RETURN_POINTER(b);
915 	}
916 	nrm.curpol = GETQUERY(query);
917 	nrm.buflen = 32;
918 	nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
919 	*(nrm.cur) = '\0';
920 	nrm.op = GETOPERAND(query);
921 	infix(&nrm, -1 /* lowest priority */ , false);
922 
923 	PG_FREE_IF_COPY(query, 0);
924 	PG_RETURN_CSTRING(nrm.buf);
925 }
926 
927 /*
928  * Binary Input / Output functions. The binary format is as follows:
929  *
930  * uint32	 number of operators/operands in the query
931  *
932  * Followed by the operators and operands, in prefix notation. For each
933  * operand:
934  *
935  * uint8	type, QI_VAL
936  * uint8	weight
937  *			operand text in client encoding, null-terminated
938  * uint8	prefix
939  *
940  * For each operator:
941  * uint8	type, QI_OPR
942  * uint8	operator, one of OP_AND, OP_PHRASE OP_OR, OP_NOT.
943  * uint16	distance (only for OP_PHRASE)
944  */
945 Datum
tsquerysend(PG_FUNCTION_ARGS)946 tsquerysend(PG_FUNCTION_ARGS)
947 {
948 	TSQuery		query = PG_GETARG_TSQUERY(0);
949 	StringInfoData buf;
950 	int			i;
951 	QueryItem  *item = GETQUERY(query);
952 
953 	pq_begintypsend(&buf);
954 
955 	pq_sendint(&buf, query->size, sizeof(uint32));
956 	for (i = 0; i < query->size; i++)
957 	{
958 		pq_sendint(&buf, item->type, sizeof(item->type));
959 
960 		switch (item->type)
961 		{
962 			case QI_VAL:
963 				pq_sendint(&buf, item->qoperand.weight, sizeof(uint8));
964 				pq_sendint(&buf, item->qoperand.prefix, sizeof(uint8));
965 				pq_sendstring(&buf, GETOPERAND(query) + item->qoperand.distance);
966 				break;
967 			case QI_OPR:
968 				pq_sendint(&buf, item->qoperator.oper, sizeof(item->qoperator.oper));
969 				if (item->qoperator.oper == OP_PHRASE)
970 					pq_sendint(&buf, item->qoperator.distance,
971 							   sizeof(item->qoperator.distance));
972 				break;
973 			default:
974 				elog(ERROR, "unrecognized tsquery node type: %d", item->type);
975 		}
976 		item++;
977 	}
978 
979 	PG_FREE_IF_COPY(query, 0);
980 
981 	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
982 }
983 
984 Datum
tsqueryrecv(PG_FUNCTION_ARGS)985 tsqueryrecv(PG_FUNCTION_ARGS)
986 {
987 	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
988 	TSQuery		query;
989 	int			i,
990 				len;
991 	QueryItem  *item;
992 	int			datalen;
993 	char	   *ptr;
994 	uint32		size;
995 	const char **operands;
996 	bool		needcleanup;
997 
998 	size = pq_getmsgint(buf, sizeof(uint32));
999 	if (size > (MaxAllocSize / sizeof(QueryItem)))
1000 		elog(ERROR, "invalid size of tsquery");
1001 
1002 	/* Allocate space to temporarily hold operand strings */
1003 	operands = palloc(size * sizeof(char *));
1004 
1005 	/* Allocate space for all the QueryItems. */
1006 	len = HDRSIZETQ + sizeof(QueryItem) * size;
1007 	query = (TSQuery) palloc0(len);
1008 	query->size = size;
1009 	item = GETQUERY(query);
1010 
1011 	datalen = 0;
1012 	for (i = 0; i < size; i++)
1013 	{
1014 		item->type = (int8) pq_getmsgint(buf, sizeof(int8));
1015 
1016 		if (item->type == QI_VAL)
1017 		{
1018 			size_t		val_len;	/* length after recoding to server encoding */
1019 			uint8		weight;
1020 			uint8		prefix;
1021 			const char *val;
1022 			pg_crc32	valcrc;
1023 
1024 			weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
1025 			prefix = (uint8) pq_getmsgint(buf, sizeof(uint8));
1026 			val = pq_getmsgstring(buf);
1027 			val_len = strlen(val);
1028 
1029 			/* Sanity checks */
1030 
1031 			if (weight > 0xF)
1032 				elog(ERROR, "invalid tsquery: invalid weight bitmap");
1033 
1034 			if (val_len > MAXSTRLEN)
1035 				elog(ERROR, "invalid tsquery: operand too long");
1036 
1037 			if (datalen > MAXSTRPOS)
1038 				elog(ERROR, "invalid tsquery: total operand length exceeded");
1039 
1040 			/* Looks valid. */
1041 
1042 			INIT_LEGACY_CRC32(valcrc);
1043 			COMP_LEGACY_CRC32(valcrc, val, val_len);
1044 			FIN_LEGACY_CRC32(valcrc);
1045 
1046 			item->qoperand.weight = weight;
1047 			item->qoperand.prefix = (prefix) ? true : false;
1048 			item->qoperand.valcrc = (int32) valcrc;
1049 			item->qoperand.length = val_len;
1050 			item->qoperand.distance = datalen;
1051 
1052 			/*
1053 			 * Operand strings are copied to the final struct after this loop;
1054 			 * here we just collect them to an array
1055 			 */
1056 			operands[i] = val;
1057 
1058 			datalen += val_len + 1;		/* + 1 for the '\0' terminator */
1059 		}
1060 		else if (item->type == QI_OPR)
1061 		{
1062 			int8		oper;
1063 
1064 			oper = (int8) pq_getmsgint(buf, sizeof(int8));
1065 			if (oper != OP_NOT && oper != OP_OR && oper != OP_AND && oper != OP_PHRASE)
1066 				elog(ERROR, "invalid tsquery: unrecognized operator type %d",
1067 					 (int) oper);
1068 			if (i == size - 1)
1069 				elog(ERROR, "invalid pointer to right operand");
1070 
1071 			item->qoperator.oper = oper;
1072 			if (oper == OP_PHRASE)
1073 				item->qoperator.distance = (int16) pq_getmsgint(buf, sizeof(int16));
1074 		}
1075 		else
1076 			elog(ERROR, "unrecognized tsquery node type: %d", item->type);
1077 
1078 		item++;
1079 	}
1080 
1081 	/* Enlarge buffer to make room for the operand values. */
1082 	query = (TSQuery) repalloc(query, len + datalen);
1083 	item = GETQUERY(query);
1084 	ptr = GETOPERAND(query);
1085 
1086 	/*
1087 	 * Fill in the left-pointers. Checks that the tree is well-formed as a
1088 	 * side-effect.
1089 	 */
1090 	findoprnd(item, size, &needcleanup);
1091 
1092 	/* Can't have found any QI_VALSTOP nodes */
1093 	Assert(!needcleanup);
1094 
1095 	/* Copy operands to output struct */
1096 	for (i = 0; i < size; i++)
1097 	{
1098 		if (item->type == QI_VAL)
1099 		{
1100 			memcpy(ptr, operands[i], item->qoperand.length + 1);
1101 			ptr += item->qoperand.length + 1;
1102 		}
1103 		item++;
1104 	}
1105 
1106 	pfree(operands);
1107 
1108 	Assert(ptr - GETOPERAND(query) == datalen);
1109 
1110 	SET_VARSIZE(query, len + datalen);
1111 
1112 	PG_RETURN_TSQUERY(query);
1113 }
1114 
1115 /*
1116  * debug function, used only for view query
1117  * which will be executed in non-leaf pages in index
1118  */
1119 Datum
tsquerytree(PG_FUNCTION_ARGS)1120 tsquerytree(PG_FUNCTION_ARGS)
1121 {
1122 	TSQuery		query = PG_GETARG_TSQUERY(0);
1123 	INFIX		nrm;
1124 	text	   *res;
1125 	QueryItem  *q;
1126 	int			len;
1127 
1128 	if (query->size == 0)
1129 	{
1130 		res = (text *) palloc(VARHDRSZ);
1131 		SET_VARSIZE(res, VARHDRSZ);
1132 		PG_RETURN_POINTER(res);
1133 	}
1134 
1135 	q = clean_NOT(GETQUERY(query), &len);
1136 
1137 	if (!q)
1138 	{
1139 		res = cstring_to_text("T");
1140 	}
1141 	else
1142 	{
1143 		nrm.curpol = q;
1144 		nrm.buflen = 32;
1145 		nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
1146 		*(nrm.cur) = '\0';
1147 		nrm.op = GETOPERAND(query);
1148 		infix(&nrm, -1, false);
1149 		res = cstring_to_text_with_len(nrm.buf, nrm.cur - nrm.buf);
1150 		pfree(q);
1151 	}
1152 
1153 	PG_FREE_IF_COPY(query, 0);
1154 
1155 	PG_RETURN_TEXT_P(res);
1156 }
1157