1 /*-------------------------------------------------------------------------
2  *
3  * tsvector_op.c
4  *	  operations over tsvector
5  *
6  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *	  src/backend/utils/adt/tsvector_op.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include <limits.h>
17 
18 #include "access/htup_details.h"
19 #include "catalog/namespace.h"
20 #include "catalog/pg_type.h"
21 #include "commands/trigger.h"
22 #include "executor/spi.h"
23 #include "funcapi.h"
24 #include "lib/qunique.h"
25 #include "mb/pg_wchar.h"
26 #include "miscadmin.h"
27 #include "parser/parse_coerce.h"
28 #include "tsearch/ts_utils.h"
29 #include "utils/array.h"
30 #include "utils/builtins.h"
31 #include "utils/lsyscache.h"
32 #include "utils/regproc.h"
33 #include "utils/rel.h"
34 
35 
36 typedef struct
37 {
38 	WordEntry  *arrb;
39 	WordEntry  *arre;
40 	char	   *values;
41 	char	   *operand;
42 } CHKVAL;
43 
44 
45 typedef struct StatEntry
46 {
47 	uint32		ndoc;			/* zero indicates that we were already here
48 								 * while walking through the tree */
49 	uint32		nentry;
50 	struct StatEntry *left;
51 	struct StatEntry *right;
52 	uint32		lenlexeme;
53 	char		lexeme[FLEXIBLE_ARRAY_MEMBER];
54 } StatEntry;
55 
56 #define STATENTRYHDRSZ	(offsetof(StatEntry, lexeme))
57 
58 typedef struct
59 {
60 	int32		weight;
61 
62 	uint32		maxdepth;
63 
64 	StatEntry **stack;
65 	uint32		stackpos;
66 
67 	StatEntry  *root;
68 } TSVectorStat;
69 
70 
71 static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg,
72 										 uint32 flags,
73 										 TSExecuteCallback chkcond);
74 static int	tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len);
75 static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
76 
77 
78 /*
79  * Order: haspos, len, word, for all positions (pos, weight)
80  */
81 static int
silly_cmp_tsvector(const TSVector a,const TSVector b)82 silly_cmp_tsvector(const TSVector a, const TSVector b)
83 {
84 	if (VARSIZE(a) < VARSIZE(b))
85 		return -1;
86 	else if (VARSIZE(a) > VARSIZE(b))
87 		return 1;
88 	else if (a->size < b->size)
89 		return -1;
90 	else if (a->size > b->size)
91 		return 1;
92 	else
93 	{
94 		WordEntry  *aptr = ARRPTR(a);
95 		WordEntry  *bptr = ARRPTR(b);
96 		int			i = 0;
97 		int			res;
98 
99 
100 		for (i = 0; i < a->size; i++)
101 		{
102 			if (aptr->haspos != bptr->haspos)
103 			{
104 				return (aptr->haspos > bptr->haspos) ? -1 : 1;
105 			}
106 			else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
107 			{
108 				return res;
109 			}
110 			else if (aptr->haspos)
111 			{
112 				WordEntryPos *ap = POSDATAPTR(a, aptr);
113 				WordEntryPos *bp = POSDATAPTR(b, bptr);
114 				int			j;
115 
116 				if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
117 					return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
118 
119 				for (j = 0; j < POSDATALEN(a, aptr); j++)
120 				{
121 					if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
122 					{
123 						return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
124 					}
125 					else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
126 					{
127 						return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
128 					}
129 					ap++, bp++;
130 				}
131 			}
132 
133 			aptr++;
134 			bptr++;
135 		}
136 	}
137 
138 	return 0;
139 }
140 
141 #define TSVECTORCMPFUNC( type, action, ret )			\
142 Datum													\
143 tsvector_##type(PG_FUNCTION_ARGS)						\
144 {														\
145 	TSVector	a = PG_GETARG_TSVECTOR(0);				\
146 	TSVector	b = PG_GETARG_TSVECTOR(1);				\
147 	int			res = silly_cmp_tsvector(a, b);			\
148 	PG_FREE_IF_COPY(a,0);								\
149 	PG_FREE_IF_COPY(b,1);								\
150 	PG_RETURN_##ret( res action 0 );					\
151 }	\
152 /* keep compiler quiet - no extra ; */					\
153 extern int no_such_variable
154 
155 TSVECTORCMPFUNC(lt, <, BOOL);
156 TSVECTORCMPFUNC(le, <=, BOOL);
157 TSVECTORCMPFUNC(eq, ==, BOOL);
158 TSVECTORCMPFUNC(ge, >=, BOOL);
159 TSVECTORCMPFUNC(gt, >, BOOL);
160 TSVECTORCMPFUNC(ne, !=, BOOL);
161 TSVECTORCMPFUNC(cmp, +, INT32);
162 
163 Datum
tsvector_strip(PG_FUNCTION_ARGS)164 tsvector_strip(PG_FUNCTION_ARGS)
165 {
166 	TSVector	in = PG_GETARG_TSVECTOR(0);
167 	TSVector	out;
168 	int			i,
169 				len = 0;
170 	WordEntry  *arrin = ARRPTR(in),
171 			   *arrout;
172 	char	   *cur;
173 
174 	for (i = 0; i < in->size; i++)
175 		len += arrin[i].len;
176 
177 	len = CALCDATASIZE(in->size, len);
178 	out = (TSVector) palloc0(len);
179 	SET_VARSIZE(out, len);
180 	out->size = in->size;
181 	arrout = ARRPTR(out);
182 	cur = STRPTR(out);
183 	for (i = 0; i < in->size; i++)
184 	{
185 		memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
186 		arrout[i].haspos = 0;
187 		arrout[i].len = arrin[i].len;
188 		arrout[i].pos = cur - STRPTR(out);
189 		cur += arrout[i].len;
190 	}
191 
192 	PG_FREE_IF_COPY(in, 0);
193 	PG_RETURN_POINTER(out);
194 }
195 
196 Datum
tsvector_length(PG_FUNCTION_ARGS)197 tsvector_length(PG_FUNCTION_ARGS)
198 {
199 	TSVector	in = PG_GETARG_TSVECTOR(0);
200 	int32		ret = in->size;
201 
202 	PG_FREE_IF_COPY(in, 0);
203 	PG_RETURN_INT32(ret);
204 }
205 
206 Datum
tsvector_setweight(PG_FUNCTION_ARGS)207 tsvector_setweight(PG_FUNCTION_ARGS)
208 {
209 	TSVector	in = PG_GETARG_TSVECTOR(0);
210 	char		cw = PG_GETARG_CHAR(1);
211 	TSVector	out;
212 	int			i,
213 				j;
214 	WordEntry  *entry;
215 	WordEntryPos *p;
216 	int			w = 0;
217 
218 	switch (cw)
219 	{
220 		case 'A':
221 		case 'a':
222 			w = 3;
223 			break;
224 		case 'B':
225 		case 'b':
226 			w = 2;
227 			break;
228 		case 'C':
229 		case 'c':
230 			w = 1;
231 			break;
232 		case 'D':
233 		case 'd':
234 			w = 0;
235 			break;
236 		default:
237 			/* internal error */
238 			elog(ERROR, "unrecognized weight: %d", cw);
239 	}
240 
241 	out = (TSVector) palloc(VARSIZE(in));
242 	memcpy(out, in, VARSIZE(in));
243 	entry = ARRPTR(out);
244 	i = out->size;
245 	while (i--)
246 	{
247 		if ((j = POSDATALEN(out, entry)) != 0)
248 		{
249 			p = POSDATAPTR(out, entry);
250 			while (j--)
251 			{
252 				WEP_SETWEIGHT(*p, w);
253 				p++;
254 			}
255 		}
256 		entry++;
257 	}
258 
259 	PG_FREE_IF_COPY(in, 0);
260 	PG_RETURN_POINTER(out);
261 }
262 
263 /*
264  * setweight(tsin tsvector, char_weight "char", lexemes "text"[])
265  *
266  * Assign weight w to elements of tsin that are listed in lexemes.
267  */
268 Datum
tsvector_setweight_by_filter(PG_FUNCTION_ARGS)269 tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
270 {
271 	TSVector	tsin = PG_GETARG_TSVECTOR(0);
272 	char		char_weight = PG_GETARG_CHAR(1);
273 	ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(2);
274 
275 	TSVector	tsout;
276 	int			i,
277 				j,
278 				nlexemes,
279 				weight;
280 	WordEntry  *entry;
281 	Datum	   *dlexemes;
282 	bool	   *nulls;
283 
284 	switch (char_weight)
285 	{
286 		case 'A':
287 		case 'a':
288 			weight = 3;
289 			break;
290 		case 'B':
291 		case 'b':
292 			weight = 2;
293 			break;
294 		case 'C':
295 		case 'c':
296 			weight = 1;
297 			break;
298 		case 'D':
299 		case 'd':
300 			weight = 0;
301 			break;
302 		default:
303 			/* internal error */
304 			elog(ERROR, "unrecognized weight: %c", char_weight);
305 	}
306 
307 	tsout = (TSVector) palloc(VARSIZE(tsin));
308 	memcpy(tsout, tsin, VARSIZE(tsin));
309 	entry = ARRPTR(tsout);
310 
311 	deconstruct_array(lexemes, TEXTOID, -1, false, TYPALIGN_INT,
312 					  &dlexemes, &nulls, &nlexemes);
313 
314 	/*
315 	 * Assuming that lexemes array is significantly shorter than tsvector we
316 	 * can iterate through lexemes performing binary search of each lexeme
317 	 * from lexemes in tsvector.
318 	 */
319 	for (i = 0; i < nlexemes; i++)
320 	{
321 		char	   *lex;
322 		int			lex_len,
323 					lex_pos;
324 
325 		if (nulls[i])
326 			ereport(ERROR,
327 					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
328 					 errmsg("lexeme array may not contain nulls")));
329 
330 		lex = VARDATA(dlexemes[i]);
331 		lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
332 		lex_pos = tsvector_bsearch(tsout, lex, lex_len);
333 
334 		if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
335 		{
336 			WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
337 
338 			while (j--)
339 			{
340 				WEP_SETWEIGHT(*p, weight);
341 				p++;
342 			}
343 		}
344 	}
345 
346 	PG_FREE_IF_COPY(tsin, 0);
347 	PG_FREE_IF_COPY(lexemes, 2);
348 
349 	PG_RETURN_POINTER(tsout);
350 }
351 
352 #define compareEntry(pa, a, pb, b) \
353 	tsCompareString((pa) + (a)->pos, (a)->len,	\
354 					(pb) + (b)->pos, (b)->len,	\
355 					false)
356 
357 /*
358  * Add positions from src to dest after offsetting them by maxpos.
359  * Return the number added (might be less than expected due to overflow)
360  */
361 static int32
add_pos(TSVector src,WordEntry * srcptr,TSVector dest,WordEntry * destptr,int32 maxpos)362 add_pos(TSVector src, WordEntry *srcptr,
363 		TSVector dest, WordEntry *destptr,
364 		int32 maxpos)
365 {
366 	uint16	   *clen = &_POSVECPTR(dest, destptr)->npos;
367 	int			i;
368 	uint16		slen = POSDATALEN(src, srcptr),
369 				startlen;
370 	WordEntryPos *spos = POSDATAPTR(src, srcptr),
371 			   *dpos = POSDATAPTR(dest, destptr);
372 
373 	if (!destptr->haspos)
374 		*clen = 0;
375 
376 	startlen = *clen;
377 	for (i = 0;
378 		 i < slen && *clen < MAXNUMPOS &&
379 		 (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
380 		 i++)
381 	{
382 		WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
383 		WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
384 		(*clen)++;
385 	}
386 
387 	if (*clen != startlen)
388 		destptr->haspos = 1;
389 	return *clen - startlen;
390 }
391 
392 /*
393  * Perform binary search of given lexeme in TSVector.
394  * Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
395  * found.
396  */
397 static int
tsvector_bsearch(const TSVector tsv,char * lexeme,int lexeme_len)398 tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
399 {
400 	WordEntry  *arrin = ARRPTR(tsv);
401 	int			StopLow = 0,
402 				StopHigh = tsv->size,
403 				StopMiddle,
404 				cmp;
405 
406 	while (StopLow < StopHigh)
407 	{
408 		StopMiddle = (StopLow + StopHigh) / 2;
409 
410 		cmp = tsCompareString(lexeme, lexeme_len,
411 							  STRPTR(tsv) + arrin[StopMiddle].pos,
412 							  arrin[StopMiddle].len,
413 							  false);
414 
415 		if (cmp < 0)
416 			StopHigh = StopMiddle;
417 		else if (cmp > 0)
418 			StopLow = StopMiddle + 1;
419 		else					/* found it */
420 			return StopMiddle;
421 	}
422 
423 	return -1;
424 }
425 
426 /*
427  * qsort comparator functions
428  */
429 
430 static int
compare_int(const void * va,const void * vb)431 compare_int(const void *va, const void *vb)
432 {
433 	int			a = *((const int *) va);
434 	int			b = *((const int *) vb);
435 
436 	if (a == b)
437 		return 0;
438 	return (a > b) ? 1 : -1;
439 }
440 
441 static int
compare_text_lexemes(const void * va,const void * vb)442 compare_text_lexemes(const void *va, const void *vb)
443 {
444 	Datum		a = *((const Datum *) va);
445 	Datum		b = *((const Datum *) vb);
446 	char	   *alex = VARDATA_ANY(a);
447 	int			alex_len = VARSIZE_ANY_EXHDR(a);
448 	char	   *blex = VARDATA_ANY(b);
449 	int			blex_len = VARSIZE_ANY_EXHDR(b);
450 
451 	return tsCompareString(alex, alex_len, blex, blex_len, false);
452 }
453 
454 /*
455  * Internal routine to delete lexemes from TSVector by array of offsets.
456  *
457  * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
458  * int indices_count -- size of that array
459  *
460  * Returns new TSVector without given lexemes along with their positions
461  * and weights.
462  */
463 static TSVector
tsvector_delete_by_indices(TSVector tsv,int * indices_to_delete,int indices_count)464 tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
465 						   int indices_count)
466 {
467 	TSVector	tsout;
468 	WordEntry  *arrin = ARRPTR(tsv),
469 			   *arrout;
470 	char	   *data = STRPTR(tsv),
471 			   *dataout;
472 	int			i,				/* index in arrin */
473 				j,				/* index in arrout */
474 				k,				/* index in indices_to_delete */
475 				curoff;			/* index in dataout area */
476 
477 	/*
478 	 * Sort the filter array to simplify membership checks below.  Also, get
479 	 * rid of any duplicate entries, so that we can assume that indices_count
480 	 * is exactly equal to the number of lexemes that will be removed.
481 	 */
482 	if (indices_count > 1)
483 	{
484 		qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
485 		indices_count = qunique(indices_to_delete, indices_count, sizeof(int),
486 								compare_int);
487 	}
488 
489 	/*
490 	 * Here we overestimate tsout size, since we don't know how much space is
491 	 * used by the deleted lexeme(s).  We will set exact size below.
492 	 */
493 	tsout = (TSVector) palloc0(VARSIZE(tsv));
494 
495 	/* This count must be correct because STRPTR(tsout) relies on it. */
496 	tsout->size = tsv->size - indices_count;
497 
498 	/*
499 	 * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
500 	 */
501 	arrout = ARRPTR(tsout);
502 	dataout = STRPTR(tsout);
503 	curoff = 0;
504 	for (i = j = k = 0; i < tsv->size; i++)
505 	{
506 		/*
507 		 * If current i is present in indices_to_delete, skip this lexeme.
508 		 * Since indices_to_delete is already sorted, we only need to check
509 		 * the current (k'th) entry.
510 		 */
511 		if (k < indices_count && i == indices_to_delete[k])
512 		{
513 			k++;
514 			continue;
515 		}
516 
517 		/* Copy lexeme and its positions and weights */
518 		memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
519 		arrout[j].haspos = arrin[i].haspos;
520 		arrout[j].len = arrin[i].len;
521 		arrout[j].pos = curoff;
522 		curoff += arrin[i].len;
523 		if (arrin[i].haspos)
524 		{
525 			int			len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
526 			+ sizeof(uint16);
527 
528 			curoff = SHORTALIGN(curoff);
529 			memcpy(dataout + curoff,
530 				   STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
531 				   len);
532 			curoff += len;
533 		}
534 
535 		j++;
536 	}
537 
538 	/*
539 	 * k should now be exactly equal to indices_count. If it isn't then the
540 	 * caller provided us with indices outside of [0, tsv->size) range and
541 	 * estimation of tsout's size is wrong.
542 	 */
543 	Assert(k == indices_count);
544 
545 	SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
546 	return tsout;
547 }
548 
549 /*
550  * Delete given lexeme from tsvector.
551  * Implementation of user-level ts_delete(tsvector, text).
552  */
553 Datum
tsvector_delete_str(PG_FUNCTION_ARGS)554 tsvector_delete_str(PG_FUNCTION_ARGS)
555 {
556 	TSVector	tsin = PG_GETARG_TSVECTOR(0),
557 				tsout;
558 	text	   *tlexeme = PG_GETARG_TEXT_PP(1);
559 	char	   *lexeme = VARDATA_ANY(tlexeme);
560 	int			lexeme_len = VARSIZE_ANY_EXHDR(tlexeme),
561 				skip_index;
562 
563 	if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1)
564 		PG_RETURN_POINTER(tsin);
565 
566 	tsout = tsvector_delete_by_indices(tsin, &skip_index, 1);
567 
568 	PG_FREE_IF_COPY(tsin, 0);
569 	PG_FREE_IF_COPY(tlexeme, 1);
570 	PG_RETURN_POINTER(tsout);
571 }
572 
573 /*
574  * Delete given array of lexemes from tsvector.
575  * Implementation of user-level ts_delete(tsvector, text[]).
576  */
577 Datum
tsvector_delete_arr(PG_FUNCTION_ARGS)578 tsvector_delete_arr(PG_FUNCTION_ARGS)
579 {
580 	TSVector	tsin = PG_GETARG_TSVECTOR(0),
581 				tsout;
582 	ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(1);
583 	int			i,
584 				nlex,
585 				skip_count,
586 			   *skip_indices;
587 	Datum	   *dlexemes;
588 	bool	   *nulls;
589 
590 	deconstruct_array(lexemes, TEXTOID, -1, false, TYPALIGN_INT,
591 					  &dlexemes, &nulls, &nlex);
592 
593 	/*
594 	 * In typical use case array of lexemes to delete is relatively small. So
595 	 * here we optimize things for that scenario: iterate through lexarr
596 	 * performing binary search of each lexeme from lexarr in tsvector.
597 	 */
598 	skip_indices = palloc0(nlex * sizeof(int));
599 	for (i = skip_count = 0; i < nlex; i++)
600 	{
601 		char	   *lex;
602 		int			lex_len,
603 					lex_pos;
604 
605 		if (nulls[i])
606 			ereport(ERROR,
607 					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
608 					 errmsg("lexeme array may not contain nulls")));
609 
610 		lex = VARDATA(dlexemes[i]);
611 		lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
612 		lex_pos = tsvector_bsearch(tsin, lex, lex_len);
613 
614 		if (lex_pos >= 0)
615 			skip_indices[skip_count++] = lex_pos;
616 	}
617 
618 	tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);
619 
620 	pfree(skip_indices);
621 	PG_FREE_IF_COPY(tsin, 0);
622 	PG_FREE_IF_COPY(lexemes, 1);
623 
624 	PG_RETURN_POINTER(tsout);
625 }
626 
627 /*
628  * Expand tsvector as table with following columns:
629  *	   lexeme: lexeme text
630  *	   positions: integer array of lexeme positions
631  *	   weights: char array of weights corresponding to positions
632  */
633 Datum
tsvector_unnest(PG_FUNCTION_ARGS)634 tsvector_unnest(PG_FUNCTION_ARGS)
635 {
636 	FuncCallContext *funcctx;
637 	TSVector	tsin;
638 
639 	if (SRF_IS_FIRSTCALL())
640 	{
641 		MemoryContext oldcontext;
642 		TupleDesc	tupdesc;
643 
644 		funcctx = SRF_FIRSTCALL_INIT();
645 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
646 
647 		tupdesc = CreateTemplateTupleDesc(3);
648 		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
649 						   TEXTOID, -1, 0);
650 		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
651 						   INT2ARRAYOID, -1, 0);
652 		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
653 						   TEXTARRAYOID, -1, 0);
654 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
655 
656 		funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
657 
658 		MemoryContextSwitchTo(oldcontext);
659 	}
660 
661 	funcctx = SRF_PERCALL_SETUP();
662 	tsin = (TSVector) funcctx->user_fctx;
663 
664 	if (funcctx->call_cntr < tsin->size)
665 	{
666 		WordEntry  *arrin = ARRPTR(tsin);
667 		char	   *data = STRPTR(tsin);
668 		HeapTuple	tuple;
669 		int			j,
670 					i = funcctx->call_cntr;
671 		bool		nulls[] = {false, false, false};
672 		Datum		values[3];
673 
674 		values[0] = PointerGetDatum(cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len));
675 
676 		if (arrin[i].haspos)
677 		{
678 			WordEntryPosVector *posv;
679 			Datum	   *positions;
680 			Datum	   *weights;
681 			char		weight;
682 
683 			/*
684 			 * Internally tsvector stores position and weight in the same
685 			 * uint16 (2 bits for weight, 14 for position). Here we extract
686 			 * that in two separate arrays.
687 			 */
688 			posv = _POSVECPTR(tsin, arrin + i);
689 			positions = palloc(posv->npos * sizeof(Datum));
690 			weights = palloc(posv->npos * sizeof(Datum));
691 			for (j = 0; j < posv->npos; j++)
692 			{
693 				positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
694 				weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
695 				weights[j] = PointerGetDatum(cstring_to_text_with_len(&weight,
696 																	  1));
697 			}
698 
699 			values[1] = PointerGetDatum(construct_array(positions, posv->npos,
700 														INT2OID, 2, true, TYPALIGN_SHORT));
701 			values[2] = PointerGetDatum(construct_array(weights, posv->npos,
702 														TEXTOID, -1, false, TYPALIGN_INT));
703 		}
704 		else
705 		{
706 			nulls[1] = nulls[2] = true;
707 		}
708 
709 		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
710 		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
711 	}
712 	else
713 	{
714 		SRF_RETURN_DONE(funcctx);
715 	}
716 }
717 
718 /*
719  * Convert tsvector to array of lexemes.
720  */
721 Datum
tsvector_to_array(PG_FUNCTION_ARGS)722 tsvector_to_array(PG_FUNCTION_ARGS)
723 {
724 	TSVector	tsin = PG_GETARG_TSVECTOR(0);
725 	WordEntry  *arrin = ARRPTR(tsin);
726 	Datum	   *elements;
727 	int			i;
728 	ArrayType  *array;
729 
730 	elements = palloc(tsin->size * sizeof(Datum));
731 
732 	for (i = 0; i < tsin->size; i++)
733 	{
734 		elements[i] = PointerGetDatum(cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos,
735 															   arrin[i].len));
736 	}
737 
738 	array = construct_array(elements, tsin->size, TEXTOID, -1, false, TYPALIGN_INT);
739 
740 	pfree(elements);
741 	PG_FREE_IF_COPY(tsin, 0);
742 	PG_RETURN_POINTER(array);
743 }
744 
745 /*
746  * Build tsvector from array of lexemes.
747  */
748 Datum
array_to_tsvector(PG_FUNCTION_ARGS)749 array_to_tsvector(PG_FUNCTION_ARGS)
750 {
751 	ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
752 	TSVector	tsout;
753 	Datum	   *dlexemes;
754 	WordEntry  *arrout;
755 	bool	   *nulls;
756 	int			nitems,
757 				i,
758 				tslen,
759 				datalen = 0;
760 	char	   *cur;
761 
762 	deconstruct_array(v, TEXTOID, -1, false, TYPALIGN_INT, &dlexemes, &nulls, &nitems);
763 
764 	/* Reject nulls (maybe we should just ignore them, instead?) */
765 	for (i = 0; i < nitems; i++)
766 	{
767 		if (nulls[i])
768 			ereport(ERROR,
769 					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
770 					 errmsg("lexeme array may not contain nulls")));
771 	}
772 
773 	/* Sort and de-dup, because this is required for a valid tsvector. */
774 	if (nitems > 1)
775 	{
776 		qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
777 		nitems = qunique(dlexemes, nitems, sizeof(Datum),
778 						 compare_text_lexemes);
779 	}
780 
781 	/* Calculate space needed for surviving lexemes. */
782 	for (i = 0; i < nitems; i++)
783 		datalen += VARSIZE(dlexemes[i]) - VARHDRSZ;
784 	tslen = CALCDATASIZE(nitems, datalen);
785 
786 	/* Allocate and fill tsvector. */
787 	tsout = (TSVector) palloc0(tslen);
788 	SET_VARSIZE(tsout, tslen);
789 	tsout->size = nitems;
790 
791 	arrout = ARRPTR(tsout);
792 	cur = STRPTR(tsout);
793 	for (i = 0; i < nitems; i++)
794 	{
795 		char	   *lex = VARDATA(dlexemes[i]);
796 		int			lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
797 
798 		memcpy(cur, lex, lex_len);
799 		arrout[i].haspos = 0;
800 		arrout[i].len = lex_len;
801 		arrout[i].pos = cur - STRPTR(tsout);
802 		cur += lex_len;
803 	}
804 
805 	PG_FREE_IF_COPY(v, 0);
806 	PG_RETURN_POINTER(tsout);
807 }
808 
809 /*
810  * ts_filter(): keep only lexemes with given weights in tsvector.
811  */
812 Datum
tsvector_filter(PG_FUNCTION_ARGS)813 tsvector_filter(PG_FUNCTION_ARGS)
814 {
815 	TSVector	tsin = PG_GETARG_TSVECTOR(0),
816 				tsout;
817 	ArrayType  *weights = PG_GETARG_ARRAYTYPE_P(1);
818 	WordEntry  *arrin = ARRPTR(tsin),
819 			   *arrout;
820 	char	   *datain = STRPTR(tsin),
821 			   *dataout;
822 	Datum	   *dweights;
823 	bool	   *nulls;
824 	int			nweights;
825 	int			i,
826 				j;
827 	int			cur_pos = 0;
828 	char		mask = 0;
829 
830 	deconstruct_array(weights, CHAROID, 1, true, TYPALIGN_CHAR,
831 					  &dweights, &nulls, &nweights);
832 
833 	for (i = 0; i < nweights; i++)
834 	{
835 		char		char_weight;
836 
837 		if (nulls[i])
838 			ereport(ERROR,
839 					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
840 					 errmsg("weight array may not contain nulls")));
841 
842 		char_weight = DatumGetChar(dweights[i]);
843 		switch (char_weight)
844 		{
845 			case 'A':
846 			case 'a':
847 				mask = mask | 8;
848 				break;
849 			case 'B':
850 			case 'b':
851 				mask = mask | 4;
852 				break;
853 			case 'C':
854 			case 'c':
855 				mask = mask | 2;
856 				break;
857 			case 'D':
858 			case 'd':
859 				mask = mask | 1;
860 				break;
861 			default:
862 				ereport(ERROR,
863 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
864 						 errmsg("unrecognized weight: \"%c\"", char_weight)));
865 		}
866 	}
867 
868 	tsout = (TSVector) palloc0(VARSIZE(tsin));
869 	tsout->size = tsin->size;
870 	arrout = ARRPTR(tsout);
871 	dataout = STRPTR(tsout);
872 
873 	for (i = j = 0; i < tsin->size; i++)
874 	{
875 		WordEntryPosVector *posvin,
876 				   *posvout;
877 		int			npos = 0;
878 		int			k;
879 
880 		if (!arrin[i].haspos)
881 			continue;
882 
883 		posvin = _POSVECPTR(tsin, arrin + i);
884 		posvout = (WordEntryPosVector *)
885 			(dataout + SHORTALIGN(cur_pos + arrin[i].len));
886 
887 		for (k = 0; k < posvin->npos; k++)
888 		{
889 			if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
890 				posvout->pos[npos++] = posvin->pos[k];
891 		}
892 
893 		/* if no satisfactory positions found, skip lexeme */
894 		if (!npos)
895 			continue;
896 
897 		arrout[j].haspos = true;
898 		arrout[j].len = arrin[i].len;
899 		arrout[j].pos = cur_pos;
900 
901 		memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
902 		posvout->npos = npos;
903 		cur_pos += SHORTALIGN(arrin[i].len);
904 		cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
905 			sizeof(uint16);
906 		j++;
907 	}
908 
909 	tsout->size = j;
910 	if (dataout != STRPTR(tsout))
911 		memmove(STRPTR(tsout), dataout, cur_pos);
912 
913 	SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
914 
915 	PG_FREE_IF_COPY(tsin, 0);
916 	PG_RETURN_POINTER(tsout);
917 }
918 
919 Datum
tsvector_concat(PG_FUNCTION_ARGS)920 tsvector_concat(PG_FUNCTION_ARGS)
921 {
922 	TSVector	in1 = PG_GETARG_TSVECTOR(0);
923 	TSVector	in2 = PG_GETARG_TSVECTOR(1);
924 	TSVector	out;
925 	WordEntry  *ptr;
926 	WordEntry  *ptr1,
927 			   *ptr2;
928 	WordEntryPos *p;
929 	int			maxpos = 0,
930 				i,
931 				j,
932 				i1,
933 				i2,
934 				dataoff,
935 				output_bytes,
936 				output_size;
937 	char	   *data,
938 			   *data1,
939 			   *data2;
940 
941 	/* Get max position in in1; we'll need this to offset in2's positions */
942 	ptr = ARRPTR(in1);
943 	i = in1->size;
944 	while (i--)
945 	{
946 		if ((j = POSDATALEN(in1, ptr)) != 0)
947 		{
948 			p = POSDATAPTR(in1, ptr);
949 			while (j--)
950 			{
951 				if (WEP_GETPOS(*p) > maxpos)
952 					maxpos = WEP_GETPOS(*p);
953 				p++;
954 			}
955 		}
956 		ptr++;
957 	}
958 
959 	ptr1 = ARRPTR(in1);
960 	ptr2 = ARRPTR(in2);
961 	data1 = STRPTR(in1);
962 	data2 = STRPTR(in2);
963 	i1 = in1->size;
964 	i2 = in2->size;
965 
966 	/*
967 	 * Conservative estimate of space needed.  We might need all the data in
968 	 * both inputs, and conceivably add a pad byte before position data for
969 	 * each item where there was none before.
970 	 */
971 	output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
972 
973 	out = (TSVector) palloc0(output_bytes);
974 	SET_VARSIZE(out, output_bytes);
975 
976 	/*
977 	 * We must make out->size valid so that STRPTR(out) is sensible.  We'll
978 	 * collapse out any unused space at the end.
979 	 */
980 	out->size = in1->size + in2->size;
981 
982 	ptr = ARRPTR(out);
983 	data = STRPTR(out);
984 	dataoff = 0;
985 	while (i1 && i2)
986 	{
987 		int			cmp = compareEntry(data1, ptr1, data2, ptr2);
988 
989 		if (cmp < 0)
990 		{						/* in1 first */
991 			ptr->haspos = ptr1->haspos;
992 			ptr->len = ptr1->len;
993 			memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
994 			ptr->pos = dataoff;
995 			dataoff += ptr1->len;
996 			if (ptr->haspos)
997 			{
998 				dataoff = SHORTALIGN(dataoff);
999 				memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1000 				dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1001 			}
1002 
1003 			ptr++;
1004 			ptr1++;
1005 			i1--;
1006 		}
1007 		else if (cmp > 0)
1008 		{						/* in2 first */
1009 			ptr->haspos = ptr2->haspos;
1010 			ptr->len = ptr2->len;
1011 			memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1012 			ptr->pos = dataoff;
1013 			dataoff += ptr2->len;
1014 			if (ptr->haspos)
1015 			{
1016 				int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1017 
1018 				if (addlen == 0)
1019 					ptr->haspos = 0;
1020 				else
1021 				{
1022 					dataoff = SHORTALIGN(dataoff);
1023 					dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1024 				}
1025 			}
1026 
1027 			ptr++;
1028 			ptr2++;
1029 			i2--;
1030 		}
1031 		else
1032 		{
1033 			ptr->haspos = ptr1->haspos | ptr2->haspos;
1034 			ptr->len = ptr1->len;
1035 			memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1036 			ptr->pos = dataoff;
1037 			dataoff += ptr1->len;
1038 			if (ptr->haspos)
1039 			{
1040 				if (ptr1->haspos)
1041 				{
1042 					dataoff = SHORTALIGN(dataoff);
1043 					memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1044 					dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1045 					if (ptr2->haspos)
1046 						dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
1047 				}
1048 				else			/* must have ptr2->haspos */
1049 				{
1050 					int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1051 
1052 					if (addlen == 0)
1053 						ptr->haspos = 0;
1054 					else
1055 					{
1056 						dataoff = SHORTALIGN(dataoff);
1057 						dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1058 					}
1059 				}
1060 			}
1061 
1062 			ptr++;
1063 			ptr1++;
1064 			ptr2++;
1065 			i1--;
1066 			i2--;
1067 		}
1068 	}
1069 
1070 	while (i1)
1071 	{
1072 		ptr->haspos = ptr1->haspos;
1073 		ptr->len = ptr1->len;
1074 		memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1075 		ptr->pos = dataoff;
1076 		dataoff += ptr1->len;
1077 		if (ptr->haspos)
1078 		{
1079 			dataoff = SHORTALIGN(dataoff);
1080 			memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1081 			dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1082 		}
1083 
1084 		ptr++;
1085 		ptr1++;
1086 		i1--;
1087 	}
1088 
1089 	while (i2)
1090 	{
1091 		ptr->haspos = ptr2->haspos;
1092 		ptr->len = ptr2->len;
1093 		memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1094 		ptr->pos = dataoff;
1095 		dataoff += ptr2->len;
1096 		if (ptr->haspos)
1097 		{
1098 			int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1099 
1100 			if (addlen == 0)
1101 				ptr->haspos = 0;
1102 			else
1103 			{
1104 				dataoff = SHORTALIGN(dataoff);
1105 				dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1106 			}
1107 		}
1108 
1109 		ptr++;
1110 		ptr2++;
1111 		i2--;
1112 	}
1113 
1114 	/*
1115 	 * Instead of checking each offset individually, we check for overflow of
1116 	 * pos fields once at the end.
1117 	 */
1118 	if (dataoff > MAXSTRPOS)
1119 		ereport(ERROR,
1120 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1121 				 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
1122 
1123 	/*
1124 	 * Adjust sizes (asserting that we didn't overrun the original estimates)
1125 	 * and collapse out any unused array entries.
1126 	 */
1127 	output_size = ptr - ARRPTR(out);
1128 	Assert(output_size <= out->size);
1129 	out->size = output_size;
1130 	if (data != STRPTR(out))
1131 		memmove(STRPTR(out), data, dataoff);
1132 	output_bytes = CALCDATASIZE(out->size, dataoff);
1133 	Assert(output_bytes <= VARSIZE(out));
1134 	SET_VARSIZE(out, output_bytes);
1135 
1136 	PG_FREE_IF_COPY(in1, 0);
1137 	PG_FREE_IF_COPY(in2, 1);
1138 	PG_RETURN_POINTER(out);
1139 }
1140 
1141 /*
1142  * Compare two strings by tsvector rules.
1143  *
1144  * if prefix = true then it returns zero value iff b has prefix a
1145  */
1146 int32
tsCompareString(char * a,int lena,char * b,int lenb,bool prefix)1147 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
1148 {
1149 	int			cmp;
1150 
1151 	if (lena == 0)
1152 	{
1153 		if (prefix)
1154 			cmp = 0;			/* empty string is prefix of anything */
1155 		else
1156 			cmp = (lenb > 0) ? -1 : 0;
1157 	}
1158 	else if (lenb == 0)
1159 	{
1160 		cmp = (lena > 0) ? 1 : 0;
1161 	}
1162 	else
1163 	{
1164 		cmp = memcmp(a, b, Min(lena, lenb));
1165 
1166 		if (prefix)
1167 		{
1168 			if (cmp == 0 && lena > lenb)
1169 				cmp = 1;		/* a is longer, so not a prefix of b */
1170 		}
1171 		else if (cmp == 0 && lena != lenb)
1172 		{
1173 			cmp = (lena < lenb) ? -1 : 1;
1174 		}
1175 	}
1176 
1177 	return cmp;
1178 }
1179 
1180 /*
1181  * Check weight info or/and fill 'data' with the required positions
1182  */
1183 static TSTernaryValue
checkclass_str(CHKVAL * chkval,WordEntry * entry,QueryOperand * val,ExecPhraseData * data)1184 checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
1185 			   ExecPhraseData *data)
1186 {
1187 	TSTernaryValue result = TS_NO;
1188 
1189 	Assert(data == NULL || data->npos == 0);
1190 
1191 	if (entry->haspos)
1192 	{
1193 		WordEntryPosVector *posvec;
1194 
1195 		/*
1196 		 * We can't use the _POSVECPTR macro here because the pointer to the
1197 		 * tsvector's lexeme storage is already contained in chkval->values.
1198 		 */
1199 		posvec = (WordEntryPosVector *)
1200 			(chkval->values + SHORTALIGN(entry->pos + entry->len));
1201 
1202 		if (val->weight && data)
1203 		{
1204 			WordEntryPos *posvec_iter = posvec->pos;
1205 			WordEntryPos *dptr;
1206 
1207 			/*
1208 			 * Filter position information by weights
1209 			 */
1210 			dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
1211 			data->allocated = true;
1212 
1213 			/* Is there a position with a matching weight? */
1214 			while (posvec_iter < posvec->pos + posvec->npos)
1215 			{
1216 				/* If true, append this position to the data->pos */
1217 				if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1218 				{
1219 					*dptr = WEP_GETPOS(*posvec_iter);
1220 					dptr++;
1221 				}
1222 
1223 				posvec_iter++;
1224 			}
1225 
1226 			data->npos = dptr - data->pos;
1227 
1228 			if (data->npos > 0)
1229 				result = TS_YES;
1230 			else
1231 			{
1232 				pfree(data->pos);
1233 				data->pos = NULL;
1234 				data->allocated = false;
1235 			}
1236 		}
1237 		else if (val->weight)
1238 		{
1239 			WordEntryPos *posvec_iter = posvec->pos;
1240 
1241 			/* Is there a position with a matching weight? */
1242 			while (posvec_iter < posvec->pos + posvec->npos)
1243 			{
1244 				if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1245 				{
1246 					result = TS_YES;
1247 					break;		/* no need to go further */
1248 				}
1249 
1250 				posvec_iter++;
1251 			}
1252 		}
1253 		else if (data)
1254 		{
1255 			data->npos = posvec->npos;
1256 			data->pos = posvec->pos;
1257 			data->allocated = false;
1258 			result = TS_YES;
1259 		}
1260 		else
1261 		{
1262 			/* simplest case: no weight check, positions not needed */
1263 			result = TS_YES;
1264 		}
1265 	}
1266 	else
1267 	{
1268 		/*
1269 		 * Position info is lacking, so if the caller requires it, we can only
1270 		 * say that maybe there is a match.
1271 		 *
1272 		 * Notice, however, that we *don't* check val->weight here.
1273 		 * Historically, stripped tsvectors are considered to match queries
1274 		 * whether or not the query has a weight restriction; that's a little
1275 		 * dubious but we'll preserve the behavior.
1276 		 */
1277 		if (data)
1278 			result = TS_MAYBE;
1279 		else
1280 			result = TS_YES;
1281 	}
1282 
1283 	return result;
1284 }
1285 
1286 /*
1287  * TS_execute callback for matching a tsquery operand to plain tsvector data
1288  */
1289 static TSTernaryValue
checkcondition_str(void * checkval,QueryOperand * val,ExecPhraseData * data)1290 checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
1291 {
1292 	CHKVAL	   *chkval = (CHKVAL *) checkval;
1293 	WordEntry  *StopLow = chkval->arrb;
1294 	WordEntry  *StopHigh = chkval->arre;
1295 	WordEntry  *StopMiddle = StopHigh;
1296 	TSTernaryValue res = TS_NO;
1297 
1298 	/* Loop invariant: StopLow <= val < StopHigh */
1299 	while (StopLow < StopHigh)
1300 	{
1301 		int			difference;
1302 
1303 		StopMiddle = StopLow + (StopHigh - StopLow) / 2;
1304 		difference = tsCompareString(chkval->operand + val->distance,
1305 									 val->length,
1306 									 chkval->values + StopMiddle->pos,
1307 									 StopMiddle->len,
1308 									 false);
1309 
1310 		if (difference == 0)
1311 		{
1312 			/* Check weight info & fill 'data' with positions */
1313 			res = checkclass_str(chkval, StopMiddle, val, data);
1314 			break;
1315 		}
1316 		else if (difference > 0)
1317 			StopLow = StopMiddle + 1;
1318 		else
1319 			StopHigh = StopMiddle;
1320 	}
1321 
1322 	/*
1323 	 * If it's a prefix search, we should also consider lexemes that the
1324 	 * search term is a prefix of (which will necessarily immediately follow
1325 	 * the place we found in the above loop).  But we can skip them if there
1326 	 * was a definite match on the exact term AND the caller doesn't need
1327 	 * position info.
1328 	 */
1329 	if (val->prefix && (res != TS_YES || data))
1330 	{
1331 		WordEntryPos *allpos = NULL;
1332 		int			npos = 0,
1333 					totalpos = 0;
1334 
1335 		/* adjust start position for corner case */
1336 		if (StopLow >= StopHigh)
1337 			StopMiddle = StopHigh;
1338 
1339 		/* we don't try to re-use any data from the initial match */
1340 		if (data)
1341 		{
1342 			if (data->allocated)
1343 				pfree(data->pos);
1344 			data->pos = NULL;
1345 			data->allocated = false;
1346 			data->npos = 0;
1347 		}
1348 		res = TS_NO;
1349 
1350 		while ((res != TS_YES || data) &&
1351 			   StopMiddle < chkval->arre &&
1352 			   tsCompareString(chkval->operand + val->distance,
1353 							   val->length,
1354 							   chkval->values + StopMiddle->pos,
1355 							   StopMiddle->len,
1356 							   true) == 0)
1357 		{
1358 			TSTernaryValue subres;
1359 
1360 			subres = checkclass_str(chkval, StopMiddle, val, data);
1361 
1362 			if (subres != TS_NO)
1363 			{
1364 				if (data)
1365 				{
1366 					/*
1367 					 * We need to join position information
1368 					 */
1369 					if (subres == TS_MAYBE)
1370 					{
1371 						/*
1372 						 * No position info for this match, so we must report
1373 						 * MAYBE overall.
1374 						 */
1375 						res = TS_MAYBE;
1376 						/* forget any previous positions */
1377 						npos = 0;
1378 						/* don't leak storage */
1379 						if (allpos)
1380 							pfree(allpos);
1381 						break;
1382 					}
1383 
1384 					while (npos + data->npos > totalpos)
1385 					{
1386 						if (totalpos == 0)
1387 						{
1388 							totalpos = 256;
1389 							allpos = palloc(sizeof(WordEntryPos) * totalpos);
1390 						}
1391 						else
1392 						{
1393 							totalpos *= 2;
1394 							allpos = repalloc(allpos, sizeof(WordEntryPos) * totalpos);
1395 						}
1396 					}
1397 
1398 					memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
1399 					npos += data->npos;
1400 
1401 					/* don't leak storage from individual matches */
1402 					if (data->allocated)
1403 						pfree(data->pos);
1404 					data->pos = NULL;
1405 					data->allocated = false;
1406 					/* it's important to reset data->npos before next loop */
1407 					data->npos = 0;
1408 				}
1409 				else
1410 				{
1411 					/* Don't need positions, just handle YES/MAYBE */
1412 					if (subres == TS_YES || res == TS_NO)
1413 						res = subres;
1414 				}
1415 			}
1416 
1417 			StopMiddle++;
1418 		}
1419 
1420 		if (data && npos > 0)
1421 		{
1422 			/* Sort and make unique array of found positions */
1423 			data->pos = allpos;
1424 			qsort(data->pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
1425 			data->npos = qunique(data->pos, npos, sizeof(WordEntryPos),
1426 								 compareWordEntryPos);
1427 			data->allocated = true;
1428 			res = TS_YES;
1429 		}
1430 	}
1431 
1432 	return res;
1433 }
1434 
1435 /*
1436  * Compute output position list for a tsquery operator in phrase mode.
1437  *
1438  * Merge the position lists in Ldata and Rdata as specified by "emit",
1439  * returning the result list into *data.  The input position lists must be
1440  * sorted and unique, and the output will be as well.
1441  *
1442  * data: pointer to initially-all-zeroes output struct, or NULL
1443  * Ldata, Rdata: input position lists
1444  * emit: bitmask of TSPO_XXX flags
1445  * Loffset: offset to be added to Ldata positions before comparing/outputting
1446  * Roffset: offset to be added to Rdata positions before comparing/outputting
1447  * max_npos: maximum possible required size of output position array
1448  *
1449  * Loffset and Roffset should not be negative, else we risk trying to output
1450  * negative positions, which won't fit into WordEntryPos.
1451  *
1452  * The result is boolean (TS_YES or TS_NO), but for the caller's convenience
1453  * we return it as TSTernaryValue.
1454  *
1455  * Returns TS_YES if any positions were emitted to *data; or if data is NULL,
1456  * returns TS_YES if any positions would have been emitted.
1457  */
1458 #define TSPO_L_ONLY		0x01	/* emit positions appearing only in L */
1459 #define TSPO_R_ONLY		0x02	/* emit positions appearing only in R */
1460 #define TSPO_BOTH		0x04	/* emit positions appearing in both L&R */
1461 
1462 static TSTernaryValue
TS_phrase_output(ExecPhraseData * data,ExecPhraseData * Ldata,ExecPhraseData * Rdata,int emit,int Loffset,int Roffset,int max_npos)1463 TS_phrase_output(ExecPhraseData *data,
1464 				 ExecPhraseData *Ldata,
1465 				 ExecPhraseData *Rdata,
1466 				 int emit,
1467 				 int Loffset,
1468 				 int Roffset,
1469 				 int max_npos)
1470 {
1471 	int			Lindex,
1472 				Rindex;
1473 
1474 	/* Loop until both inputs are exhausted */
1475 	Lindex = Rindex = 0;
1476 	while (Lindex < Ldata->npos || Rindex < Rdata->npos)
1477 	{
1478 		int			Lpos,
1479 					Rpos;
1480 		int			output_pos = 0;
1481 
1482 		/*
1483 		 * Fetch current values to compare.  WEP_GETPOS() is needed because
1484 		 * ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
1485 		 */
1486 		if (Lindex < Ldata->npos)
1487 			Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
1488 		else
1489 		{
1490 			/* L array exhausted, so we're done if R_ONLY isn't set */
1491 			if (!(emit & TSPO_R_ONLY))
1492 				break;
1493 			Lpos = INT_MAX;
1494 		}
1495 		if (Rindex < Rdata->npos)
1496 			Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
1497 		else
1498 		{
1499 			/* R array exhausted, so we're done if L_ONLY isn't set */
1500 			if (!(emit & TSPO_L_ONLY))
1501 				break;
1502 			Rpos = INT_MAX;
1503 		}
1504 
1505 		/* Merge-join the two input lists */
1506 		if (Lpos < Rpos)
1507 		{
1508 			/* Lpos is not matched in Rdata, should we output it? */
1509 			if (emit & TSPO_L_ONLY)
1510 				output_pos = Lpos;
1511 			Lindex++;
1512 		}
1513 		else if (Lpos == Rpos)
1514 		{
1515 			/* Lpos and Rpos match ... should we output it? */
1516 			if (emit & TSPO_BOTH)
1517 				output_pos = Rpos;
1518 			Lindex++;
1519 			Rindex++;
1520 		}
1521 		else					/* Lpos > Rpos */
1522 		{
1523 			/* Rpos is not matched in Ldata, should we output it? */
1524 			if (emit & TSPO_R_ONLY)
1525 				output_pos = Rpos;
1526 			Rindex++;
1527 		}
1528 
1529 		if (output_pos > 0)
1530 		{
1531 			if (data)
1532 			{
1533 				/* Store position, first allocating output array if needed */
1534 				if (data->pos == NULL)
1535 				{
1536 					data->pos = (WordEntryPos *)
1537 						palloc(max_npos * sizeof(WordEntryPos));
1538 					data->allocated = true;
1539 				}
1540 				data->pos[data->npos++] = output_pos;
1541 			}
1542 			else
1543 			{
1544 				/*
1545 				 * Exact positions not needed, so return TS_YES as soon as we
1546 				 * know there is at least one.
1547 				 */
1548 				return TS_YES;
1549 			}
1550 		}
1551 	}
1552 
1553 	if (data && data->npos > 0)
1554 	{
1555 		/* Let's assert we didn't overrun the array */
1556 		Assert(data->npos <= max_npos);
1557 		return TS_YES;
1558 	}
1559 	return TS_NO;
1560 }
1561 
1562 /*
1563  * Execute tsquery at or below an OP_PHRASE operator.
1564  *
1565  * This handles tsquery execution at recursion levels where we need to care
1566  * about match locations.
1567  *
1568  * In addition to the same arguments used for TS_execute, the caller may pass
1569  * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
1570  * match position info on success.  data == NULL if no position data need be
1571  * returned.  (In practice, outside callers pass NULL, and only the internal
1572  * recursion cases pass a data pointer.)
1573  * Note: the function assumes data != NULL for operators other than OP_PHRASE.
1574  * This is OK because an outside call always starts from an OP_PHRASE node.
1575  *
1576  * The detailed semantics of the match data, given that the function returned
1577  * TS_YES (successful match), are:
1578  *
1579  * npos > 0, negate = false:
1580  *	 query is matched at specified position(s) (and only those positions)
1581  * npos > 0, negate = true:
1582  *	 query is matched at all positions *except* specified position(s)
1583  * npos = 0, negate = true:
1584  *	 query is matched at all positions
1585  * npos = 0, negate = false:
1586  *	 disallowed (this should result in TS_NO or TS_MAYBE, as appropriate)
1587  *
1588  * Successful matches also return a "width" value which is the match width in
1589  * lexemes, less one.  Hence, "width" is zero for simple one-lexeme matches,
1590  * and is the sum of the phrase operator distances for phrase matches.  Note
1591  * that when width > 0, the listed positions represent the ends of matches not
1592  * the starts.  (This unintuitive rule is needed to avoid possibly generating
1593  * negative positions, which wouldn't fit into the WordEntryPos arrays.)
1594  *
1595  * If the TSExecuteCallback function reports that an operand is present
1596  * but fails to provide position(s) for it, we will return TS_MAYBE when
1597  * it is possible but not certain that the query is matched.
1598  *
1599  * When the function returns TS_NO or TS_MAYBE, it must return npos = 0,
1600  * negate = false (which is the state initialized by the caller); but the
1601  * "width" output in such cases is undefined.
1602  */
1603 static TSTernaryValue
TS_phrase_execute(QueryItem * curitem,void * arg,uint32 flags,TSExecuteCallback chkcond,ExecPhraseData * data)1604 TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
1605 				  TSExecuteCallback chkcond,
1606 				  ExecPhraseData *data)
1607 {
1608 	ExecPhraseData Ldata,
1609 				Rdata;
1610 	TSTernaryValue lmatch,
1611 				rmatch;
1612 	int			Loffset,
1613 				Roffset,
1614 				maxwidth;
1615 
1616 	/* since this function recurses, it could be driven to stack overflow */
1617 	check_stack_depth();
1618 
1619 	if (curitem->type == QI_VAL)
1620 		return chkcond(arg, (QueryOperand *) curitem, data);
1621 
1622 	switch (curitem->qoperator.oper)
1623 	{
1624 		case OP_NOT:
1625 
1626 			/*
1627 			 * We need not touch data->width, since a NOT operation does not
1628 			 * change the match width.
1629 			 */
1630 			if (flags & TS_EXEC_SKIP_NOT)
1631 			{
1632 				/* with SKIP_NOT, report NOT as "match everywhere" */
1633 				Assert(data->npos == 0 && !data->negate);
1634 				data->negate = true;
1635 				return TS_YES;
1636 			}
1637 			switch (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
1638 			{
1639 				case TS_NO:
1640 					/* change "match nowhere" to "match everywhere" */
1641 					Assert(data->npos == 0 && !data->negate);
1642 					data->negate = true;
1643 					return TS_YES;
1644 				case TS_YES:
1645 					if (data->npos > 0)
1646 					{
1647 						/* we have some positions, invert negate flag */
1648 						data->negate = !data->negate;
1649 						return TS_YES;
1650 					}
1651 					else if (data->negate)
1652 					{
1653 						/* change "match everywhere" to "match nowhere" */
1654 						data->negate = false;
1655 						return TS_NO;
1656 					}
1657 					/* Should not get here if result was TS_YES */
1658 					Assert(false);
1659 					break;
1660 				case TS_MAYBE:
1661 					/* match positions are, and remain, uncertain */
1662 					return TS_MAYBE;
1663 			}
1664 			break;
1665 
1666 		case OP_PHRASE:
1667 		case OP_AND:
1668 			memset(&Ldata, 0, sizeof(Ldata));
1669 			memset(&Rdata, 0, sizeof(Rdata));
1670 
1671 			lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
1672 									   arg, flags, chkcond, &Ldata);
1673 			if (lmatch == TS_NO)
1674 				return TS_NO;
1675 
1676 			rmatch = TS_phrase_execute(curitem + 1,
1677 									   arg, flags, chkcond, &Rdata);
1678 			if (rmatch == TS_NO)
1679 				return TS_NO;
1680 
1681 			/*
1682 			 * If either operand has no position information, then we can't
1683 			 * return reliable position data, only a MAYBE result.
1684 			 */
1685 			if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
1686 				return TS_MAYBE;
1687 
1688 			if (curitem->qoperator.oper == OP_PHRASE)
1689 			{
1690 				/*
1691 				 * Compute Loffset and Roffset suitable for phrase match, and
1692 				 * compute overall width of whole phrase match.
1693 				 */
1694 				Loffset = curitem->qoperator.distance + Rdata.width;
1695 				Roffset = 0;
1696 				if (data)
1697 					data->width = curitem->qoperator.distance +
1698 						Ldata.width + Rdata.width;
1699 			}
1700 			else
1701 			{
1702 				/*
1703 				 * For OP_AND, set output width and alignment like OP_OR (see
1704 				 * comment below)
1705 				 */
1706 				maxwidth = Max(Ldata.width, Rdata.width);
1707 				Loffset = maxwidth - Ldata.width;
1708 				Roffset = maxwidth - Rdata.width;
1709 				if (data)
1710 					data->width = maxwidth;
1711 			}
1712 
1713 			if (Ldata.negate && Rdata.negate)
1714 			{
1715 				/* !L & !R: treat as !(L | R) */
1716 				(void) TS_phrase_output(data, &Ldata, &Rdata,
1717 										TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
1718 										Loffset, Roffset,
1719 										Ldata.npos + Rdata.npos);
1720 				if (data)
1721 					data->negate = true;
1722 				return TS_YES;
1723 			}
1724 			else if (Ldata.negate)
1725 			{
1726 				/* !L & R */
1727 				return TS_phrase_output(data, &Ldata, &Rdata,
1728 										TSPO_R_ONLY,
1729 										Loffset, Roffset,
1730 										Rdata.npos);
1731 			}
1732 			else if (Rdata.negate)
1733 			{
1734 				/* L & !R */
1735 				return TS_phrase_output(data, &Ldata, &Rdata,
1736 										TSPO_L_ONLY,
1737 										Loffset, Roffset,
1738 										Ldata.npos);
1739 			}
1740 			else
1741 			{
1742 				/* straight AND */
1743 				return TS_phrase_output(data, &Ldata, &Rdata,
1744 										TSPO_BOTH,
1745 										Loffset, Roffset,
1746 										Min(Ldata.npos, Rdata.npos));
1747 			}
1748 
1749 		case OP_OR:
1750 			memset(&Ldata, 0, sizeof(Ldata));
1751 			memset(&Rdata, 0, sizeof(Rdata));
1752 
1753 			lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
1754 									   arg, flags, chkcond, &Ldata);
1755 			rmatch = TS_phrase_execute(curitem + 1,
1756 									   arg, flags, chkcond, &Rdata);
1757 
1758 			if (lmatch == TS_NO && rmatch == TS_NO)
1759 				return TS_NO;
1760 
1761 			/*
1762 			 * If either operand has no position information, then we can't
1763 			 * return reliable position data, only a MAYBE result.
1764 			 */
1765 			if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
1766 				return TS_MAYBE;
1767 
1768 			/*
1769 			 * Cope with undefined output width from failed submatch.  (This
1770 			 * takes less code than trying to ensure that all failure returns
1771 			 * set data->width to zero.)
1772 			 */
1773 			if (lmatch == TS_NO)
1774 				Ldata.width = 0;
1775 			if (rmatch == TS_NO)
1776 				Rdata.width = 0;
1777 
1778 			/*
1779 			 * For OP_AND and OP_OR, report the width of the wider of the two
1780 			 * inputs, and align the narrower input's positions to the right
1781 			 * end of that width.  This rule deals at least somewhat
1782 			 * reasonably with cases like "x <-> (y | z <-> q)".
1783 			 */
1784 			maxwidth = Max(Ldata.width, Rdata.width);
1785 			Loffset = maxwidth - Ldata.width;
1786 			Roffset = maxwidth - Rdata.width;
1787 			data->width = maxwidth;
1788 
1789 			if (Ldata.negate && Rdata.negate)
1790 			{
1791 				/* !L | !R: treat as !(L & R) */
1792 				(void) TS_phrase_output(data, &Ldata, &Rdata,
1793 										TSPO_BOTH,
1794 										Loffset, Roffset,
1795 										Min(Ldata.npos, Rdata.npos));
1796 				data->negate = true;
1797 				return TS_YES;
1798 			}
1799 			else if (Ldata.negate)
1800 			{
1801 				/* !L | R: treat as !(L & !R) */
1802 				(void) TS_phrase_output(data, &Ldata, &Rdata,
1803 										TSPO_L_ONLY,
1804 										Loffset, Roffset,
1805 										Ldata.npos);
1806 				data->negate = true;
1807 				return TS_YES;
1808 			}
1809 			else if (Rdata.negate)
1810 			{
1811 				/* L | !R: treat as !(!L & R) */
1812 				(void) TS_phrase_output(data, &Ldata, &Rdata,
1813 										TSPO_R_ONLY,
1814 										Loffset, Roffset,
1815 										Rdata.npos);
1816 				data->negate = true;
1817 				return TS_YES;
1818 			}
1819 			else
1820 			{
1821 				/* straight OR */
1822 				return TS_phrase_output(data, &Ldata, &Rdata,
1823 										TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
1824 										Loffset, Roffset,
1825 										Ldata.npos + Rdata.npos);
1826 			}
1827 
1828 		default:
1829 			elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1830 	}
1831 
1832 	/* not reachable, but keep compiler quiet */
1833 	return TS_NO;
1834 }
1835 
1836 
1837 /*
1838  * Evaluate tsquery boolean expression.
1839  *
1840  * curitem: current tsquery item (initially, the first one)
1841  * arg: opaque value to pass through to callback function
1842  * flags: bitmask of flag bits shown in ts_utils.h
1843  * chkcond: callback function to check whether a primitive value is present
1844  */
1845 bool
TS_execute(QueryItem * curitem,void * arg,uint32 flags,TSExecuteCallback chkcond)1846 TS_execute(QueryItem *curitem, void *arg, uint32 flags,
1847 		   TSExecuteCallback chkcond)
1848 {
1849 	/*
1850 	 * If we get TS_MAYBE from the recursion, return true.  We could only see
1851 	 * that result if the caller passed TS_EXEC_PHRASE_NO_POS, so there's no
1852 	 * need to check again.
1853 	 */
1854 	return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO;
1855 }
1856 
1857 /*
1858  * Evaluate tsquery boolean expression.
1859  *
1860  * This is the same as TS_execute except that TS_MAYBE is returned as-is.
1861  */
1862 TSTernaryValue
TS_execute_ternary(QueryItem * curitem,void * arg,uint32 flags,TSExecuteCallback chkcond)1863 TS_execute_ternary(QueryItem *curitem, void *arg, uint32 flags,
1864 				   TSExecuteCallback chkcond)
1865 {
1866 	return TS_execute_recurse(curitem, arg, flags, chkcond);
1867 }
1868 
1869 /*
1870  * TS_execute recursion for operators above any phrase operator.  Here we do
1871  * not need to worry about lexeme positions.  As soon as we hit an OP_PHRASE
1872  * operator, we pass it off to TS_phrase_execute which does worry.
1873  */
1874 static TSTernaryValue
TS_execute_recurse(QueryItem * curitem,void * arg,uint32 flags,TSExecuteCallback chkcond)1875 TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags,
1876 				   TSExecuteCallback chkcond)
1877 {
1878 	TSTernaryValue lmatch;
1879 
1880 	/* since this function recurses, it could be driven to stack overflow */
1881 	check_stack_depth();
1882 
1883 	/* ... and let's check for query cancel while we're at it */
1884 	CHECK_FOR_INTERRUPTS();
1885 
1886 	if (curitem->type == QI_VAL)
1887 		return chkcond(arg, (QueryOperand *) curitem,
1888 					   NULL /* don't need position info */ );
1889 
1890 	switch (curitem->qoperator.oper)
1891 	{
1892 		case OP_NOT:
1893 			if (flags & TS_EXEC_SKIP_NOT)
1894 				return TS_YES;
1895 			switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1896 			{
1897 				case TS_NO:
1898 					return TS_YES;
1899 				case TS_YES:
1900 					return TS_NO;
1901 				case TS_MAYBE:
1902 					return TS_MAYBE;
1903 			}
1904 			break;
1905 
1906 		case OP_AND:
1907 			lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
1908 										flags, chkcond);
1909 			if (lmatch == TS_NO)
1910 				return TS_NO;
1911 			switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1912 			{
1913 				case TS_NO:
1914 					return TS_NO;
1915 				case TS_YES:
1916 					return lmatch;
1917 				case TS_MAYBE:
1918 					return TS_MAYBE;
1919 			}
1920 			break;
1921 
1922 		case OP_OR:
1923 			lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
1924 										flags, chkcond);
1925 			if (lmatch == TS_YES)
1926 				return TS_YES;
1927 			switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1928 			{
1929 				case TS_NO:
1930 					return lmatch;
1931 				case TS_YES:
1932 					return TS_YES;
1933 				case TS_MAYBE:
1934 					return TS_MAYBE;
1935 			}
1936 			break;
1937 
1938 		case OP_PHRASE:
1939 
1940 			/*
1941 			 * If we get a MAYBE result, and the caller doesn't want that,
1942 			 * convert it to NO.  It would be more consistent, perhaps, to
1943 			 * return the result of TS_phrase_execute() verbatim and then
1944 			 * convert MAYBE results at the top of the recursion.  But
1945 			 * converting at the topmost phrase operator gives results that
1946 			 * are bug-compatible with the old implementation, so do it like
1947 			 * this for now.
1948 			 */
1949 			switch (TS_phrase_execute(curitem, arg, flags, chkcond, NULL))
1950 			{
1951 				case TS_NO:
1952 					return TS_NO;
1953 				case TS_YES:
1954 					return TS_YES;
1955 				case TS_MAYBE:
1956 					return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO;
1957 			}
1958 			break;
1959 
1960 		default:
1961 			elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1962 	}
1963 
1964 	/* not reachable, but keep compiler quiet */
1965 	return TS_NO;
1966 }
1967 
1968 /*
1969  * Detect whether a tsquery boolean expression requires any positive matches
1970  * to values shown in the tsquery.
1971  *
1972  * This is needed to know whether a GIN index search requires full index scan.
1973  * For example, 'x & !y' requires a match of x, so it's sufficient to scan
1974  * entries for x; but 'x | !y' could match rows containing neither x nor y.
1975  */
1976 bool
tsquery_requires_match(QueryItem * curitem)1977 tsquery_requires_match(QueryItem *curitem)
1978 {
1979 	/* since this function recurses, it could be driven to stack overflow */
1980 	check_stack_depth();
1981 
1982 	if (curitem->type == QI_VAL)
1983 		return true;
1984 
1985 	switch (curitem->qoperator.oper)
1986 	{
1987 		case OP_NOT:
1988 
1989 			/*
1990 			 * Assume there are no required matches underneath a NOT.  For
1991 			 * some cases with nested NOTs, we could prove there's a required
1992 			 * match, but it seems unlikely to be worth the trouble.
1993 			 */
1994 			return false;
1995 
1996 		case OP_PHRASE:
1997 
1998 			/*
1999 			 * Treat OP_PHRASE as OP_AND here
2000 			 */
2001 		case OP_AND:
2002 			/* If either side requires a match, we're good */
2003 			if (tsquery_requires_match(curitem + curitem->qoperator.left))
2004 				return true;
2005 			else
2006 				return tsquery_requires_match(curitem + 1);
2007 
2008 		case OP_OR:
2009 			/* Both sides must require a match */
2010 			if (tsquery_requires_match(curitem + curitem->qoperator.left))
2011 				return tsquery_requires_match(curitem + 1);
2012 			else
2013 				return false;
2014 
2015 		default:
2016 			elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
2017 	}
2018 
2019 	/* not reachable, but keep compiler quiet */
2020 	return false;
2021 }
2022 
2023 /*
2024  * boolean operations
2025  */
2026 Datum
ts_match_qv(PG_FUNCTION_ARGS)2027 ts_match_qv(PG_FUNCTION_ARGS)
2028 {
2029 	PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq,
2030 										PG_GETARG_DATUM(1),
2031 										PG_GETARG_DATUM(0)));
2032 }
2033 
2034 Datum
ts_match_vq(PG_FUNCTION_ARGS)2035 ts_match_vq(PG_FUNCTION_ARGS)
2036 {
2037 	TSVector	val = PG_GETARG_TSVECTOR(0);
2038 	TSQuery		query = PG_GETARG_TSQUERY(1);
2039 	CHKVAL		chkval;
2040 	bool		result;
2041 
2042 	/* empty query matches nothing */
2043 	if (!query->size)
2044 	{
2045 		PG_FREE_IF_COPY(val, 0);
2046 		PG_FREE_IF_COPY(query, 1);
2047 		PG_RETURN_BOOL(false);
2048 	}
2049 
2050 	chkval.arrb = ARRPTR(val);
2051 	chkval.arre = chkval.arrb + val->size;
2052 	chkval.values = STRPTR(val);
2053 	chkval.operand = GETOPERAND(query);
2054 	result = TS_execute(GETQUERY(query),
2055 						&chkval,
2056 						TS_EXEC_EMPTY,
2057 						checkcondition_str);
2058 
2059 	PG_FREE_IF_COPY(val, 0);
2060 	PG_FREE_IF_COPY(query, 1);
2061 	PG_RETURN_BOOL(result);
2062 }
2063 
2064 Datum
ts_match_tt(PG_FUNCTION_ARGS)2065 ts_match_tt(PG_FUNCTION_ARGS)
2066 {
2067 	TSVector	vector;
2068 	TSQuery		query;
2069 	bool		res;
2070 
2071 	vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
2072 												  PG_GETARG_DATUM(0)));
2073 	query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery,
2074 												PG_GETARG_DATUM(1)));
2075 
2076 	res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
2077 										   TSVectorGetDatum(vector),
2078 										   TSQueryGetDatum(query)));
2079 
2080 	pfree(vector);
2081 	pfree(query);
2082 
2083 	PG_RETURN_BOOL(res);
2084 }
2085 
2086 Datum
ts_match_tq(PG_FUNCTION_ARGS)2087 ts_match_tq(PG_FUNCTION_ARGS)
2088 {
2089 	TSVector	vector;
2090 	TSQuery		query = PG_GETARG_TSQUERY(1);
2091 	bool		res;
2092 
2093 	vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
2094 												  PG_GETARG_DATUM(0)));
2095 
2096 	res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
2097 										   TSVectorGetDatum(vector),
2098 										   TSQueryGetDatum(query)));
2099 
2100 	pfree(vector);
2101 	PG_FREE_IF_COPY(query, 1);
2102 
2103 	PG_RETURN_BOOL(res);
2104 }
2105 
2106 /*
2107  * ts_stat statistic function support
2108  */
2109 
2110 
2111 /*
2112  * Returns the number of positions in value 'wptr' within tsvector 'txt',
2113  * that have a weight equal to one of the weights in 'weight' bitmask.
2114  */
2115 static int
check_weight(TSVector txt,WordEntry * wptr,int8 weight)2116 check_weight(TSVector txt, WordEntry *wptr, int8 weight)
2117 {
2118 	int			len = POSDATALEN(txt, wptr);
2119 	int			num = 0;
2120 	WordEntryPos *ptr = POSDATAPTR(txt, wptr);
2121 
2122 	while (len--)
2123 	{
2124 		if (weight & (1 << WEP_GETWEIGHT(*ptr)))
2125 			num++;
2126 		ptr++;
2127 	}
2128 	return num;
2129 }
2130 
2131 #define compareStatWord(a,e,t)							\
2132 	tsCompareString((a)->lexeme, (a)->lenlexeme,		\
2133 					STRPTR(t) + (e)->pos, (e)->len,		\
2134 					false)
2135 
2136 static void
insertStatEntry(MemoryContext persistentContext,TSVectorStat * stat,TSVector txt,uint32 off)2137 insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
2138 {
2139 	WordEntry  *we = ARRPTR(txt) + off;
2140 	StatEntry  *node = stat->root,
2141 			   *pnode = NULL;
2142 	int			n,
2143 				res = 0;
2144 	uint32		depth = 1;
2145 
2146 	if (stat->weight == 0)
2147 		n = (we->haspos) ? POSDATALEN(txt, we) : 1;
2148 	else
2149 		n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
2150 
2151 	if (n == 0)
2152 		return;					/* nothing to insert */
2153 
2154 	while (node)
2155 	{
2156 		res = compareStatWord(node, we, txt);
2157 
2158 		if (res == 0)
2159 		{
2160 			break;
2161 		}
2162 		else
2163 		{
2164 			pnode = node;
2165 			node = (res < 0) ? node->left : node->right;
2166 		}
2167 		depth++;
2168 	}
2169 
2170 	if (depth > stat->maxdepth)
2171 		stat->maxdepth = depth;
2172 
2173 	if (node == NULL)
2174 	{
2175 		node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
2176 		node->left = node->right = NULL;
2177 		node->ndoc = 1;
2178 		node->nentry = n;
2179 		node->lenlexeme = we->len;
2180 		memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
2181 
2182 		if (pnode == NULL)
2183 		{
2184 			stat->root = node;
2185 		}
2186 		else
2187 		{
2188 			if (res < 0)
2189 				pnode->left = node;
2190 			else
2191 				pnode->right = node;
2192 		}
2193 
2194 	}
2195 	else
2196 	{
2197 		node->ndoc++;
2198 		node->nentry += n;
2199 	}
2200 }
2201 
2202 static void
chooseNextStatEntry(MemoryContext persistentContext,TSVectorStat * stat,TSVector txt,uint32 low,uint32 high,uint32 offset)2203 chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt,
2204 					uint32 low, uint32 high, uint32 offset)
2205 {
2206 	uint32		pos;
2207 	uint32		middle = (low + high) >> 1;
2208 
2209 	pos = (low + middle) >> 1;
2210 	if (low != middle && pos >= offset && pos - offset < txt->size)
2211 		insertStatEntry(persistentContext, stat, txt, pos - offset);
2212 	pos = (high + middle + 1) >> 1;
2213 	if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
2214 		insertStatEntry(persistentContext, stat, txt, pos - offset);
2215 
2216 	if (low != middle)
2217 		chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
2218 	if (high != middle + 1)
2219 		chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
2220 }
2221 
2222 /*
2223  * This is written like a custom aggregate function, because the
2224  * original plan was to do just that. Unfortunately, an aggregate function
2225  * can't return a set, so that plan was abandoned. If that limitation is
2226  * lifted in the future, ts_stat could be a real aggregate function so that
2227  * you could use it like this:
2228  *
2229  *	 SELECT ts_stat(vector_column) FROM vector_table;
2230  *
2231  *	where vector_column is a tsvector-type column in vector_table.
2232  */
2233 
2234 static TSVectorStat *
ts_accum(MemoryContext persistentContext,TSVectorStat * stat,Datum data)2235 ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
2236 {
2237 	TSVector	txt = DatumGetTSVector(data);
2238 	uint32		i,
2239 				nbit = 0,
2240 				offset;
2241 
2242 	if (stat == NULL)
2243 	{							/* Init in first */
2244 		stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
2245 		stat->maxdepth = 1;
2246 	}
2247 
2248 	/* simple check of correctness */
2249 	if (txt == NULL || txt->size == 0)
2250 	{
2251 		if (txt && txt != (TSVector) DatumGetPointer(data))
2252 			pfree(txt);
2253 		return stat;
2254 	}
2255 
2256 	i = txt->size - 1;
2257 	for (; i > 0; i >>= 1)
2258 		nbit++;
2259 
2260 	nbit = 1 << nbit;
2261 	offset = (nbit - txt->size) / 2;
2262 
2263 	insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
2264 	chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
2265 
2266 	return stat;
2267 }
2268 
2269 static void
ts_setup_firstcall(FunctionCallInfo fcinfo,FuncCallContext * funcctx,TSVectorStat * stat)2270 ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
2271 				   TSVectorStat *stat)
2272 {
2273 	TupleDesc	tupdesc;
2274 	MemoryContext oldcontext;
2275 	StatEntry  *node;
2276 
2277 	funcctx->user_fctx = (void *) stat;
2278 
2279 	oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
2280 
2281 	stat->stack = palloc0(sizeof(StatEntry *) * (stat->maxdepth + 1));
2282 	stat->stackpos = 0;
2283 
2284 	node = stat->root;
2285 	/* find leftmost value */
2286 	if (node == NULL)
2287 		stat->stack[stat->stackpos] = NULL;
2288 	else
2289 		for (;;)
2290 		{
2291 			stat->stack[stat->stackpos] = node;
2292 			if (node->left)
2293 			{
2294 				stat->stackpos++;
2295 				node = node->left;
2296 			}
2297 			else
2298 				break;
2299 		}
2300 	Assert(stat->stackpos <= stat->maxdepth);
2301 
2302 	tupdesc = CreateTemplateTupleDesc(3);
2303 	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "word",
2304 					   TEXTOID, -1, 0);
2305 	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "ndoc",
2306 					   INT4OID, -1, 0);
2307 	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "nentry",
2308 					   INT4OID, -1, 0);
2309 	funcctx->tuple_desc = BlessTupleDesc(tupdesc);
2310 	funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
2311 
2312 	MemoryContextSwitchTo(oldcontext);
2313 }
2314 
2315 static StatEntry *
walkStatEntryTree(TSVectorStat * stat)2316 walkStatEntryTree(TSVectorStat *stat)
2317 {
2318 	StatEntry  *node = stat->stack[stat->stackpos];
2319 
2320 	if (node == NULL)
2321 		return NULL;
2322 
2323 	if (node->ndoc != 0)
2324 	{
2325 		/* return entry itself: we already was at left sublink */
2326 		return node;
2327 	}
2328 	else if (node->right && node->right != stat->stack[stat->stackpos + 1])
2329 	{
2330 		/* go on right sublink */
2331 		stat->stackpos++;
2332 		node = node->right;
2333 
2334 		/* find most-left value */
2335 		for (;;)
2336 		{
2337 			stat->stack[stat->stackpos] = node;
2338 			if (node->left)
2339 			{
2340 				stat->stackpos++;
2341 				node = node->left;
2342 			}
2343 			else
2344 				break;
2345 		}
2346 		Assert(stat->stackpos <= stat->maxdepth);
2347 	}
2348 	else
2349 	{
2350 		/* we already return all left subtree, itself and  right subtree */
2351 		if (stat->stackpos == 0)
2352 			return NULL;
2353 
2354 		stat->stackpos--;
2355 		return walkStatEntryTree(stat);
2356 	}
2357 
2358 	return node;
2359 }
2360 
2361 static Datum
ts_process_call(FuncCallContext * funcctx)2362 ts_process_call(FuncCallContext *funcctx)
2363 {
2364 	TSVectorStat *st;
2365 	StatEntry  *entry;
2366 
2367 	st = (TSVectorStat *) funcctx->user_fctx;
2368 
2369 	entry = walkStatEntryTree(st);
2370 
2371 	if (entry != NULL)
2372 	{
2373 		Datum		result;
2374 		char	   *values[3];
2375 		char		ndoc[16];
2376 		char		nentry[16];
2377 		HeapTuple	tuple;
2378 
2379 		values[0] = palloc(entry->lenlexeme + 1);
2380 		memcpy(values[0], entry->lexeme, entry->lenlexeme);
2381 		(values[0])[entry->lenlexeme] = '\0';
2382 		sprintf(ndoc, "%d", entry->ndoc);
2383 		values[1] = ndoc;
2384 		sprintf(nentry, "%d", entry->nentry);
2385 		values[2] = nentry;
2386 
2387 		tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
2388 		result = HeapTupleGetDatum(tuple);
2389 
2390 		pfree(values[0]);
2391 
2392 		/* mark entry as already visited */
2393 		entry->ndoc = 0;
2394 
2395 		return result;
2396 	}
2397 
2398 	return (Datum) 0;
2399 }
2400 
2401 static TSVectorStat *
ts_stat_sql(MemoryContext persistentContext,text * txt,text * ws)2402 ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
2403 {
2404 	char	   *query = text_to_cstring(txt);
2405 	TSVectorStat *stat;
2406 	bool		isnull;
2407 	Portal		portal;
2408 	SPIPlanPtr	plan;
2409 
2410 	if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2411 		/* internal error */
2412 		elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2413 
2414 	if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2415 		/* internal error */
2416 		elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2417 
2418 	SPI_cursor_fetch(portal, true, 100);
2419 
2420 	if (SPI_tuptable == NULL ||
2421 		SPI_tuptable->tupdesc->natts != 1 ||
2422 		!IsBinaryCoercible(SPI_gettypeid(SPI_tuptable->tupdesc, 1),
2423 						   TSVECTOROID))
2424 		ereport(ERROR,
2425 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2426 				 errmsg("ts_stat query must return one tsvector column")));
2427 
2428 	stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
2429 	stat->maxdepth = 1;
2430 
2431 	if (ws)
2432 	{
2433 		char	   *buf;
2434 
2435 		buf = VARDATA_ANY(ws);
2436 		while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
2437 		{
2438 			if (pg_mblen(buf) == 1)
2439 			{
2440 				switch (*buf)
2441 				{
2442 					case 'A':
2443 					case 'a':
2444 						stat->weight |= 1 << 3;
2445 						break;
2446 					case 'B':
2447 					case 'b':
2448 						stat->weight |= 1 << 2;
2449 						break;
2450 					case 'C':
2451 					case 'c':
2452 						stat->weight |= 1 << 1;
2453 						break;
2454 					case 'D':
2455 					case 'd':
2456 						stat->weight |= 1;
2457 						break;
2458 					default:
2459 						stat->weight |= 0;
2460 				}
2461 			}
2462 			buf += pg_mblen(buf);
2463 		}
2464 	}
2465 
2466 	while (SPI_processed > 0)
2467 	{
2468 		uint64		i;
2469 
2470 		for (i = 0; i < SPI_processed; i++)
2471 		{
2472 			Datum		data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
2473 
2474 			if (!isnull)
2475 				stat = ts_accum(persistentContext, stat, data);
2476 		}
2477 
2478 		SPI_freetuptable(SPI_tuptable);
2479 		SPI_cursor_fetch(portal, true, 100);
2480 	}
2481 
2482 	SPI_freetuptable(SPI_tuptable);
2483 	SPI_cursor_close(portal);
2484 	SPI_freeplan(plan);
2485 	pfree(query);
2486 
2487 	return stat;
2488 }
2489 
2490 Datum
ts_stat1(PG_FUNCTION_ARGS)2491 ts_stat1(PG_FUNCTION_ARGS)
2492 {
2493 	FuncCallContext *funcctx;
2494 	Datum		result;
2495 
2496 	if (SRF_IS_FIRSTCALL())
2497 	{
2498 		TSVectorStat *stat;
2499 		text	   *txt = PG_GETARG_TEXT_PP(0);
2500 
2501 		funcctx = SRF_FIRSTCALL_INIT();
2502 		SPI_connect();
2503 		stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
2504 		PG_FREE_IF_COPY(txt, 0);
2505 		ts_setup_firstcall(fcinfo, funcctx, stat);
2506 		SPI_finish();
2507 	}
2508 
2509 	funcctx = SRF_PERCALL_SETUP();
2510 	if ((result = ts_process_call(funcctx)) != (Datum) 0)
2511 		SRF_RETURN_NEXT(funcctx, result);
2512 	SRF_RETURN_DONE(funcctx);
2513 }
2514 
2515 Datum
ts_stat2(PG_FUNCTION_ARGS)2516 ts_stat2(PG_FUNCTION_ARGS)
2517 {
2518 	FuncCallContext *funcctx;
2519 	Datum		result;
2520 
2521 	if (SRF_IS_FIRSTCALL())
2522 	{
2523 		TSVectorStat *stat;
2524 		text	   *txt = PG_GETARG_TEXT_PP(0);
2525 		text	   *ws = PG_GETARG_TEXT_PP(1);
2526 
2527 		funcctx = SRF_FIRSTCALL_INIT();
2528 		SPI_connect();
2529 		stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
2530 		PG_FREE_IF_COPY(txt, 0);
2531 		PG_FREE_IF_COPY(ws, 1);
2532 		ts_setup_firstcall(fcinfo, funcctx, stat);
2533 		SPI_finish();
2534 	}
2535 
2536 	funcctx = SRF_PERCALL_SETUP();
2537 	if ((result = ts_process_call(funcctx)) != (Datum) 0)
2538 		SRF_RETURN_NEXT(funcctx, result);
2539 	SRF_RETURN_DONE(funcctx);
2540 }
2541 
2542 
2543 /*
2544  * Triggers for automatic update of a tsvector column from text column(s)
2545  *
2546  * Trigger arguments are either
2547  *		name of tsvector col, name of tsconfig to use, name(s) of text col(s)
2548  *		name of tsvector col, name of regconfig col, name(s) of text col(s)
2549  * ie, tsconfig can either be specified by name, or indirectly as the
2550  * contents of a regconfig field in the row.  If the name is used, it must
2551  * be explicitly schema-qualified.
2552  */
2553 Datum
tsvector_update_trigger_byid(PG_FUNCTION_ARGS)2554 tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
2555 {
2556 	return tsvector_update_trigger(fcinfo, false);
2557 }
2558 
2559 Datum
tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)2560 tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
2561 {
2562 	return tsvector_update_trigger(fcinfo, true);
2563 }
2564 
2565 static Datum
tsvector_update_trigger(PG_FUNCTION_ARGS,bool config_column)2566 tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
2567 {
2568 	TriggerData *trigdata;
2569 	Trigger    *trigger;
2570 	Relation	rel;
2571 	HeapTuple	rettuple = NULL;
2572 	int			tsvector_attr_num,
2573 				i;
2574 	ParsedText	prs;
2575 	Datum		datum;
2576 	bool		isnull;
2577 	text	   *txt;
2578 	Oid			cfgId;
2579 	bool		update_needed;
2580 
2581 	/* Check call context */
2582 	if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
2583 		elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
2584 
2585 	trigdata = (TriggerData *) fcinfo->context;
2586 	if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
2587 		elog(ERROR, "tsvector_update_trigger: must be fired for row");
2588 	if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
2589 		elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
2590 
2591 	if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
2592 	{
2593 		rettuple = trigdata->tg_trigtuple;
2594 		update_needed = true;
2595 	}
2596 	else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
2597 	{
2598 		rettuple = trigdata->tg_newtuple;
2599 		update_needed = false;	/* computed below */
2600 	}
2601 	else
2602 		elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
2603 
2604 	trigger = trigdata->tg_trigger;
2605 	rel = trigdata->tg_relation;
2606 
2607 	if (trigger->tgnargs < 3)
2608 		elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
2609 
2610 	/* Find the target tsvector column */
2611 	tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
2612 	if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
2613 		ereport(ERROR,
2614 				(errcode(ERRCODE_UNDEFINED_COLUMN),
2615 				 errmsg("tsvector column \"%s\" does not exist",
2616 						trigger->tgargs[0])));
2617 	/* This will effectively reject system columns, so no separate test: */
2618 	if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
2619 						   TSVECTOROID))
2620 		ereport(ERROR,
2621 				(errcode(ERRCODE_DATATYPE_MISMATCH),
2622 				 errmsg("column \"%s\" is not of tsvector type",
2623 						trigger->tgargs[0])));
2624 
2625 	/* Find the configuration to use */
2626 	if (config_column)
2627 	{
2628 		int			config_attr_num;
2629 
2630 		config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
2631 		if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
2632 			ereport(ERROR,
2633 					(errcode(ERRCODE_UNDEFINED_COLUMN),
2634 					 errmsg("configuration column \"%s\" does not exist",
2635 							trigger->tgargs[1])));
2636 		if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num),
2637 							   REGCONFIGOID))
2638 			ereport(ERROR,
2639 					(errcode(ERRCODE_DATATYPE_MISMATCH),
2640 					 errmsg("column \"%s\" is not of regconfig type",
2641 							trigger->tgargs[1])));
2642 
2643 		datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
2644 		if (isnull)
2645 			ereport(ERROR,
2646 					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
2647 					 errmsg("configuration column \"%s\" must not be null",
2648 							trigger->tgargs[1])));
2649 		cfgId = DatumGetObjectId(datum);
2650 	}
2651 	else
2652 	{
2653 		List	   *names;
2654 
2655 		names = stringToQualifiedNameList(trigger->tgargs[1]);
2656 		/* require a schema so that results are not search path dependent */
2657 		if (list_length(names) < 2)
2658 			ereport(ERROR,
2659 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2660 					 errmsg("text search configuration name \"%s\" must be schema-qualified",
2661 							trigger->tgargs[1])));
2662 		cfgId = get_ts_config_oid(names, false);
2663 	}
2664 
2665 	/* initialize parse state */
2666 	prs.lenwords = 32;
2667 	prs.curwords = 0;
2668 	prs.pos = 0;
2669 	prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
2670 
2671 	/* find all words in indexable column(s) */
2672 	for (i = 2; i < trigger->tgnargs; i++)
2673 	{
2674 		int			numattr;
2675 
2676 		numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
2677 		if (numattr == SPI_ERROR_NOATTRIBUTE)
2678 			ereport(ERROR,
2679 					(errcode(ERRCODE_UNDEFINED_COLUMN),
2680 					 errmsg("column \"%s\" does not exist",
2681 							trigger->tgargs[i])));
2682 		if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID))
2683 			ereport(ERROR,
2684 					(errcode(ERRCODE_DATATYPE_MISMATCH),
2685 					 errmsg("column \"%s\" is not of a character type",
2686 							trigger->tgargs[i])));
2687 
2688 		if (bms_is_member(numattr - FirstLowInvalidHeapAttributeNumber, trigdata->tg_updatedcols))
2689 			update_needed = true;
2690 
2691 		datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
2692 		if (isnull)
2693 			continue;
2694 
2695 		txt = DatumGetTextPP(datum);
2696 
2697 		parsetext(cfgId, &prs, VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt));
2698 
2699 		if (txt != (text *) DatumGetPointer(datum))
2700 			pfree(txt);
2701 	}
2702 
2703 	if (update_needed)
2704 	{
2705 		/* make tsvector value */
2706 		datum = TSVectorGetDatum(make_tsvector(&prs));
2707 		isnull = false;
2708 
2709 		/* and insert it into tuple */
2710 		rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
2711 											 1, &tsvector_attr_num,
2712 											 &datum, &isnull);
2713 
2714 		pfree(DatumGetPointer(datum));
2715 	}
2716 
2717 	return PointerGetDatum(rettuple);
2718 }
2719