1 /*-------------------------------------------------------------------------
2  *
3  * tsvector_op.c
4  *	  operations over tsvector
5  *
6  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *	  src/backend/utils/adt/tsvector_op.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include <limits.h>
17 
18 #include "access/htup_details.h"
19 #include "catalog/namespace.h"
20 #include "catalog/pg_type.h"
21 #include "commands/trigger.h"
22 #include "executor/spi.h"
23 #include "funcapi.h"
24 #include "mb/pg_wchar.h"
25 #include "miscadmin.h"
26 #include "parser/parse_coerce.h"
27 #include "tsearch/ts_utils.h"
28 #include "utils/builtins.h"
29 #include "utils/lsyscache.h"
30 #include "utils/regproc.h"
31 #include "utils/rel.h"
32 
33 
34 typedef struct
35 {
36 	WordEntry  *arrb;
37 	WordEntry  *arre;
clone(&self) -> timezone38 	char	   *values;
39 	char	   *operand;
40 } CHKVAL;
41 
42 
43 typedef struct StatEntry
si_addr(&self) -> *mut ::c_void44 {
45 	uint32		ndoc;			/* zero indicates that we were already here
46 								 * while walking through the tree */
47 	uint32		nentry;
48 	struct StatEntry *left;
49 	struct StatEntry *right;
50 	uint32		lenlexeme;
51 	char		lexeme[FLEXIBLE_ARRAY_MEMBER];
52 } StatEntry;
53 
54 #define STATENTRYHDRSZ	(offsetof(StatEntry, lexeme))
55 
si_status(&self) -> ::c_int56 typedef struct
57 {
58 	int32		weight;
59 
60 	uint32		maxdepth;
61 
62 	StatEntry **stack;
63 	uint32		stackpos;
64 
65 	StatEntry  *root;
66 } TSVectorStat;
67 
68 #define STATHDRSIZE (offsetof(TSVectorStat, data))
69 
70 /* TS_execute requires ternary logic to handle NOT with phrase matches */
71 typedef enum
72 {
73 	TS_NO,						/* definitely no match */
74 	TS_YES,						/* definitely does match */
75 	TS_MAYBE					/* can't verify match for lack of pos data */
76 } TSTernaryValue;
77 
78 
79 static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg,
80 										 uint32 flags,
81 										 TSExecuteCallback chkcond);
82 static int	tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len);
83 static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
84 
85 
86 /*
87  * Order: haspos, len, word, for all positions (pos, weight)
88  */
89 static int
90 silly_cmp_tsvector(const TSVector a, const TSVector b)
91 {
92 	if (VARSIZE(a) < VARSIZE(b))
93 		return -1;
94 	else if (VARSIZE(a) > VARSIZE(b))
95 		return 1;
96 	else if (a->size < b->size)
97 		return -1;
98 	else if (a->size > b->size)
99 		return 1;
100 	else
101 	{
102 		WordEntry  *aptr = ARRPTR(a);
103 		WordEntry  *bptr = ARRPTR(b);
104 		int			i = 0;
105 		int			res;
106 
107 
108 		for (i = 0; i < a->size; i++)
109 		{
110 			if (aptr->haspos != bptr->haspos)
111 			{
112 				return (aptr->haspos > bptr->haspos) ? -1 : 1;
113 			}
114 			else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
115 			{
116 				return res;
117 			}
118 			else if (aptr->haspos)
119 			{
120 				WordEntryPos *ap = POSDATAPTR(a, aptr);
121 				WordEntryPos *bp = POSDATAPTR(b, bptr);
122 				int			j;
123 
124 				if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
125 					return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
126 
127 				for (j = 0; j < POSDATALEN(a, aptr); j++)
128 				{
129 					if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
130 					{
131 						return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
132 					}
133 					else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
134 					{
135 						return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
136 					}
137 					ap++, bp++;
138 				}
139 			}
140 
141 			aptr++;
142 			bptr++;
143 		}
144 	}
145 
146 	return 0;
147 }
148 
149 #define TSVECTORCMPFUNC( type, action, ret )			\
150 Datum													\
151 tsvector_##type(PG_FUNCTION_ARGS)						\
152 {														\
153 	TSVector	a = PG_GETARG_TSVECTOR(0);				\
154 	TSVector	b = PG_GETARG_TSVECTOR(1);				\
155 	int			res = silly_cmp_tsvector(a, b);			\
156 	PG_FREE_IF_COPY(a,0);								\
157 	PG_FREE_IF_COPY(b,1);								\
158 	PG_RETURN_##ret( res action 0 );					\
159 }	\
160 /* keep compiler quiet - no extra ; */					\
161 extern int no_such_variable
162 
163 TSVECTORCMPFUNC(lt, <, BOOL);
164 TSVECTORCMPFUNC(le, <=, BOOL);
165 TSVECTORCMPFUNC(eq, ==, BOOL);
166 TSVECTORCMPFUNC(ge, >=, BOOL);
167 TSVECTORCMPFUNC(gt, >, BOOL);
168 TSVECTORCMPFUNC(ne, !=, BOOL);
169 TSVECTORCMPFUNC(cmp, +, INT32);
170 
171 Datum
172 tsvector_strip(PG_FUNCTION_ARGS)
173 {
174 	TSVector	in = PG_GETARG_TSVECTOR(0);
175 	TSVector	out;
176 	int			i,
177 				len = 0;
178 	WordEntry  *arrin = ARRPTR(in),
179 			   *arrout;
180 	char	   *cur;
181 
182 	for (i = 0; i < in->size; i++)
183 		len += arrin[i].len;
184 
185 	len = CALCDATASIZE(in->size, len);
186 	out = (TSVector) palloc0(len);
187 	SET_VARSIZE(out, len);
188 	out->size = in->size;
189 	arrout = ARRPTR(out);
190 	cur = STRPTR(out);
191 	for (i = 0; i < in->size; i++)
192 	{
193 		memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
194 		arrout[i].haspos = 0;
195 		arrout[i].len = arrin[i].len;
196 		arrout[i].pos = cur - STRPTR(out);
197 		cur += arrout[i].len;
198 	}
199 
200 	PG_FREE_IF_COPY(in, 0);
201 	PG_RETURN_POINTER(out);
202 }
203 
204 Datum
205 tsvector_length(PG_FUNCTION_ARGS)
206 {
207 	TSVector	in = PG_GETARG_TSVECTOR(0);
208 	int32		ret = in->size;
209 
210 	PG_FREE_IF_COPY(in, 0);
211 	PG_RETURN_INT32(ret);
212 }
213 
214 Datum
215 tsvector_setweight(PG_FUNCTION_ARGS)
216 {
217 	TSVector	in = PG_GETARG_TSVECTOR(0);
218 	char		cw = PG_GETARG_CHAR(1);
219 	TSVector	out;
220 	int			i,
221 				j;
222 	WordEntry  *entry;
223 	WordEntryPos *p;
224 	int			w = 0;
225 
226 	switch (cw)
227 	{
228 		case 'A':
229 		case 'a':
230 			w = 3;
231 			break;
232 		case 'B':
233 		case 'b':
234 			w = 2;
235 			break;
236 		case 'C':
237 		case 'c':
238 			w = 1;
239 			break;
240 		case 'D':
241 		case 'd':
242 			w = 0;
243 			break;
244 		default:
245 			/* internal error */
246 			elog(ERROR, "unrecognized weight: %d", cw);
247 	}
248 
249 	out = (TSVector) palloc(VARSIZE(in));
250 	memcpy(out, in, VARSIZE(in));
251 	entry = ARRPTR(out);
252 	i = out->size;
253 	while (i--)
254 	{
255 		if ((j = POSDATALEN(out, entry)) != 0)
256 		{
257 			p = POSDATAPTR(out, entry);
258 			while (j--)
259 			{
260 				WEP_SETWEIGHT(*p, w);
261 				p++;
262 			}
263 		}
264 		entry++;
265 	}
266 
267 	PG_FREE_IF_COPY(in, 0);
268 	PG_RETURN_POINTER(out);
269 }
270 
271 /*
272  * setweight(tsin tsvector, char_weight "char", lexemes "text"[])
273  *
274  * Assign weight w to elements of tsin that are listed in lexemes.
275  */
276 Datum
277 tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
278 {
279 	TSVector	tsin = PG_GETARG_TSVECTOR(0);
280 	char		char_weight = PG_GETARG_CHAR(1);
281 	ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(2);
282 
283 	TSVector	tsout;
284 	int			i,
285 				j,
286 				nlexemes,
287 				weight;
288 	WordEntry  *entry;
289 	Datum	   *dlexemes;
290 	bool	   *nulls;
291 
292 	switch (char_weight)
293 	{
294 		case 'A':
295 		case 'a':
296 			weight = 3;
297 			break;
298 		case 'B':
299 		case 'b':
300 			weight = 2;
301 			break;
302 		case 'C':
303 		case 'c':
304 			weight = 1;
305 			break;
306 		case 'D':
307 		case 'd':
308 			weight = 0;
309 			break;
310 		default:
311 			/* internal error */
312 			elog(ERROR, "unrecognized weight: %c", char_weight);
313 	}
314 
315 	tsout = (TSVector) palloc(VARSIZE(tsin));
316 	memcpy(tsout, tsin, VARSIZE(tsin));
317 	entry = ARRPTR(tsout);
318 
319 	deconstruct_array(lexemes, TEXTOID, -1, false, 'i',
320 					  &dlexemes, &nulls, &nlexemes);
321 
322 	/*
323 	 * Assuming that lexemes array is significantly shorter than tsvector we
324 	 * can iterate through lexemes performing binary search of each lexeme
325 	 * from lexemes in tsvector.
326 	 */
327 	for (i = 0; i < nlexemes; i++)
328 	{
329 		char	   *lex;
330 		int			lex_len,
331 					lex_pos;
332 
333 		if (nulls[i])
334 			ereport(ERROR,
335 					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
336 					 errmsg("lexeme array may not contain nulls")));
337 
338 		lex = VARDATA(dlexemes[i]);
339 		lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
340 		lex_pos = tsvector_bsearch(tsout, lex, lex_len);
341 
342 		if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
343 		{
344 			WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
345 
346 			while (j--)
347 			{
348 				WEP_SETWEIGHT(*p, weight);
349 				p++;
350 			}
351 		}
352 	}
353 
354 	PG_FREE_IF_COPY(tsin, 0);
355 	PG_FREE_IF_COPY(lexemes, 2);
356 
357 	PG_RETURN_POINTER(tsout);
358 }
359 
360 #define compareEntry(pa, a, pb, b) \
361 	tsCompareString((pa) + (a)->pos, (a)->len,	\
362 					(pb) + (b)->pos, (b)->len,	\
363 					false)
364 
365 /*
366  * Add positions from src to dest after offsetting them by maxpos.
367  * Return the number added (might be less than expected due to overflow)
368  */
369 static int32
370 add_pos(TSVector src, WordEntry *srcptr,
371 		TSVector dest, WordEntry *destptr,
372 		int32 maxpos)
373 {
374 	uint16	   *clen = &_POSVECPTR(dest, destptr)->npos;
375 	int			i;
376 	uint16		slen = POSDATALEN(src, srcptr),
377 				startlen;
378 	WordEntryPos *spos = POSDATAPTR(src, srcptr),
379 			   *dpos = POSDATAPTR(dest, destptr);
380 
381 	if (!destptr->haspos)
382 		*clen = 0;
383 
384 	startlen = *clen;
385 	for (i = 0;
386 		 i < slen && *clen < MAXNUMPOS &&
387 		 (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
388 		 i++)
389 	{
390 		WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
391 		WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
392 		(*clen)++;
393 	}
394 
395 	if (*clen != startlen)
396 		destptr->haspos = 1;
397 	return *clen - startlen;
398 }
399 
400 /*
401  * Perform binary search of given lexeme in TSVector.
402  * Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
403  * found.
404  */
405 static int
406 tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
407 {
408 	WordEntry  *arrin = ARRPTR(tsv);
409 	int			StopLow = 0,
410 				StopHigh = tsv->size,
411 				StopMiddle,
412 				cmp;
413 
414 	while (StopLow < StopHigh)
415 	{
416 		StopMiddle = (StopLow + StopHigh) / 2;
417 
418 		cmp = tsCompareString(lexeme, lexeme_len,
419 							  STRPTR(tsv) + arrin[StopMiddle].pos,
420 							  arrin[StopMiddle].len,
421 							  false);
422 
423 		if (cmp < 0)
424 			StopHigh = StopMiddle;
425 		else if (cmp > 0)
426 			StopLow = StopMiddle + 1;
427 		else					/* found it */
428 			return StopMiddle;
429 	}
430 
431 	return -1;
432 }
433 
434 /*
435  * qsort comparator functions
436  */
437 
438 static int
439 compare_int(const void *va, const void *vb)
440 {
441 	int			a = *((const int *) va);
442 	int			b = *((const int *) vb);
443 
444 	if (a == b)
445 		return 0;
446 	return (a > b) ? 1 : -1;
447 }
448 
449 static int
450 compare_text_lexemes(const void *va, const void *vb)
451 {
452 	Datum		a = *((const Datum *) va);
453 	Datum		b = *((const Datum *) vb);
454 	char	   *alex = VARDATA_ANY(a);
455 	int			alex_len = VARSIZE_ANY_EXHDR(a);
456 	char	   *blex = VARDATA_ANY(b);
457 	int			blex_len = VARSIZE_ANY_EXHDR(b);
458 
459 	return tsCompareString(alex, alex_len, blex, blex_len, false);
460 }
461 
462 /*
463  * Internal routine to delete lexemes from TSVector by array of offsets.
464  *
465  * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
466  * int indices_count -- size of that array
467  *
468  * Returns new TSVector without given lexemes along with their positions
469  * and weights.
470  */
471 static TSVector
472 tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
473 						   int indices_count)
474 {
475 	TSVector	tsout;
476 	WordEntry  *arrin = ARRPTR(tsv),
477 			   *arrout;
478 	char	   *data = STRPTR(tsv),
479 			   *dataout;
480 	int			i,				/* index in arrin */
481 				j,				/* index in arrout */
482 				k,				/* index in indices_to_delete */
483 				curoff;			/* index in dataout area */
484 
485 	/*
486 	 * Sort the filter array to simplify membership checks below.  Also, get
487 	 * rid of any duplicate entries, so that we can assume that indices_count
488 	 * is exactly equal to the number of lexemes that will be removed.
489 	 */
490 	if (indices_count > 1)
491 	{
492 		int			kp;
493 
494 		qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
495 		kp = 0;
496 		for (k = 1; k < indices_count; k++)
497 		{
498 			if (indices_to_delete[k] != indices_to_delete[kp])
499 				indices_to_delete[++kp] = indices_to_delete[k];
500 		}
501 		indices_count = ++kp;
502 	}
503 
504 	/*
505 	 * Here we overestimate tsout size, since we don't know how much space is
506 	 * used by the deleted lexeme(s).  We will set exact size below.
507 	 */
508 	tsout = (TSVector) palloc0(VARSIZE(tsv));
509 
510 	/* This count must be correct because STRPTR(tsout) relies on it. */
511 	tsout->size = tsv->size - indices_count;
512 
513 	/*
514 	 * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
515 	 */
516 	arrout = ARRPTR(tsout);
517 	dataout = STRPTR(tsout);
518 	curoff = 0;
519 	for (i = j = k = 0; i < tsv->size; i++)
520 	{
521 		/*
522 		 * If current i is present in indices_to_delete, skip this lexeme.
523 		 * Since indices_to_delete is already sorted, we only need to check
524 		 * the current (k'th) entry.
525 		 */
526 		if (k < indices_count && i == indices_to_delete[k])
527 		{
528 			k++;
529 			continue;
530 		}
531 
532 		/* Copy lexeme and its positions and weights */
533 		memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
534 		arrout[j].haspos = arrin[i].haspos;
535 		arrout[j].len = arrin[i].len;
536 		arrout[j].pos = curoff;
537 		curoff += arrin[i].len;
538 		if (arrin[i].haspos)
539 		{
540 			int			len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
541 			+ sizeof(uint16);
542 
543 			curoff = SHORTALIGN(curoff);
544 			memcpy(dataout + curoff,
545 				   STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
546 				   len);
547 			curoff += len;
548 		}
549 
550 		j++;
551 	}
552 
553 	/*
554 	 * k should now be exactly equal to indices_count. If it isn't then the
555 	 * caller provided us with indices outside of [0, tsv->size) range and
556 	 * estimation of tsout's size is wrong.
557 	 */
558 	Assert(k == indices_count);
559 
560 	SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
561 	return tsout;
562 }
563 
564 /*
565  * Delete given lexeme from tsvector.
566  * Implementation of user-level ts_delete(tsvector, text).
567  */
568 Datum
569 tsvector_delete_str(PG_FUNCTION_ARGS)
570 {
571 	TSVector	tsin = PG_GETARG_TSVECTOR(0),
572 				tsout;
573 	text	   *tlexeme = PG_GETARG_TEXT_PP(1);
574 	char	   *lexeme = VARDATA_ANY(tlexeme);
575 	int			lexeme_len = VARSIZE_ANY_EXHDR(tlexeme),
576 				skip_index;
577 
578 	if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1)
579 		PG_RETURN_POINTER(tsin);
580 
581 	tsout = tsvector_delete_by_indices(tsin, &skip_index, 1);
582 
583 	PG_FREE_IF_COPY(tsin, 0);
584 	PG_FREE_IF_COPY(tlexeme, 1);
585 	PG_RETURN_POINTER(tsout);
586 }
587 
588 /*
589  * Delete given array of lexemes from tsvector.
590  * Implementation of user-level ts_delete(tsvector, text[]).
591  */
592 Datum
593 tsvector_delete_arr(PG_FUNCTION_ARGS)
594 {
595 	TSVector	tsin = PG_GETARG_TSVECTOR(0),
596 				tsout;
597 	ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(1);
598 	int			i,
599 				nlex,
600 				skip_count,
601 			   *skip_indices;
602 	Datum	   *dlexemes;
603 	bool	   *nulls;
604 
605 	deconstruct_array(lexemes, TEXTOID, -1, false, 'i',
606 					  &dlexemes, &nulls, &nlex);
607 
608 	/*
609 	 * In typical use case array of lexemes to delete is relatively small. So
610 	 * here we optimize things for that scenario: iterate through lexarr
611 	 * performing binary search of each lexeme from lexarr in tsvector.
612 	 */
613 	skip_indices = palloc0(nlex * sizeof(int));
614 	for (i = skip_count = 0; i < nlex; i++)
615 	{
616 		char	   *lex;
617 		int			lex_len,
618 					lex_pos;
619 
620 		if (nulls[i])
621 			ereport(ERROR,
622 					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
623 					 errmsg("lexeme array may not contain nulls")));
624 
625 		lex = VARDATA(dlexemes[i]);
626 		lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
627 		lex_pos = tsvector_bsearch(tsin, lex, lex_len);
628 
629 		if (lex_pos >= 0)
630 			skip_indices[skip_count++] = lex_pos;
631 	}
632 
633 	tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);
634 
635 	pfree(skip_indices);
636 	PG_FREE_IF_COPY(tsin, 0);
637 	PG_FREE_IF_COPY(lexemes, 1);
638 
639 	PG_RETURN_POINTER(tsout);
640 }
641 
642 /*
643  * Expand tsvector as table with following columns:
644  *	   lexeme: lexeme text
645  *	   positions: integer array of lexeme positions
646  *	   weights: char array of weights corresponding to positions
647  */
648 Datum
649 tsvector_unnest(PG_FUNCTION_ARGS)
650 {
651 	FuncCallContext *funcctx;
652 	TSVector	tsin;
653 
654 	if (SRF_IS_FIRSTCALL())
655 	{
656 		MemoryContext oldcontext;
657 		TupleDesc	tupdesc;
658 
659 		funcctx = SRF_FIRSTCALL_INIT();
660 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
661 
662 		tupdesc = CreateTemplateTupleDesc(3, false);
663 		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
664 						   TEXTOID, -1, 0);
665 		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
666 						   INT2ARRAYOID, -1, 0);
667 		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
668 						   TEXTARRAYOID, -1, 0);
669 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
670 
671 		funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
672 
673 		MemoryContextSwitchTo(oldcontext);
674 	}
675 
676 	funcctx = SRF_PERCALL_SETUP();
677 	tsin = (TSVector) funcctx->user_fctx;
678 
679 	if (funcctx->call_cntr < tsin->size)
680 	{
681 		WordEntry  *arrin = ARRPTR(tsin);
682 		char	   *data = STRPTR(tsin);
683 		HeapTuple	tuple;
684 		int			j,
685 					i = funcctx->call_cntr;
686 		bool		nulls[] = {false, false, false};
687 		Datum		values[3];
688 
689 		values[0] = PointerGetDatum(
690 									cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len)
691 			);
692 
693 		if (arrin[i].haspos)
694 		{
695 			WordEntryPosVector *posv;
696 			Datum	   *positions;
697 			Datum	   *weights;
698 			char		weight;
699 
700 			/*
701 			 * Internally tsvector stores position and weight in the same
702 			 * uint16 (2 bits for weight, 14 for position). Here we extract
703 			 * that in two separate arrays.
704 			 */
705 			posv = _POSVECPTR(tsin, arrin + i);
706 			positions = palloc(posv->npos * sizeof(Datum));
707 			weights = palloc(posv->npos * sizeof(Datum));
708 			for (j = 0; j < posv->npos; j++)
709 			{
710 				positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
711 				weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
712 				weights[j] = PointerGetDatum(
713 											 cstring_to_text_with_len(&weight, 1)
714 					);
715 			}
716 
717 			values[1] = PointerGetDatum(
718 										construct_array(positions, posv->npos, INT2OID, 2, true, 's'));
719 			values[2] = PointerGetDatum(
720 										construct_array(weights, posv->npos, TEXTOID, -1, false, 'i'));
721 		}
722 		else
723 		{
724 			nulls[1] = nulls[2] = true;
725 		}
726 
727 		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
728 		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
729 	}
730 	else
731 	{
732 		pfree(tsin);
733 		SRF_RETURN_DONE(funcctx);
734 	}
735 }
736 
737 /*
738  * Convert tsvector to array of lexemes.
739  */
740 Datum
741 tsvector_to_array(PG_FUNCTION_ARGS)
742 {
743 	TSVector	tsin = PG_GETARG_TSVECTOR(0);
744 	WordEntry  *arrin = ARRPTR(tsin);
745 	Datum	   *elements;
746 	int			i;
747 	ArrayType  *array;
748 
749 	elements = palloc(tsin->size * sizeof(Datum));
750 
751 	for (i = 0; i < tsin->size; i++)
752 	{
753 		elements[i] = PointerGetDatum(
754 									  cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos, arrin[i].len)
755 			);
756 	}
757 
758 	array = construct_array(elements, tsin->size, TEXTOID, -1, false, 'i');
759 
760 	pfree(elements);
761 	PG_FREE_IF_COPY(tsin, 0);
762 	PG_RETURN_POINTER(array);
763 }
764 
765 /*
766  * Build tsvector from array of lexemes.
767  */
768 Datum
769 array_to_tsvector(PG_FUNCTION_ARGS)
770 {
771 	ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
772 	TSVector	tsout;
773 	Datum	   *dlexemes;
774 	WordEntry  *arrout;
775 	bool	   *nulls;
776 	int			nitems,
777 				i,
778 				j,
779 				tslen,
780 				datalen = 0;
781 	char	   *cur;
782 
783 	deconstruct_array(v, TEXTOID, -1, false, 'i', &dlexemes, &nulls, &nitems);
784 
785 	/* Reject nulls (maybe we should just ignore them, instead?) */
786 	for (i = 0; i < nitems; i++)
787 	{
788 		if (nulls[i])
789 			ereport(ERROR,
790 					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
791 					 errmsg("lexeme array may not contain nulls")));
792 	}
793 
794 	/* Sort and de-dup, because this is required for a valid tsvector. */
795 	if (nitems > 1)
796 	{
797 		qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
798 		j = 0;
799 		for (i = 1; i < nitems; i++)
800 		{
801 			if (compare_text_lexemes(&dlexemes[j], &dlexemes[i]) < 0)
802 				dlexemes[++j] = dlexemes[i];
803 		}
804 		nitems = ++j;
805 	}
806 
807 	/* Calculate space needed for surviving lexemes. */
808 	for (i = 0; i < nitems; i++)
809 		datalen += VARSIZE(dlexemes[i]) - VARHDRSZ;
810 	tslen = CALCDATASIZE(nitems, datalen);
811 
812 	/* Allocate and fill tsvector. */
813 	tsout = (TSVector) palloc0(tslen);
814 	SET_VARSIZE(tsout, tslen);
815 	tsout->size = nitems;
816 
817 	arrout = ARRPTR(tsout);
818 	cur = STRPTR(tsout);
819 	for (i = 0; i < nitems; i++)
820 	{
821 		char	   *lex = VARDATA(dlexemes[i]);
822 		int			lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
823 
824 		memcpy(cur, lex, lex_len);
825 		arrout[i].haspos = 0;
826 		arrout[i].len = lex_len;
827 		arrout[i].pos = cur - STRPTR(tsout);
828 		cur += lex_len;
829 	}
830 
831 	PG_FREE_IF_COPY(v, 0);
832 	PG_RETURN_POINTER(tsout);
833 }
834 
835 /*
836  * ts_filter(): keep only lexemes with given weights in tsvector.
837  */
838 Datum
839 tsvector_filter(PG_FUNCTION_ARGS)
840 {
841 	TSVector	tsin = PG_GETARG_TSVECTOR(0),
842 				tsout;
843 	ArrayType  *weights = PG_GETARG_ARRAYTYPE_P(1);
844 	WordEntry  *arrin = ARRPTR(tsin),
845 			   *arrout;
846 	char	   *datain = STRPTR(tsin),
847 			   *dataout;
848 	Datum	   *dweights;
849 	bool	   *nulls;
850 	int			nweights;
851 	int			i,
852 				j;
853 	int			cur_pos = 0;
854 	char		mask = 0;
855 
856 	deconstruct_array(weights, CHAROID, 1, true, 'c',
857 					  &dweights, &nulls, &nweights);
858 
859 	for (i = 0; i < nweights; i++)
860 	{
861 		char		char_weight;
862 
863 		if (nulls[i])
864 			ereport(ERROR,
865 					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
866 					 errmsg("weight array may not contain nulls")));
867 
868 		char_weight = DatumGetChar(dweights[i]);
869 		switch (char_weight)
870 		{
871 			case 'A':
872 			case 'a':
873 				mask = mask | 8;
874 				break;
875 			case 'B':
876 			case 'b':
877 				mask = mask | 4;
878 				break;
879 			case 'C':
880 			case 'c':
881 				mask = mask | 2;
882 				break;
883 			case 'D':
884 			case 'd':
885 				mask = mask | 1;
886 				break;
887 			default:
888 				ereport(ERROR,
889 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
890 						 errmsg("unrecognized weight: \"%c\"", char_weight)));
891 		}
892 	}
893 
894 	tsout = (TSVector) palloc0(VARSIZE(tsin));
895 	tsout->size = tsin->size;
896 	arrout = ARRPTR(tsout);
897 	dataout = STRPTR(tsout);
898 
899 	for (i = j = 0; i < tsin->size; i++)
900 	{
901 		WordEntryPosVector *posvin,
902 				   *posvout;
903 		int			npos = 0;
904 		int			k;
905 
906 		if (!arrin[i].haspos)
907 			continue;
908 
909 		posvin = _POSVECPTR(tsin, arrin + i);
910 		posvout = (WordEntryPosVector *)
911 			(dataout + SHORTALIGN(cur_pos + arrin[i].len));
912 
913 		for (k = 0; k < posvin->npos; k++)
914 		{
915 			if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
916 				posvout->pos[npos++] = posvin->pos[k];
917 		}
918 
919 		/* if no satisfactory positions found, skip lexeme */
920 		if (!npos)
921 			continue;
922 
923 		arrout[j].haspos = true;
924 		arrout[j].len = arrin[i].len;
925 		arrout[j].pos = cur_pos;
926 
927 		memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
928 		posvout->npos = npos;
929 		cur_pos += SHORTALIGN(arrin[i].len);
930 		cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
931 			sizeof(uint16);
932 		j++;
933 	}
934 
935 	tsout->size = j;
936 	if (dataout != STRPTR(tsout))
937 		memmove(STRPTR(tsout), dataout, cur_pos);
938 
939 	SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
940 
941 	PG_FREE_IF_COPY(tsin, 0);
942 	PG_RETURN_POINTER(tsout);
943 }
944 
945 Datum
946 tsvector_concat(PG_FUNCTION_ARGS)
947 {
948 	TSVector	in1 = PG_GETARG_TSVECTOR(0);
949 	TSVector	in2 = PG_GETARG_TSVECTOR(1);
950 	TSVector	out;
951 	WordEntry  *ptr;
952 	WordEntry  *ptr1,
953 			   *ptr2;
954 	WordEntryPos *p;
955 	int			maxpos = 0,
956 				i,
957 				j,
958 				i1,
959 				i2,
960 				dataoff,
961 				output_bytes,
962 				output_size;
963 	char	   *data,
964 			   *data1,
965 			   *data2;
966 
967 	/* Get max position in in1; we'll need this to offset in2's positions */
968 	ptr = ARRPTR(in1);
969 	i = in1->size;
970 	while (i--)
971 	{
972 		if ((j = POSDATALEN(in1, ptr)) != 0)
973 		{
974 			p = POSDATAPTR(in1, ptr);
975 			while (j--)
976 			{
977 				if (WEP_GETPOS(*p) > maxpos)
978 					maxpos = WEP_GETPOS(*p);
979 				p++;
980 			}
981 		}
982 		ptr++;
983 	}
984 
985 	ptr1 = ARRPTR(in1);
986 	ptr2 = ARRPTR(in2);
987 	data1 = STRPTR(in1);
988 	data2 = STRPTR(in2);
989 	i1 = in1->size;
990 	i2 = in2->size;
991 
992 	/*
993 	 * Conservative estimate of space needed.  We might need all the data in
994 	 * both inputs, and conceivably add a pad byte before position data for
995 	 * each item where there was none before.
996 	 */
997 	output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
998 
999 	out = (TSVector) palloc0(output_bytes);
1000 	SET_VARSIZE(out, output_bytes);
1001 
1002 	/*
1003 	 * We must make out->size valid so that STRPTR(out) is sensible.  We'll
1004 	 * collapse out any unused space at the end.
1005 	 */
1006 	out->size = in1->size + in2->size;
1007 
1008 	ptr = ARRPTR(out);
1009 	data = STRPTR(out);
1010 	dataoff = 0;
1011 	while (i1 && i2)
1012 	{
1013 		int			cmp = compareEntry(data1, ptr1, data2, ptr2);
1014 
1015 		if (cmp < 0)
1016 		{						/* in1 first */
1017 			ptr->haspos = ptr1->haspos;
1018 			ptr->len = ptr1->len;
1019 			memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1020 			ptr->pos = dataoff;
1021 			dataoff += ptr1->len;
1022 			if (ptr->haspos)
1023 			{
1024 				dataoff = SHORTALIGN(dataoff);
1025 				memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1026 				dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1027 			}
1028 
1029 			ptr++;
1030 			ptr1++;
1031 			i1--;
1032 		}
1033 		else if (cmp > 0)
1034 		{						/* in2 first */
1035 			ptr->haspos = ptr2->haspos;
1036 			ptr->len = ptr2->len;
1037 			memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1038 			ptr->pos = dataoff;
1039 			dataoff += ptr2->len;
1040 			if (ptr->haspos)
1041 			{
1042 				int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1043 
1044 				if (addlen == 0)
1045 					ptr->haspos = 0;
1046 				else
1047 				{
1048 					dataoff = SHORTALIGN(dataoff);
1049 					dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1050 				}
1051 			}
1052 
1053 			ptr++;
1054 			ptr2++;
1055 			i2--;
1056 		}
1057 		else
1058 		{
1059 			ptr->haspos = ptr1->haspos | ptr2->haspos;
1060 			ptr->len = ptr1->len;
1061 			memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1062 			ptr->pos = dataoff;
1063 			dataoff += ptr1->len;
1064 			if (ptr->haspos)
1065 			{
1066 				if (ptr1->haspos)
1067 				{
1068 					dataoff = SHORTALIGN(dataoff);
1069 					memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1070 					dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1071 					if (ptr2->haspos)
1072 						dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
1073 				}
1074 				else			/* must have ptr2->haspos */
1075 				{
1076 					int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1077 
1078 					if (addlen == 0)
1079 						ptr->haspos = 0;
1080 					else
1081 					{
1082 						dataoff = SHORTALIGN(dataoff);
1083 						dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1084 					}
1085 				}
1086 			}
1087 
1088 			ptr++;
1089 			ptr1++;
1090 			ptr2++;
1091 			i1--;
1092 			i2--;
1093 		}
1094 	}
1095 
1096 	while (i1)
1097 	{
1098 		ptr->haspos = ptr1->haspos;
1099 		ptr->len = ptr1->len;
1100 		memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1101 		ptr->pos = dataoff;
1102 		dataoff += ptr1->len;
1103 		if (ptr->haspos)
1104 		{
1105 			dataoff = SHORTALIGN(dataoff);
1106 			memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1107 			dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1108 		}
1109 
1110 		ptr++;
1111 		ptr1++;
1112 		i1--;
1113 	}
1114 
1115 	while (i2)
1116 	{
1117 		ptr->haspos = ptr2->haspos;
1118 		ptr->len = ptr2->len;
1119 		memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1120 		ptr->pos = dataoff;
1121 		dataoff += ptr2->len;
1122 		if (ptr->haspos)
1123 		{
1124 			int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1125 
1126 			if (addlen == 0)
1127 				ptr->haspos = 0;
1128 			else
1129 			{
1130 				dataoff = SHORTALIGN(dataoff);
1131 				dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1132 			}
1133 		}
1134 
1135 		ptr++;
1136 		ptr2++;
1137 		i2--;
1138 	}
1139 
1140 	/*
1141 	 * Instead of checking each offset individually, we check for overflow of
1142 	 * pos fields once at the end.
1143 	 */
1144 	if (dataoff > MAXSTRPOS)
1145 		ereport(ERROR,
1146 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1147 				 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
1148 
1149 	/*
1150 	 * Adjust sizes (asserting that we didn't overrun the original estimates)
1151 	 * and collapse out any unused array entries.
1152 	 */
1153 	output_size = ptr - ARRPTR(out);
1154 	Assert(output_size <= out->size);
1155 	out->size = output_size;
1156 	if (data != STRPTR(out))
1157 		memmove(STRPTR(out), data, dataoff);
1158 	output_bytes = CALCDATASIZE(out->size, dataoff);
1159 	Assert(output_bytes <= VARSIZE(out));
1160 	SET_VARSIZE(out, output_bytes);
1161 
1162 	PG_FREE_IF_COPY(in1, 0);
1163 	PG_FREE_IF_COPY(in2, 1);
1164 	PG_RETURN_POINTER(out);
1165 }
1166 
1167 /*
1168  * Compare two strings by tsvector rules.
1169  *
1170  * if isPrefix = true then it returns zero value iff b has prefix a
1171  */
1172 int32
1173 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
1174 {
1175 	int			cmp;
1176 
1177 	if (lena == 0)
1178 	{
1179 		if (prefix)
1180 			cmp = 0;			/* empty string is prefix of anything */
1181 		else
1182 			cmp = (lenb > 0) ? -1 : 0;
1183 	}
1184 	else if (lenb == 0)
1185 	{
1186 		cmp = (lena > 0) ? 1 : 0;
1187 	}
1188 	else
1189 	{
1190 		cmp = memcmp(a, b, Min(lena, lenb));
1191 
1192 		if (prefix)
1193 		{
1194 			if (cmp == 0 && lena > lenb)
1195 				cmp = 1;		/* a is longer, so not a prefix of b */
1196 		}
1197 		else if (cmp == 0 && lena != lenb)
1198 		{
1199 			cmp = (lena < lenb) ? -1 : 1;
1200 		}
1201 	}
1202 
1203 	return cmp;
1204 }
1205 
1206 /*
1207  * Check weight info or/and fill 'data' with the required positions
1208  */
1209 static bool
1210 checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
1211 			   ExecPhraseData *data)
1212 {
1213 	bool		result = false;
1214 
1215 	if (entry->haspos && (val->weight || data))
1216 	{
1217 		WordEntryPosVector *posvec;
1218 
1219 		/*
1220 		 * We can't use the _POSVECPTR macro here because the pointer to the
1221 		 * tsvector's lexeme storage is already contained in chkval->values.
1222 		 */
1223 		posvec = (WordEntryPosVector *)
1224 			(chkval->values + SHORTALIGN(entry->pos + entry->len));
1225 
1226 		if (val->weight && data)
1227 		{
1228 			WordEntryPos *posvec_iter = posvec->pos;
1229 			WordEntryPos *dptr;
1230 
1231 			/*
1232 			 * Filter position information by weights
1233 			 */
1234 			dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
1235 			data->allocated = true;
1236 
1237 			/* Is there a position with a matching weight? */
1238 			while (posvec_iter < posvec->pos + posvec->npos)
1239 			{
1240 				/* If true, append this position to the data->pos */
1241 				if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1242 				{
1243 					*dptr = WEP_GETPOS(*posvec_iter);
1244 					dptr++;
1245 				}
1246 
1247 				posvec_iter++;
1248 			}
1249 
1250 			data->npos = dptr - data->pos;
1251 
1252 			if (data->npos > 0)
1253 				result = true;
1254 		}
1255 		else if (val->weight)
1256 		{
1257 			WordEntryPos *posvec_iter = posvec->pos;
1258 
1259 			/* Is there a position with a matching weight? */
1260 			while (posvec_iter < posvec->pos + posvec->npos)
1261 			{
1262 				if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1263 				{
1264 					result = true;
1265 					break;		/* no need to go further */
1266 				}
1267 
1268 				posvec_iter++;
1269 			}
1270 		}
1271 		else					/* data != NULL */
1272 		{
1273 			data->npos = posvec->npos;
1274 			data->pos = posvec->pos;
1275 			data->allocated = false;
1276 			result = true;
1277 		}
1278 	}
1279 	else
1280 	{
1281 		result = true;
1282 	}
1283 
1284 	return result;
1285 }
1286 
1287 /*
1288  * Removes duplicate pos entries. We can't use uniquePos() from
1289  * tsvector.c because array might be longer than MAXENTRYPOS
1290  *
1291  * Returns new length.
1292  */
1293 static int
1294 uniqueLongPos(WordEntryPos *pos, int npos)
1295 {
1296 	WordEntryPos *pos_iter,
1297 			   *result;
1298 
1299 	if (npos <= 1)
1300 		return npos;
1301 
1302 	qsort((void *) pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
1303 
1304 	result = pos;
1305 	pos_iter = pos + 1;
1306 	while (pos_iter < pos + npos)
1307 	{
1308 		if (WEP_GETPOS(*pos_iter) != WEP_GETPOS(*result))
1309 		{
1310 			result++;
1311 			*result = WEP_GETPOS(*pos_iter);
1312 		}
1313 
1314 		pos_iter++;
1315 	}
1316 
1317 	return result + 1 - pos;
1318 }
1319 
1320 /*
1321  * is there value 'val' in array or not ?
1322  */
1323 static bool
1324 checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
1325 {
1326 	CHKVAL	   *chkval = (CHKVAL *) checkval;
1327 	WordEntry  *StopLow = chkval->arrb;
1328 	WordEntry  *StopHigh = chkval->arre;
1329 	WordEntry  *StopMiddle = StopHigh;
1330 	bool		res = false;
1331 
1332 	/* Loop invariant: StopLow <= val < StopHigh */
1333 	while (StopLow < StopHigh)
1334 	{
1335 		int			difference;
1336 
1337 		StopMiddle = StopLow + (StopHigh - StopLow) / 2;
1338 		difference = tsCompareString(chkval->operand + val->distance,
1339 									 val->length,
1340 									 chkval->values + StopMiddle->pos,
1341 									 StopMiddle->len,
1342 									 false);
1343 
1344 		if (difference == 0)
1345 		{
1346 			/* Check weight info & fill 'data' with positions */
1347 			res = checkclass_str(chkval, StopMiddle, val, data);
1348 			break;
1349 		}
1350 		else if (difference > 0)
1351 			StopLow = StopMiddle + 1;
1352 		else
1353 			StopHigh = StopMiddle;
1354 	}
1355 
1356 	if ((!res || data) && val->prefix)
1357 	{
1358 		WordEntryPos *allpos = NULL;
1359 		int			npos = 0,
1360 					totalpos = 0;
1361 
1362 		/*
1363 		 * there was a failed exact search, so we should scan further to find
1364 		 * a prefix match. We also need to do so if caller needs position info
1365 		 */
1366 		if (StopLow >= StopHigh)
1367 			StopMiddle = StopHigh;
1368 
1369 		while ((!res || data) && StopMiddle < chkval->arre &&
1370 			   tsCompareString(chkval->operand + val->distance,
1371 							   val->length,
1372 							   chkval->values + StopMiddle->pos,
1373 							   StopMiddle->len,
1374 							   true) == 0)
1375 		{
1376 			if (data)
1377 			{
1378 				/*
1379 				 * We need to join position information
1380 				 */
1381 				res = checkclass_str(chkval, StopMiddle, val, data);
1382 
1383 				if (res)
1384 				{
1385 					while (npos + data->npos >= totalpos)
1386 					{
1387 						if (totalpos == 0)
1388 						{
1389 							totalpos = 256;
1390 							allpos = palloc(sizeof(WordEntryPos) * totalpos);
1391 						}
1392 						else
1393 						{
1394 							totalpos *= 2;
1395 							allpos = repalloc(allpos, sizeof(WordEntryPos) * totalpos);
1396 						}
1397 					}
1398 
1399 					memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
1400 					npos += data->npos;
1401 				}
1402 				else
1403 				{
1404 					/* at loop exit, res must be true if we found matches */
1405 					res = (npos > 0);
1406 				}
1407 			}
1408 			else
1409 			{
1410 				res = checkclass_str(chkval, StopMiddle, val, NULL);
1411 			}
1412 
1413 			StopMiddle++;
1414 		}
1415 
1416 		if (res && data)
1417 		{
1418 			/* Sort and make unique array of found positions */
1419 			data->pos = allpos;
1420 			data->npos = uniqueLongPos(allpos, npos);
1421 			data->allocated = true;
1422 		}
1423 	}
1424 
1425 	return res;
1426 }
1427 
1428 /*
1429  * Compute output position list for a tsquery operator in phrase mode.
1430  *
1431  * Merge the position lists in Ldata and Rdata as specified by "emit",
1432  * returning the result list into *data.  The input position lists must be
1433  * sorted and unique, and the output will be as well.
1434  *
1435  * data: pointer to initially-all-zeroes output struct, or NULL
1436  * Ldata, Rdata: input position lists
1437  * emit: bitmask of TSPO_XXX flags
1438  * Loffset: offset to be added to Ldata positions before comparing/outputting
1439  * Roffset: offset to be added to Rdata positions before comparing/outputting
1440  * max_npos: maximum possible required size of output position array
1441  *
1442  * Loffset and Roffset should not be negative, else we risk trying to output
1443  * negative positions, which won't fit into WordEntryPos.
1444  *
1445  * The result is boolean (TS_YES or TS_NO), but for the caller's convenience
1446  * we return it as TSTernaryValue.
1447  *
getrlimit(resource: ::c_int, rlim: *mut ::rlimit) -> ::c_int1448  * Returns TS_YES if any positions were emitted to *data; or if data is NULL,
1449  * returns TS_YES if any positions would have been emitted.
1450  */
1451 #define TSPO_L_ONLY		0x01	/* emit positions appearing only in L */
1452 #define TSPO_R_ONLY		0x02	/* emit positions appearing only in R */
1453 #define TSPO_BOTH		0x04	/* emit positions appearing in both L&R */
1454 
1455 static TSTernaryValue
1456 TS_phrase_output(ExecPhraseData *data,
1457 				 ExecPhraseData *Ldata,
1458 				 ExecPhraseData *Rdata,
1459 				 int emit,
1460 				 int Loffset,
1461 				 int Roffset,
1462 				 int max_npos)
1463 {
1464 	int			Lindex,
1465 				Rindex;
1466 
1467 	/* Loop until both inputs are exhausted */
1468 	Lindex = Rindex = 0;
1469 	while (Lindex < Ldata->npos || Rindex < Rdata->npos)
1470 	{
1471 		int			Lpos,
1472 					Rpos;
1473 		int			output_pos = 0;
1474 
1475 		/*
1476 		 * Fetch current values to compare.  WEP_GETPOS() is needed because
1477 		 * ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
1478 		 */
1479 		if (Lindex < Ldata->npos)
1480 			Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
1481 		else
1482 		{
1483 			/* L array exhausted, so we're done if R_ONLY isn't set */
1484 			if (!(emit & TSPO_R_ONLY))
1485 				break;
1486 			Lpos = INT_MAX;
1487 		}
1488 		if (Rindex < Rdata->npos)
1489 			Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
1490 		else
1491 		{
1492 			/* R array exhausted, so we're done if L_ONLY isn't set */
1493 			if (!(emit & TSPO_L_ONLY))
1494 				break;
1495 			Rpos = INT_MAX;
1496 		}
1497 
1498 		/* Merge-join the two input lists */
1499 		if (Lpos < Rpos)
1500 		{
1501 			/* Lpos is not matched in Rdata, should we output it? */
1502 			if (emit & TSPO_L_ONLY)
1503 				output_pos = Lpos;
1504 			Lindex++;
1505 		}
1506 		else if (Lpos == Rpos)
1507 		{
1508 			/* Lpos and Rpos match ... should we output it? */
1509 			if (emit & TSPO_BOTH)
1510 				output_pos = Rpos;
1511 			Lindex++;
1512 			Rindex++;
1513 		}
1514 		else					/* Lpos > Rpos */
1515 		{
1516 			/* Rpos is not matched in Ldata, should we output it? */
1517 			if (emit & TSPO_R_ONLY)
1518 				output_pos = Rpos;
1519 			Rindex++;
1520 		}
1521 
1522 		if (output_pos > 0)
1523 		{
1524 			if (data)
1525 			{
1526 				/* Store position, first allocating output array if needed */
1527 				if (data->pos == NULL)
1528 				{
1529 					data->pos = (WordEntryPos *)
1530 						palloc(max_npos * sizeof(WordEntryPos));
1531 					data->allocated = true;
1532 				}
1533 				data->pos[data->npos++] = output_pos;
1534 			}
1535 			else
1536 			{
1537 				/*
1538 				 * Exact positions not needed, so return TS_YES as soon as we
1539 				 * know there is at least one.
1540 				 */
1541 				return TS_YES;
1542 			}
1543 		}
1544 	}
1545 
1546 	if (data && data->npos > 0)
1547 	{
1548 		/* Let's assert we didn't overrun the array */
1549 		Assert(data->npos <= max_npos);
1550 		return TS_YES;
1551 	}
1552 	return TS_NO;
1553 }
lutimes(file: *const ::c_char, times: *const ::timeval) -> ::c_int1554 
1555 /*
1556  * Execute tsquery at or below an OP_PHRASE operator.
1557  *
1558  * This handles tsquery execution at recursion levels where we need to care
1559  * about match locations.
1560  *
1561  * In addition to the same arguments used for TS_execute, the caller may pass
1562  * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
1563  * match position info on success.  data == NULL if no position data need be
1564  * returned.  (In practice, outside callers pass NULL, and only the internal
1565  * recursion cases pass a data pointer.)
1566  * Note: the function assumes data != NULL for operators other than OP_PHRASE.
1567  * This is OK because an outside call always starts from an OP_PHRASE node.
1568  *
1569  * The detailed semantics of the match data, given that the function returned
1570  * TS_YES (successful match), are:
1571  *
1572  * npos > 0, negate = false:
1573  *	 query is matched at specified position(s) (and only those positions)
1574  * npos > 0, negate = true:
1575  *	 query is matched at all positions *except* specified position(s)
1576  * npos = 0, negate = true:
1577  *	 query is matched at all positions
1578  * npos = 0, negate = false:
1579  *	 disallowed (this should result in TS_NO or TS_MAYBE, as appropriate)
1580  *
1581  * Successful matches also return a "width" value which is the match width in
1582  * lexemes, less one.  Hence, "width" is zero for simple one-lexeme matches,
1583  * and is the sum of the phrase operator distances for phrase matches.  Note
1584  * that when width > 0, the listed positions represent the ends of matches not
1585  * the starts.  (This unintuitive rule is needed to avoid possibly generating
1586  * negative positions, which wouldn't fit into the WordEntryPos arrays.)
1587  *
1588  * If the TSExecuteCallback function reports that an operand is present
1589  * but fails to provide position(s) for it, we will return TS_MAYBE when
1590  * it is possible but not certain that the query is matched.
1591  *
1592  * When the function returns TS_NO or TS_MAYBE, it must return npos = 0,
1593  * negate = false (which is the state initialized by the caller); but the
1594  * "width" output in such cases is undefined.
1595  */
1596 static TSTernaryValue
1597 TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
1598 				  TSExecuteCallback chkcond,
1599 				  ExecPhraseData *data)
1600 {
1601 	ExecPhraseData Ldata,
1602 				Rdata;
1603 	TSTernaryValue lmatch,
1604 				rmatch;
1605 	int			Loffset,
1606 				Roffset,
1607 				maxwidth;
1608 
1609 	/* since this function recurses, it could be driven to stack overflow */
1610 	check_stack_depth();
1611 
1612 	if (curitem->type == QI_VAL)
1613 	{
1614 		if (!chkcond(arg, (QueryOperand *) curitem, data))
1615 			return TS_NO;
1616 		if (data->npos > 0 || data->negate)
1617 			return TS_YES;
1618 		/* If we have no position data, we must return TS_MAYBE */
1619 		return TS_MAYBE;
1620 	}
1621 
1622 	switch (curitem->qoperator.oper)
1623 	{
1624 		case OP_NOT:
1625 
1626 			/*
1627 			 * We need not touch data->width, since a NOT operation does not
1628 			 * change the match width.
1629 			 */
1630 			if (!(flags & TS_EXEC_CALC_NOT))
1631 			{
1632 				/* without CALC_NOT, report NOT as "match everywhere" */
1633 				Assert(data->npos == 0 && !data->negate);
1634 				data->negate = true;
1635 				return TS_YES;
1636 			}
1637 			switch (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
1638 			{
1639 				case TS_NO:
1640 					/* change "match nowhere" to "match everywhere" */
1641 					Assert(data->npos == 0 && !data->negate);
1642 					data->negate = true;
1643 					return TS_YES;
1644 				case TS_YES:
1645 					if (data->npos > 0)
1646 					{
1647 						/* we have some positions, invert negate flag */
1648 						data->negate = !data->negate;
1649 						return TS_YES;
1650 					}
1651 					else if (data->negate)
1652 					{
1653 						/* change "match everywhere" to "match nowhere" */
1654 						data->negate = false;
1655 						return TS_NO;
1656 					}
1657 					/* Should not get here if result was TS_YES */
1658 					Assert(false);
1659 					break;
1660 				case TS_MAYBE:
1661 					/* match positions are, and remain, uncertain */
1662 					return TS_MAYBE;
1663 			}
1664 			break;
1665 
1666 		case OP_PHRASE:
1667 		case OP_AND:
1668 			memset(&Ldata, 0, sizeof(Ldata));
1669 			memset(&Rdata, 0, sizeof(Rdata));
1670 
1671 			lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
1672 									   arg, flags, chkcond, &Ldata);
1673 			if (lmatch == TS_NO)
1674 				return TS_NO;
1675 
1676 			rmatch = TS_phrase_execute(curitem + 1,
1677 									   arg, flags, chkcond, &Rdata);
1678 			if (rmatch == TS_NO)
1679 				return TS_NO;
1680 
1681 			/*
1682 			 * If either operand has no position information, then we can't
1683 			 * return reliable position data, only a MAYBE result.
1684 			 */
1685 			if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
1686 				return TS_MAYBE;
1687 
1688 			if (curitem->qoperator.oper == OP_PHRASE)
1689 			{
1690 				/*
1691 				 * Compute Loffset and Roffset suitable for phrase match, and
1692 				 * compute overall width of whole phrase match.
1693 				 */
1694 				Loffset = curitem->qoperator.distance + Rdata.width;
1695 				Roffset = 0;
1696 				if (data)
1697 					data->width = curitem->qoperator.distance +
1698 						Ldata.width + Rdata.width;
1699 			}
1700 			else
1701 			{
1702 				/*
1703 				 * For OP_AND, set output width and alignment like OP_OR (see
1704 				 * comment below)
1705 				 */
1706 				maxwidth = Max(Ldata.width, Rdata.width);
1707 				Loffset = maxwidth - Ldata.width;
1708 				Roffset = maxwidth - Rdata.width;
1709 				if (data)
1710 					data->width = maxwidth;
1711 			}
1712 
1713 			if (Ldata.negate && Rdata.negate)
1714 			{
1715 				/* !L & !R: treat as !(L | R) */
1716 				(void) TS_phrase_output(data, &Ldata, &Rdata,
1717 										TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
1718 										Loffset, Roffset,
1719 										Ldata.npos + Rdata.npos);
1720 				if (data)
1721 					data->negate = true;
1722 				return TS_YES;
1723 			}
1724 			else if (Ldata.negate)
1725 			{
1726 				/* !L & R */
1727 				return TS_phrase_output(data, &Ldata, &Rdata,
1728 										TSPO_R_ONLY,
1729 										Loffset, Roffset,
1730 										Rdata.npos);
1731 			}
1732 			else if (Rdata.negate)
1733 			{
1734 				/* L & !R */
1735 				return TS_phrase_output(data, &Ldata, &Rdata,
1736 										TSPO_L_ONLY,
1737 										Loffset, Roffset,
1738 										Ldata.npos);
1739 			}
1740 			else
1741 			{
1742 				/* straight AND */
1743 				return TS_phrase_output(data, &Ldata, &Rdata,
1744 										TSPO_BOTH,
1745 										Loffset, Roffset,
1746 										Min(Ldata.npos, Rdata.npos));
1747 			}
1748 
1749 		case OP_OR:
1750 			memset(&Ldata, 0, sizeof(Ldata));
1751 			memset(&Rdata, 0, sizeof(Rdata));
1752 
1753 			lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
1754 									   arg, flags, chkcond, &Ldata);
1755 			rmatch = TS_phrase_execute(curitem + 1,
1756 									   arg, flags, chkcond, &Rdata);
1757 
1758 			if (lmatch == TS_NO && rmatch == TS_NO)
1759 				return TS_NO;
1760 
1761 			/*
1762 			 * If either operand has no position information, then we can't
1763 			 * return reliable position data, only a MAYBE result.
1764 			 */
1765 			if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
1766 				return TS_MAYBE;
1767 
1768 			/*
1769 			 * Cope with undefined output width from failed submatch.  (This
1770 			 * takes less code than trying to ensure that all failure returns
1771 			 * set data->width to zero.)
1772 			 */
1773 			if (lmatch == TS_NO)
1774 				Ldata.width = 0;
1775 			if (rmatch == TS_NO)
1776 				Rdata.width = 0;
1777 
1778 			/*
1779 			 * For OP_AND and OP_OR, report the width of the wider of the two
1780 			 * inputs, and align the narrower input's positions to the right
1781 			 * end of that width.  This rule deals at least somewhat
1782 			 * reasonably with cases like "x <-> (y | z <-> q)".
1783 			 */
1784 			maxwidth = Max(Ldata.width, Rdata.width);
1785 			Loffset = maxwidth - Ldata.width;
1786 			Roffset = maxwidth - Rdata.width;
1787 			data->width = maxwidth;
1788 
1789 			if (Ldata.negate && Rdata.negate)
1790 			{
1791 				/* !L | !R: treat as !(L & R) */
1792 				(void) TS_phrase_output(data, &Ldata, &Rdata,
1793 										TSPO_BOTH,
1794 										Loffset, Roffset,
1795 										Min(Ldata.npos, Rdata.npos));
1796 				data->negate = true;
1797 				return TS_YES;
1798 			}
1799 			else if (Ldata.negate)
1800 			{
1801 				/* !L | R: treat as !(L & !R) */
1802 				(void) TS_phrase_output(data, &Ldata, &Rdata,
1803 										TSPO_L_ONLY,
1804 										Loffset, Roffset,
1805 										Ldata.npos);
1806 				data->negate = true;
1807 				return TS_YES;
1808 			}
1809 			else if (Rdata.negate)
1810 			{
1811 				/* L | !R: treat as !(!L & R) */
1812 				(void) TS_phrase_output(data, &Ldata, &Rdata,
1813 										TSPO_R_ONLY,
1814 										Loffset, Roffset,
1815 										Rdata.npos);
1816 				data->negate = true;
1817 				return TS_YES;
1818 			}
1819 			else
1820 			{
1821 				/* straight OR */
1822 				return TS_phrase_output(data, &Ldata, &Rdata,
1823 										TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
1824 										Loffset, Roffset,
1825 										Ldata.npos + Rdata.npos);
1826 			}
1827 
1828 		default:
1829 			elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1830 	}
1831 
1832 	/* not reachable, but keep compiler quiet */
1833 	return TS_NO;
1834 }
1835 
1836 
1837 /*
1838  * Evaluate tsquery boolean expression.
1839  *
1840  * curitem: current tsquery item (initially, the first one)
1841  * arg: opaque value to pass through to callback function
1842  * flags: bitmask of flag bits shown in ts_utils.h
1843  * chkcond: callback function to check whether a primitive value is present
1844  */
1845 bool
1846 TS_execute(QueryItem *curitem, void *arg, uint32 flags,
1847 		   TSExecuteCallback chkcond)
1848 {
1849 	/*
1850 	 * If we get TS_MAYBE from the recursion, return true.  We could only see
1851 	 * that result if the caller passed TS_EXEC_PHRASE_NO_POS, so there's no
1852 	 * need to check again.
1853 	 */
1854 	return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO;
1855 }
1856 
1857 /*
1858  * TS_execute recursion for operators above any phrase operator.  Here we do
1859  * not need to worry about lexeme positions.  As soon as we hit an OP_PHRASE
1860  * operator, we pass it off to TS_phrase_execute which does worry.
1861  */
1862 static TSTernaryValue
1863 TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags,
1864 				   TSExecuteCallback chkcond)
1865 {
1866 	TSTernaryValue lmatch;
1867 
1868 	/* since this function recurses, it could be driven to stack overflow */
1869 	check_stack_depth();
1870 
1871 	/* ... and let's check for query cancel while we're at it */
1872 	CHECK_FOR_INTERRUPTS();
1873 
1874 	if (curitem->type == QI_VAL)
1875 		return chkcond(arg, (QueryOperand *) curitem,
1876 					   NULL /* don't need position info */ ) ? TS_YES : TS_NO;
1877 
1878 	switch (curitem->qoperator.oper)
1879 	{
1880 		case OP_NOT:
1881 			if (!(flags & TS_EXEC_CALC_NOT))
1882 				return TS_YES;
1883 			switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1884 			{
1885 				case TS_NO:
1886 					return TS_YES;
1887 				case TS_YES:
1888 					return TS_NO;
1889 				case TS_MAYBE:
1890 					return TS_MAYBE;
1891 			}
1892 			break;
1893 
1894 		case OP_AND:
1895 			lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
1896 										flags, chkcond);
1897 			if (lmatch == TS_NO)
1898 				return TS_NO;
1899 			switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1900 			{
1901 				case TS_NO:
1902 					return TS_NO;
1903 				case TS_YES:
1904 					return lmatch;
1905 				case TS_MAYBE:
1906 					return TS_MAYBE;
1907 			}
1908 			break;
1909 
1910 		case OP_OR:
1911 			lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
1912 										flags, chkcond);
1913 			if (lmatch == TS_YES)
1914 				return TS_YES;
1915 			switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1916 			{
1917 				case TS_NO:
1918 					return lmatch;
1919 				case TS_YES:
1920 					return TS_YES;
1921 				case TS_MAYBE:
1922 					return TS_MAYBE;
1923 			}
1924 			break;
1925 
1926 		case OP_PHRASE:
1927 
1928 			/*
1929 			 * If we get a MAYBE result, and the caller doesn't want that,
1930 			 * convert it to NO.  It would be more consistent, perhaps, to
1931 			 * return the result of TS_phrase_execute() verbatim and then
1932 			 * convert MAYBE results at the top of the recursion.  But
1933 			 * converting at the topmost phrase operator gives results that
1934 			 * are bug-compatible with the old implementation, so do it like
1935 			 * this for now.
1936 			 */
1937 			switch (TS_phrase_execute(curitem, arg, flags, chkcond, NULL))
1938 			{
1939 				case TS_NO:
1940 					return TS_NO;
1941 				case TS_YES:
1942 					return TS_YES;
1943 				case TS_MAYBE:
1944 					return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO;
1945 			}
1946 			break;
1947 
1948 		default:
1949 			elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1950 	}
1951 
1952 	/* not reachable, but keep compiler quiet */
1953 	return TS_NO;
1954 }
1955 
1956 /*
1957  * Detect whether a tsquery boolean expression requires any positive matches
1958  * to values shown in the tsquery.
1959  *
1960  * This is needed to know whether a GIN index search requires full index scan.
1961  * For example, 'x & !y' requires a match of x, so it's sufficient to scan
1962  * entries for x; but 'x | !y' could match rows containing neither x nor y.
1963  */
1964 bool
1965 tsquery_requires_match(QueryItem *curitem)
1966 {
1967 	/* since this function recurses, it could be driven to stack overflow */
1968 	check_stack_depth();
1969 
1970 	if (curitem->type == QI_VAL)
1971 		return true;
1972 
1973 	switch (curitem->qoperator.oper)
1974 	{
1975 		case OP_NOT:
1976 
1977 			/*
1978 			 * Assume there are no required matches underneath a NOT.  For
1979 			 * some cases with nested NOTs, we could prove there's a required
1980 			 * match, but it seems unlikely to be worth the trouble.
1981 			 */
1982 			return false;
1983 
1984 		case OP_PHRASE:
1985 
1986 			/*
1987 			 * Treat OP_PHRASE as OP_AND here
1988 			 */
1989 		case OP_AND:
1990 			/* If either side requires a match, we're good */
1991 			if (tsquery_requires_match(curitem + curitem->qoperator.left))
1992 				return true;
1993 			else
1994 				return tsquery_requires_match(curitem + 1);
1995 
1996 		case OP_OR:
1997 			/* Both sides must require a match */
1998 			if (tsquery_requires_match(curitem + curitem->qoperator.left))
1999 				return tsquery_requires_match(curitem + 1);
2000 			else
2001 				return false;
2002 
2003 		default:
2004 			elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
2005 	}
2006 
2007 	/* not reachable, but keep compiler quiet */
2008 	return false;
2009 }
2010 
2011 /*
2012  * boolean operations
2013  */
2014 Datum
2015 ts_match_qv(PG_FUNCTION_ARGS)
2016 {
2017 	PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq,
2018 										PG_GETARG_DATUM(1),
2019 										PG_GETARG_DATUM(0)));
2020 }
2021 
2022 Datum
2023 ts_match_vq(PG_FUNCTION_ARGS)
2024 {
2025 	TSVector	val = PG_GETARG_TSVECTOR(0);
2026 	TSQuery		query = PG_GETARG_TSQUERY(1);
2027 	CHKVAL		chkval;
2028 	bool		result;
2029 
2030 	/* empty query matches nothing */
2031 	if (!query->size)
2032 	{
2033 		PG_FREE_IF_COPY(val, 0);
2034 		PG_FREE_IF_COPY(query, 1);
2035 		PG_RETURN_BOOL(false);
2036 	}
2037 
2038 	chkval.arrb = ARRPTR(val);
2039 	chkval.arre = chkval.arrb + val->size;
2040 	chkval.values = STRPTR(val);
2041 	chkval.operand = GETOPERAND(query);
2042 	result = TS_execute(GETQUERY(query),
2043 						&chkval,
2044 						TS_EXEC_CALC_NOT,
2045 						checkcondition_str);
2046 
2047 	PG_FREE_IF_COPY(val, 0);
2048 	PG_FREE_IF_COPY(query, 1);
2049 	PG_RETURN_BOOL(result);
2050 }
2051 
2052 Datum
2053 ts_match_tt(PG_FUNCTION_ARGS)
2054 {
2055 	TSVector	vector;
2056 	TSQuery		query;
2057 	bool		res;
2058 
2059 	vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
2060 												  PG_GETARG_DATUM(0)));
2061 	query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery,
2062 												PG_GETARG_DATUM(1)));
2063 
2064 	res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
2065 										   TSVectorGetDatum(vector),
2066 										   TSQueryGetDatum(query)));
2067 
2068 	pfree(vector);
2069 	pfree(query);
2070 
2071 	PG_RETURN_BOOL(res);
2072 }
2073 
2074 Datum
2075 ts_match_tq(PG_FUNCTION_ARGS)
2076 {
2077 	TSVector	vector;
2078 	TSQuery		query = PG_GETARG_TSQUERY(1);
2079 	bool		res;
2080 
2081 	vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
2082 												  PG_GETARG_DATUM(0)));
2083 
2084 	res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
2085 										   TSVectorGetDatum(vector),
2086 										   TSQueryGetDatum(query)));
2087 
2088 	pfree(vector);
2089 	PG_FREE_IF_COPY(query, 1);
2090 
2091 	PG_RETURN_BOOL(res);
2092 }
2093 
2094 /*
2095  * ts_stat statistic function support
2096  */
2097 
2098 
2099 /*
2100  * Returns the number of positions in value 'wptr' within tsvector 'txt',
2101  * that have a weight equal to one of the weights in 'weight' bitmask.
2102  */
2103 static int
2104 check_weight(TSVector txt, WordEntry *wptr, int8 weight)
2105 {
2106 	int			len = POSDATALEN(txt, wptr);
2107 	int			num = 0;
2108 	WordEntryPos *ptr = POSDATAPTR(txt, wptr);
2109 
2110 	while (len--)
2111 	{
2112 		if (weight & (1 << WEP_GETWEIGHT(*ptr)))
2113 			num++;
2114 		ptr++;
2115 	}
2116 	return num;
2117 }
2118 
2119 #define compareStatWord(a,e,t)							\
2120 	tsCompareString((a)->lexeme, (a)->lenlexeme,		\
2121 					STRPTR(t) + (e)->pos, (e)->len,		\
2122 					false)
2123 
2124 static void
2125 insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
2126 {
2127 	WordEntry  *we = ARRPTR(txt) + off;
2128 	StatEntry  *node = stat->root,
2129 			   *pnode = NULL;
2130 	int			n,
2131 				res = 0;
2132 	uint32		depth = 1;
2133 
2134 	if (stat->weight == 0)
2135 		n = (we->haspos) ? POSDATALEN(txt, we) : 1;
2136 	else
2137 		n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
2138 
2139 	if (n == 0)
2140 		return;					/* nothing to insert */
2141 
2142 	while (node)
2143 	{
2144 		res = compareStatWord(node, we, txt);
2145 
2146 		if (res == 0)
2147 		{
2148 			break;
2149 		}
2150 		else
2151 		{
2152 			pnode = node;
2153 			node = (res < 0) ? node->left : node->right;
2154 		}
2155 		depth++;
2156 	}
2157 
2158 	if (depth > stat->maxdepth)
2159 		stat->maxdepth = depth;
2160 
2161 	if (node == NULL)
2162 	{
2163 		node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
2164 		node->left = node->right = NULL;
2165 		node->ndoc = 1;
2166 		node->nentry = n;
2167 		node->lenlexeme = we->len;
2168 		memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
2169 
2170 		if (pnode == NULL)
2171 		{
2172 			stat->root = node;
2173 		}
2174 		else
2175 		{
2176 			if (res < 0)
2177 				pnode->left = node;
2178 			else
2179 				pnode->right = node;
2180 		}
2181 
2182 	}
2183 	else
2184 	{
2185 		node->ndoc++;
2186 		node->nentry += n;
2187 	}
2188 }
2189 
2190 static void
2191 chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt,
2192 					uint32 low, uint32 high, uint32 offset)
2193 {
2194 	uint32		pos;
2195 	uint32		middle = (low + high) >> 1;
2196 
2197 	pos = (low + middle) >> 1;
2198 	if (low != middle && pos >= offset && pos - offset < txt->size)
2199 		insertStatEntry(persistentContext, stat, txt, pos - offset);
2200 	pos = (high + middle + 1) >> 1;
2201 	if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
2202 		insertStatEntry(persistentContext, stat, txt, pos - offset);
2203 
2204 	if (low != middle)
2205 		chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
2206 	if (high != middle + 1)
2207 		chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
2208 }
2209 
2210 /*
2211  * This is written like a custom aggregate function, because the
2212  * original plan was to do just that. Unfortunately, an aggregate function
2213  * can't return a set, so that plan was abandoned. If that limitation is
2214  * lifted in the future, ts_stat could be a real aggregate function so that
2215  * you could use it like this:
2216  *
2217  *	 SELECT ts_stat(vector_column) FROM vector_table;
2218  *
2219  *	where vector_column is a tsvector-type column in vector_table.
2220  */
2221 
2222 static TSVectorStat *
2223 ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
2224 {
2225 	TSVector	txt = DatumGetTSVector(data);
2226 	uint32		i,
2227 				nbit = 0,
2228 				offset;
2229 
2230 	if (stat == NULL)
2231 	{							/* Init in first */
2232 		stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
2233 		stat->maxdepth = 1;
2234 	}
2235 
2236 	/* simple check of correctness */
2237 	if (txt == NULL || txt->size == 0)
2238 	{
2239 		if (txt && txt != (TSVector) DatumGetPointer(data))
2240 			pfree(txt);
2241 		return stat;
2242 	}
2243 
2244 	i = txt->size - 1;
2245 	for (; i > 0; i >>= 1)
2246 		nbit++;
2247 
2248 	nbit = 1 << nbit;
2249 	offset = (nbit - txt->size) / 2;
2250 
2251 	insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
2252 	chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
2253 
2254 	return stat;
2255 }
2256 
2257 static void
2258 ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
2259 				   TSVectorStat *stat)
2260 {
2261 	TupleDesc	tupdesc;
2262 	MemoryContext oldcontext;
2263 	StatEntry  *node;
2264 
2265 	funcctx->user_fctx = (void *) stat;
2266 
2267 	oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
2268 
2269 	stat->stack = palloc0(sizeof(StatEntry *) * (stat->maxdepth + 1));
2270 	stat->stackpos = 0;
2271 
2272 	node = stat->root;
2273 	/* find leftmost value */
2274 	if (node == NULL)
2275 		stat->stack[stat->stackpos] = NULL;
2276 	else
2277 		for (;;)
2278 		{
2279 			stat->stack[stat->stackpos] = node;
2280 			if (node->left)
2281 			{
2282 				stat->stackpos++;
2283 				node = node->left;
2284 			}
2285 			else
2286 				break;
2287 		}
2288 	Assert(stat->stackpos <= stat->maxdepth);
2289 
2290 	tupdesc = CreateTemplateTupleDesc(3, false);
2291 	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "word",
2292 					   TEXTOID, -1, 0);
2293 	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "ndoc",
2294 					   INT4OID, -1, 0);
2295 	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "nentry",
2296 					   INT4OID, -1, 0);
2297 	funcctx->tuple_desc = BlessTupleDesc(tupdesc);
2298 	funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
2299 
2300 	MemoryContextSwitchTo(oldcontext);
2301 }
2302 
2303 static StatEntry *
2304 walkStatEntryTree(TSVectorStat *stat)
2305 {
2306 	StatEntry  *node = stat->stack[stat->stackpos];
2307 
2308 	if (node == NULL)
2309 		return NULL;
2310 
2311 	if (node->ndoc != 0)
2312 	{
2313 		/* return entry itself: we already was at left sublink */
2314 		return node;
2315 	}
2316 	else if (node->right && node->right != stat->stack[stat->stackpos + 1])
2317 	{
2318 		/* go on right sublink */
2319 		stat->stackpos++;
2320 		node = node->right;
2321 
2322 		/* find most-left value */
2323 		for (;;)
2324 		{
2325 			stat->stack[stat->stackpos] = node;
2326 			if (node->left)
2327 			{
2328 				stat->stackpos++;
2329 				node = node->left;
2330 			}
2331 			else
2332 				break;
2333 		}
2334 		Assert(stat->stackpos <= stat->maxdepth);
2335 	}
2336 	else
2337 	{
2338 		/* we already return all left subtree, itself and  right subtree */
2339 		if (stat->stackpos == 0)
2340 			return NULL;
2341 
2342 		stat->stackpos--;
2343 		return walkStatEntryTree(stat);
2344 	}
2345 
2346 	return node;
2347 }
2348 
2349 static Datum
2350 ts_process_call(FuncCallContext *funcctx)
2351 {
2352 	TSVectorStat *st;
2353 	StatEntry  *entry;
2354 
2355 	st = (TSVectorStat *) funcctx->user_fctx;
2356 
2357 	entry = walkStatEntryTree(st);
2358 
2359 	if (entry != NULL)
2360 	{
2361 		Datum		result;
2362 		char	   *values[3];
2363 		char		ndoc[16];
2364 		char		nentry[16];
2365 		HeapTuple	tuple;
2366 
2367 		values[0] = palloc(entry->lenlexeme + 1);
2368 		memcpy(values[0], entry->lexeme, entry->lenlexeme);
2369 		(values[0])[entry->lenlexeme] = '\0';
2370 		sprintf(ndoc, "%d", entry->ndoc);
2371 		values[1] = ndoc;
2372 		sprintf(nentry, "%d", entry->nentry);
2373 		values[2] = nentry;
2374 
2375 		tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
2376 		result = HeapTupleGetDatum(tuple);
2377 
2378 		pfree(values[0]);
2379 
2380 		/* mark entry as already visited */
2381 		entry->ndoc = 0;
2382 
2383 		return result;
2384 	}
2385 
2386 	return (Datum) 0;
2387 }
2388 
2389 static TSVectorStat *
2390 ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
2391 {
2392 	char	   *query = text_to_cstring(txt);
2393 	TSVectorStat *stat;
2394 	bool		isnull;
2395 	Portal		portal;
2396 	SPIPlanPtr	plan;
2397 
2398 	if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2399 		/* internal error */
2400 		elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2401 
2402 	if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2403 		/* internal error */
2404 		elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2405 
2406 	SPI_cursor_fetch(portal, true, 100);
2407 
2408 	if (SPI_tuptable == NULL ||
2409 		SPI_tuptable->tupdesc->natts != 1 ||
2410 		!IsBinaryCoercible(SPI_gettypeid(SPI_tuptable->tupdesc, 1),
2411 						   TSVECTOROID))
2412 		ereport(ERROR,
2413 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2414 				 errmsg("ts_stat query must return one tsvector column")));
2415 
2416 	stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
2417 	stat->maxdepth = 1;
2418 
2419 	if (ws)
2420 	{
2421 		char	   *buf;
2422 
2423 		buf = VARDATA_ANY(ws);
2424 		while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
2425 		{
2426 			if (pg_mblen(buf) == 1)
2427 			{
2428 				switch (*buf)
2429 				{
2430 					case 'A':
2431 					case 'a':
2432 						stat->weight |= 1 << 3;
2433 						break;
2434 					case 'B':
2435 					case 'b':
2436 						stat->weight |= 1 << 2;
2437 						break;
2438 					case 'C':
2439 					case 'c':
2440 						stat->weight |= 1 << 1;
2441 						break;
2442 					case 'D':
2443 					case 'd':
2444 						stat->weight |= 1;
2445 						break;
2446 					default:
2447 						stat->weight |= 0;
2448 				}
2449 			}
2450 			buf += pg_mblen(buf);
2451 		}
2452 	}
2453 
2454 	while (SPI_processed > 0)
2455 	{
2456 		uint64		i;
2457 
2458 		for (i = 0; i < SPI_processed; i++)
2459 		{
2460 			Datum		data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
2461 
2462 			if (!isnull)
2463 				stat = ts_accum(persistentContext, stat, data);
2464 		}
2465 
2466 		SPI_freetuptable(SPI_tuptable);
2467 		SPI_cursor_fetch(portal, true, 100);
2468 	}
2469 
2470 	SPI_freetuptable(SPI_tuptable);
2471 	SPI_cursor_close(portal);
2472 	SPI_freeplan(plan);
2473 	pfree(query);
2474 
2475 	return stat;
2476 }
2477 
2478 Datum
2479 ts_stat1(PG_FUNCTION_ARGS)
2480 {
2481 	FuncCallContext *funcctx;
2482 	Datum		result;
2483 
2484 	if (SRF_IS_FIRSTCALL())
2485 	{
2486 		TSVectorStat *stat;
2487 		text	   *txt = PG_GETARG_TEXT_PP(0);
2488 
2489 		funcctx = SRF_FIRSTCALL_INIT();
2490 		SPI_connect();
2491 		stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
2492 		PG_FREE_IF_COPY(txt, 0);
2493 		ts_setup_firstcall(fcinfo, funcctx, stat);
2494 		SPI_finish();
2495 	}
2496 
2497 	funcctx = SRF_PERCALL_SETUP();
2498 	if ((result = ts_process_call(funcctx)) != (Datum) 0)
2499 		SRF_RETURN_NEXT(funcctx, result);
2500 	SRF_RETURN_DONE(funcctx);
2501 }
2502 
2503 Datum
2504 ts_stat2(PG_FUNCTION_ARGS)
2505 {
2506 	FuncCallContext *funcctx;
2507 	Datum		result;
2508 
2509 	if (SRF_IS_FIRSTCALL())
2510 	{
2511 		TSVectorStat *stat;
2512 		text	   *txt = PG_GETARG_TEXT_PP(0);
2513 		text	   *ws = PG_GETARG_TEXT_PP(1);
2514 
2515 		funcctx = SRF_FIRSTCALL_INIT();
2516 		SPI_connect();
2517 		stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
2518 		PG_FREE_IF_COPY(txt, 0);
2519 		PG_FREE_IF_COPY(ws, 1);
2520 		ts_setup_firstcall(fcinfo, funcctx, stat);
2521 		SPI_finish();
2522 	}
2523 
2524 	funcctx = SRF_PERCALL_SETUP();
2525 	if ((result = ts_process_call(funcctx)) != (Datum) 0)
2526 		SRF_RETURN_NEXT(funcctx, result);
2527 	SRF_RETURN_DONE(funcctx);
2528 }
2529 
2530 
2531 /*
2532  * Triggers for automatic update of a tsvector column from text column(s)
2533  *
2534  * Trigger arguments are either
2535  *		name of tsvector col, name of tsconfig to use, name(s) of text col(s)
2536  *		name of tsvector col, name of regconfig col, name(s) of text col(s)
2537  * ie, tsconfig can either be specified by name, or indirectly as the
2538  * contents of a regconfig field in the row.  If the name is used, it must
2539  * be explicitly schema-qualified.
2540  */
2541 Datum
2542 tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
2543 {
2544 	return tsvector_update_trigger(fcinfo, false);
2545 }
2546 
2547 Datum
2548 tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
2549 {
2550 	return tsvector_update_trigger(fcinfo, true);
2551 }
2552 
2553 static Datum
2554 tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
2555 {
2556 	TriggerData *trigdata;
2557 	Trigger    *trigger;
2558 	Relation	rel;
2559 	HeapTuple	rettuple = NULL;
2560 	int			tsvector_attr_num,
2561 				i;
2562 	ParsedText	prs;
2563 	Datum		datum;
2564 	bool		isnull;
2565 	text	   *txt;
2566 	Oid			cfgId;
2567 
2568 	/* Check call context */
2569 	if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
2570 		elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
2571 
2572 	trigdata = (TriggerData *) fcinfo->context;
2573 	if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
2574 		elog(ERROR, "tsvector_update_trigger: must be fired for row");
2575 	if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
2576 		elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
2577 
2578 	if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
2579 		rettuple = trigdata->tg_trigtuple;
2580 	else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
2581 		rettuple = trigdata->tg_newtuple;
2582 	else
2583 		elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
2584 
2585 	trigger = trigdata->tg_trigger;
2586 	rel = trigdata->tg_relation;
2587 
2588 	if (trigger->tgnargs < 3)
2589 		elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
2590 
2591 	/* Find the target tsvector column */
2592 	tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
2593 	if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
2594 		ereport(ERROR,
2595 				(errcode(ERRCODE_UNDEFINED_COLUMN),
2596 				 errmsg("tsvector column \"%s\" does not exist",
2597 						trigger->tgargs[0])));
2598 	/* This will effectively reject system columns, so no separate test: */
2599 	if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
2600 						   TSVECTOROID))
2601 		ereport(ERROR,
2602 				(errcode(ERRCODE_DATATYPE_MISMATCH),
2603 				 errmsg("column \"%s\" is not of tsvector type",
2604 						trigger->tgargs[0])));
2605 
2606 	/* Find the configuration to use */
2607 	if (config_column)
2608 	{
2609 		int			config_attr_num;
2610 
2611 		config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
2612 		if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
2613 			ereport(ERROR,
2614 					(errcode(ERRCODE_UNDEFINED_COLUMN),
2615 					 errmsg("configuration column \"%s\" does not exist",
2616 							trigger->tgargs[1])));
2617 		if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num),
2618 							   REGCONFIGOID))
2619 			ereport(ERROR,
2620 					(errcode(ERRCODE_DATATYPE_MISMATCH),
2621 					 errmsg("column \"%s\" is not of regconfig type",
2622 							trigger->tgargs[1])));
2623 
2624 		datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
2625 		if (isnull)
2626 			ereport(ERROR,
2627 					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
2628 					 errmsg("configuration column \"%s\" must not be null",
2629 							trigger->tgargs[1])));
2630 		cfgId = DatumGetObjectId(datum);
2631 	}
2632 	else
2633 	{
2634 		List	   *names;
2635 
2636 		names = stringToQualifiedNameList(trigger->tgargs[1]);
2637 		/* require a schema so that results are not search path dependent */
2638 		if (list_length(names) < 2)
2639 			ereport(ERROR,
2640 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2641 					 errmsg("text search configuration name \"%s\" must be schema-qualified",
2642 							trigger->tgargs[1])));
2643 		cfgId = get_ts_config_oid(names, false);
2644 	}
2645 
2646 	/* initialize parse state */
2647 	prs.lenwords = 32;
2648 	prs.curwords = 0;
2649 	prs.pos = 0;
2650 	prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
2651 
2652 	/* find all words in indexable column(s) */
2653 	for (i = 2; i < trigger->tgnargs; i++)
2654 	{
2655 		int			numattr;
2656 
2657 		numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
2658 		if (numattr == SPI_ERROR_NOATTRIBUTE)
2659 			ereport(ERROR,
2660 					(errcode(ERRCODE_UNDEFINED_COLUMN),
2661 					 errmsg("column \"%s\" does not exist",
2662 							trigger->tgargs[i])));
2663 		if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID))
2664 			ereport(ERROR,
2665 					(errcode(ERRCODE_DATATYPE_MISMATCH),
2666 					 errmsg("column \"%s\" is not of a character type",
2667 							trigger->tgargs[i])));
2668 
2669 		datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
2670 		if (isnull)
2671 			continue;
2672 
2673 		txt = DatumGetTextPP(datum);
2674 
2675 		parsetext(cfgId, &prs, VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt));
2676 
2677 		if (txt != (text *) DatumGetPointer(datum))
2678 			pfree(txt);
2679 	}
2680 
2681 	/* make tsvector value */
2682 	datum = TSVectorGetDatum(make_tsvector(&prs));
2683 	isnull = false;
2684 
2685 	/* and insert it into tuple */
2686 	rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
2687 										 1, &tsvector_attr_num,
2688 										 &datum, &isnull);
2689 
2690 	pfree(DatumGetPointer(datum));
2691 
2692 	return PointerGetDatum(rettuple);
2693 }
2694