/*-------------------------------------------------------------------------
 *
 * tsginidx.c
 *	 GIN support functions for tsvector_ops
 *
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/utils/adt/tsginidx.c
 *
 *-------------------------------------------------------------------------
 */
14 #include "postgres.h"
15 
16 #include "access/gin.h"
17 #include "access/stratnum.h"
18 #include "miscadmin.h"
19 #include "tsearch/ts_type.h"
20 #include "tsearch/ts_utils.h"
21 #include "utils/builtins.h"
22 
23 
24 Datum
gin_cmp_tslexeme(PG_FUNCTION_ARGS)25 gin_cmp_tslexeme(PG_FUNCTION_ARGS)
26 {
27 	text	   *a = PG_GETARG_TEXT_PP(0);
28 	text	   *b = PG_GETARG_TEXT_PP(1);
29 	int			cmp;
30 
31 	cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
32 						  VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
33 						  false);
34 
35 	PG_FREE_IF_COPY(a, 0);
36 	PG_FREE_IF_COPY(b, 1);
37 	PG_RETURN_INT32(cmp);
38 }
39 
40 Datum
gin_cmp_prefix(PG_FUNCTION_ARGS)41 gin_cmp_prefix(PG_FUNCTION_ARGS)
42 {
43 	text	   *a = PG_GETARG_TEXT_PP(0);
44 	text	   *b = PG_GETARG_TEXT_PP(1);
45 
46 #ifdef NOT_USED
47 	StrategyNumber strategy = PG_GETARG_UINT16(2);
48 	Pointer		extra_data = PG_GETARG_POINTER(3);
49 #endif
50 	int			cmp;
51 
52 	cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
53 						  VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
54 						  true);
55 
56 	if (cmp < 0)
57 		cmp = 1;				/* prevent continue scan */
58 
59 	PG_FREE_IF_COPY(a, 0);
60 	PG_FREE_IF_COPY(b, 1);
61 	PG_RETURN_INT32(cmp);
62 }
63 
64 Datum
gin_extract_tsvector(PG_FUNCTION_ARGS)65 gin_extract_tsvector(PG_FUNCTION_ARGS)
66 {
67 	TSVector	vector = PG_GETARG_TSVECTOR(0);
68 	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
69 	Datum	   *entries = NULL;
70 
71 	*nentries = vector->size;
72 	if (vector->size > 0)
73 	{
74 		int			i;
75 		WordEntry  *we = ARRPTR(vector);
76 
77 		entries = (Datum *) palloc(sizeof(Datum) * vector->size);
78 
79 		for (i = 0; i < vector->size; i++)
80 		{
81 			text	   *txt;
82 
83 			txt = cstring_to_text_with_len(STRPTR(vector) + we->pos, we->len);
84 			entries[i] = PointerGetDatum(txt);
85 
86 			we++;
87 		}
88 	}
89 
90 	PG_FREE_IF_COPY(vector, 0);
91 	PG_RETURN_POINTER(entries);
92 }
93 
94 Datum
gin_extract_tsquery(PG_FUNCTION_ARGS)95 gin_extract_tsquery(PG_FUNCTION_ARGS)
96 {
97 	TSQuery		query = PG_GETARG_TSQUERY(0);
98 	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
99 
100 	/* StrategyNumber strategy = PG_GETARG_UINT16(2); */
101 	bool	  **ptr_partialmatch = (bool **) PG_GETARG_POINTER(3);
102 	Pointer   **extra_data = (Pointer **) PG_GETARG_POINTER(4);
103 
104 	/* bool   **nullFlags = (bool **) PG_GETARG_POINTER(5); */
105 	int32	   *searchMode = (int32 *) PG_GETARG_POINTER(6);
106 	Datum	   *entries = NULL;
107 
108 	*nentries = 0;
109 
110 	if (query->size > 0)
111 	{
112 		QueryItem  *item = GETQUERY(query);
113 		int32		i,
114 					j;
115 		bool	   *partialmatch;
116 		int		   *map_item_operand;
117 
118 		/*
119 		 * If the query doesn't have any required positive matches (for
120 		 * instance, it's something like '! foo'), we have to do a full index
121 		 * scan.
122 		 */
123 		if (tsquery_requires_match(item))
124 			*searchMode = GIN_SEARCH_MODE_DEFAULT;
125 		else
126 			*searchMode = GIN_SEARCH_MODE_ALL;
127 
128 		/* count number of VAL items */
129 		j = 0;
130 		for (i = 0; i < query->size; i++)
131 		{
132 			if (item[i].type == QI_VAL)
133 				j++;
134 		}
135 		*nentries = j;
136 
137 		entries = (Datum *) palloc(sizeof(Datum) * j);
138 		partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * j);
139 
140 		/*
141 		 * Make map to convert item's number to corresponding operand's (the
142 		 * same, entry's) number. Entry's number is used in check array in
143 		 * consistent method. We use the same map for each entry.
144 		 */
145 		*extra_data = (Pointer *) palloc(sizeof(Pointer) * j);
146 		map_item_operand = (int *) palloc0(sizeof(int) * query->size);
147 
148 		/* Now rescan the VAL items and fill in the arrays */
149 		j = 0;
150 		for (i = 0; i < query->size; i++)
151 		{
152 			if (item[i].type == QI_VAL)
153 			{
154 				QueryOperand *val = &item[i].qoperand;
155 				text	   *txt;
156 
157 				txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance,
158 											   val->length);
159 				entries[j] = PointerGetDatum(txt);
160 				partialmatch[j] = val->prefix;
161 				(*extra_data)[j] = (Pointer) map_item_operand;
162 				map_item_operand[i] = j;
163 				j++;
164 			}
165 		}
166 	}
167 
168 	PG_FREE_IF_COPY(query, 0);
169 
170 	PG_RETURN_POINTER(entries);
171 }
172 
173 typedef struct
174 {
175 	QueryItem  *first_item;
176 	GinTernaryValue *check;
177 	int		   *map_item_operand;
178 	bool	   *need_recheck;
179 } GinChkVal;
180 
181 static GinTernaryValue
checkcondition_gin_internal(GinChkVal * gcv,QueryOperand * val,ExecPhraseData * data)182 checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *data)
183 {
184 	int			j;
185 
186 	/*
187 	 * if any val requiring a weight is used or caller needs position
188 	 * information then set recheck flag
189 	 */
190 	if (val->weight != 0 || data != NULL)
191 		*(gcv->need_recheck) = true;
192 
193 	/* convert item's number to corresponding entry's (operand's) number */
194 	j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];
195 
196 	/* return presence of current entry in indexed value */
197 	return gcv->check[j];
198 }
199 
200 /*
201  * Wrapper of check condition function for TS_execute.
202  */
203 static bool
checkcondition_gin(void * checkval,QueryOperand * val,ExecPhraseData * data)204 checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
205 {
206 	return checkcondition_gin_internal((GinChkVal *) checkval,
207 									   val,
208 									   data) != GIN_FALSE;
209 }
210 
211 /*
212  * Evaluate tsquery boolean expression using ternary logic.
213  *
214  * Note: the reason we can't use TS_execute() for this is that its API
215  * for the checkcondition callback doesn't allow a MAYBE result to be
216  * returned, but we might have MAYBEs in the gcv->check array.
217  * Perhaps we should change that API.
218  */
219 static GinTernaryValue
TS_execute_ternary(GinChkVal * gcv,QueryItem * curitem,bool in_phrase)220 TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem, bool in_phrase)
221 {
222 	GinTernaryValue val1,
223 				val2,
224 				result;
225 
226 	/* since this function recurses, it could be driven to stack overflow */
227 	check_stack_depth();
228 
229 	if (curitem->type == QI_VAL)
230 		return
231 			checkcondition_gin_internal(gcv,
232 										(QueryOperand *) curitem,
233 										NULL /* don't have position info */ );
234 
235 	switch (curitem->qoperator.oper)
236 	{
237 		case OP_NOT:
238 
239 			/*
240 			 * Below a phrase search, force NOT's result to MAYBE.  We cannot
241 			 * invert a TRUE result from the subexpression to FALSE, since
242 			 * TRUE only says that the subexpression matches somewhere, not
243 			 * that it matches everywhere, so there might be positions where
244 			 * the NOT will match.  We could invert FALSE to TRUE, but there's
245 			 * little point in distinguishing TRUE from MAYBE, since a recheck
246 			 * will have been forced already.
247 			 */
248 			if (in_phrase)
249 				return GIN_MAYBE;
250 
251 			result = TS_execute_ternary(gcv, curitem + 1, in_phrase);
252 			if (result == GIN_MAYBE)
253 				return result;
254 			return !result;
255 
256 		case OP_PHRASE:
257 
258 			/*
259 			 * GIN doesn't contain any information about positions, so treat
260 			 * OP_PHRASE as OP_AND with recheck requirement, and always
261 			 * reporting MAYBE not TRUE.
262 			 */
263 			*(gcv->need_recheck) = true;
264 			/* Pass down in_phrase == true in case there's a NOT below */
265 			in_phrase = true;
266 
267 			/* FALL THRU */
268 
269 		case OP_AND:
270 			val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left,
271 									  in_phrase);
272 			if (val1 == GIN_FALSE)
273 				return GIN_FALSE;
274 			val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase);
275 			if (val2 == GIN_FALSE)
276 				return GIN_FALSE;
277 			if (val1 == GIN_TRUE && val2 == GIN_TRUE &&
278 				curitem->qoperator.oper != OP_PHRASE)
279 				return GIN_TRUE;
280 			else
281 				return GIN_MAYBE;
282 
283 		case OP_OR:
284 			val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left,
285 									  in_phrase);
286 			if (val1 == GIN_TRUE)
287 				return GIN_TRUE;
288 			val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase);
289 			if (val2 == GIN_TRUE)
290 				return GIN_TRUE;
291 			if (val1 == GIN_FALSE && val2 == GIN_FALSE)
292 				return GIN_FALSE;
293 			else
294 				return GIN_MAYBE;
295 
296 		default:
297 			elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
298 	}
299 
300 	/* not reachable, but keep compiler quiet */
301 	return false;
302 }
303 
304 Datum
gin_tsquery_consistent(PG_FUNCTION_ARGS)305 gin_tsquery_consistent(PG_FUNCTION_ARGS)
306 {
307 	bool	   *check = (bool *) PG_GETARG_POINTER(0);
308 
309 	/* StrategyNumber strategy = PG_GETARG_UINT16(1); */
310 	TSQuery		query = PG_GETARG_TSQUERY(2);
311 
312 	/* int32	nkeys = PG_GETARG_INT32(3); */
313 	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
314 	bool	   *recheck = (bool *) PG_GETARG_POINTER(5);
315 	bool		res = false;
316 
317 	/* Initially assume query doesn't require recheck */
318 	*recheck = false;
319 
320 	if (query->size > 0)
321 	{
322 		GinChkVal	gcv;
323 
324 		/*
325 		 * check-parameter array has one entry for each value (operand) in the
326 		 * query.
327 		 */
328 		gcv.first_item = GETQUERY(query);
329 		StaticAssertStmt(sizeof(GinTernaryValue) == sizeof(bool),
330 						 "sizes of GinTernaryValue and bool are not equal");
331 		gcv.check = (GinTernaryValue *) check;
332 		gcv.map_item_operand = (int *) (extra_data[0]);
333 		gcv.need_recheck = recheck;
334 
335 		res = TS_execute(GETQUERY(query),
336 						 &gcv,
337 						 TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_NO_POS,
338 						 checkcondition_gin);
339 	}
340 
341 	PG_RETURN_BOOL(res);
342 }
343 
344 Datum
gin_tsquery_triconsistent(PG_FUNCTION_ARGS)345 gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
346 {
347 	GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
348 
349 	/* StrategyNumber strategy = PG_GETARG_UINT16(1); */
350 	TSQuery		query = PG_GETARG_TSQUERY(2);
351 
352 	/* int32	nkeys = PG_GETARG_INT32(3); */
353 	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
354 	GinTernaryValue res = GIN_FALSE;
355 	bool		recheck;
356 
357 	/* Initially assume query doesn't require recheck */
358 	recheck = false;
359 
360 	if (query->size > 0)
361 	{
362 		GinChkVal	gcv;
363 
364 		/*
365 		 * check-parameter array has one entry for each value (operand) in the
366 		 * query.
367 		 */
368 		gcv.first_item = GETQUERY(query);
369 		gcv.check = check;
370 		gcv.map_item_operand = (int *) (extra_data[0]);
371 		gcv.need_recheck = &recheck;
372 
373 		res = TS_execute_ternary(&gcv, GETQUERY(query), false);
374 
375 		if (res == GIN_TRUE && recheck)
376 			res = GIN_MAYBE;
377 	}
378 
379 	PG_RETURN_GIN_TERNARY_VALUE(res);
380 }
381 
382 /*
383  * Formerly, gin_extract_tsvector had only two arguments.  Now it has three,
384  * but we still need a pg_proc entry with two args to support reloading
385  * pre-9.1 contrib/tsearch2 opclass declarations.  This compatibility
386  * function should go away eventually.  (Note: you might say "hey, but the
387  * code above is only *using* two args, so let's just declare it that way".
388  * If you try that you'll find the opr_sanity regression test complains.)
389  */
390 Datum
gin_extract_tsvector_2args(PG_FUNCTION_ARGS)391 gin_extract_tsvector_2args(PG_FUNCTION_ARGS)
392 {
393 	if (PG_NARGS() < 3)			/* should not happen */
394 		elog(ERROR, "gin_extract_tsvector requires three arguments");
395 	return gin_extract_tsvector(fcinfo);
396 }
397 
398 /*
399  * Likewise, we need a stub version of gin_extract_tsquery declared with
400  * only five arguments.
401  */
402 Datum
gin_extract_tsquery_5args(PG_FUNCTION_ARGS)403 gin_extract_tsquery_5args(PG_FUNCTION_ARGS)
404 {
405 	if (PG_NARGS() < 7)			/* should not happen */
406 		elog(ERROR, "gin_extract_tsquery requires seven arguments");
407 	return gin_extract_tsquery(fcinfo);
408 }
409 
410 /*
411  * Likewise, we need a stub version of gin_tsquery_consistent declared with
412  * only six arguments.
413  */
414 Datum
gin_tsquery_consistent_6args(PG_FUNCTION_ARGS)415 gin_tsquery_consistent_6args(PG_FUNCTION_ARGS)
416 {
417 	if (PG_NARGS() < 8)			/* should not happen */
418 		elog(ERROR, "gin_tsquery_consistent requires eight arguments");
419 	return gin_tsquery_consistent(fcinfo);
420 }
421 
422 /*
423  * Likewise, a stub version of gin_extract_tsquery declared with argument
424  * types that are no longer considered appropriate.
425  */
426 Datum
gin_extract_tsquery_oldsig(PG_FUNCTION_ARGS)427 gin_extract_tsquery_oldsig(PG_FUNCTION_ARGS)
428 {
429 	return gin_extract_tsquery(fcinfo);
430 }
431 
432 /*
433  * Likewise, a stub version of gin_tsquery_consistent declared with argument
434  * types that are no longer considered appropriate.
435  */
436 Datum
gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS)437 gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS)
438 {
439 	return gin_tsquery_consistent(fcinfo);
440 }
441