1 /*-------------------------------------------------------------------------
2  *
3  * tsginidx.c
4  *	 GIN support functions for tsvector_ops
5  *
6  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *	  src/backend/utils/adt/tsginidx.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "access/gin.h"
17 #include "access/stratnum.h"
18 #include "miscadmin.h"
19 #include "tsearch/ts_type.h"
20 #include "tsearch/ts_utils.h"
21 #include "utils/builtins.h"
22 
23 
24 Datum
gin_cmp_tslexeme(PG_FUNCTION_ARGS)25 gin_cmp_tslexeme(PG_FUNCTION_ARGS)
26 {
27 	text	   *a = PG_GETARG_TEXT_PP(0);
28 	text	   *b = PG_GETARG_TEXT_PP(1);
29 	int			cmp;
30 
31 	cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
32 						  VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
33 						  false);
34 
35 	PG_FREE_IF_COPY(a, 0);
36 	PG_FREE_IF_COPY(b, 1);
37 	PG_RETURN_INT32(cmp);
38 }
39 
40 Datum
gin_cmp_prefix(PG_FUNCTION_ARGS)41 gin_cmp_prefix(PG_FUNCTION_ARGS)
42 {
43 	text	   *a = PG_GETARG_TEXT_PP(0);
44 	text	   *b = PG_GETARG_TEXT_PP(1);
45 
46 #ifdef NOT_USED
47 	StrategyNumber strategy = PG_GETARG_UINT16(2);
48 	Pointer		extra_data = PG_GETARG_POINTER(3);
49 #endif
50 	int			cmp;
51 
52 	cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
53 						  VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
54 						  true);
55 
56 	if (cmp < 0)
57 		cmp = 1;				/* prevent continue scan */
58 
59 	PG_FREE_IF_COPY(a, 0);
60 	PG_FREE_IF_COPY(b, 1);
61 	PG_RETURN_INT32(cmp);
62 }
63 
64 Datum
gin_extract_tsvector(PG_FUNCTION_ARGS)65 gin_extract_tsvector(PG_FUNCTION_ARGS)
66 {
67 	TSVector	vector = PG_GETARG_TSVECTOR(0);
68 	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
69 	Datum	   *entries = NULL;
70 
71 	*nentries = vector->size;
72 	if (vector->size > 0)
73 	{
74 		int			i;
75 		WordEntry  *we = ARRPTR(vector);
76 
77 		entries = (Datum *) palloc(sizeof(Datum) * vector->size);
78 
79 		for (i = 0; i < vector->size; i++)
80 		{
81 			text	   *txt;
82 
83 			txt = cstring_to_text_with_len(STRPTR(vector) + we->pos, we->len);
84 			entries[i] = PointerGetDatum(txt);
85 
86 			we++;
87 		}
88 	}
89 
90 	PG_FREE_IF_COPY(vector, 0);
91 	PG_RETURN_POINTER(entries);
92 }
93 
94 Datum
gin_extract_tsquery(PG_FUNCTION_ARGS)95 gin_extract_tsquery(PG_FUNCTION_ARGS)
96 {
97 	TSQuery		query = PG_GETARG_TSQUERY(0);
98 	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
99 
100 	/* StrategyNumber strategy = PG_GETARG_UINT16(2); */
101 	bool	  **ptr_partialmatch = (bool **) PG_GETARG_POINTER(3);
102 	Pointer   **extra_data = (Pointer **) PG_GETARG_POINTER(4);
103 
104 	/* bool   **nullFlags = (bool **) PG_GETARG_POINTER(5); */
105 	int32	   *searchMode = (int32 *) PG_GETARG_POINTER(6);
106 	Datum	   *entries = NULL;
107 
108 	*nentries = 0;
109 
110 	if (query->size > 0)
111 	{
112 		QueryItem  *item = GETQUERY(query);
113 		int32		i,
114 					j;
115 		bool	   *partialmatch;
116 		int		   *map_item_operand;
117 
118 		/*
119 		 * If the query doesn't have any required positive matches (for
120 		 * instance, it's something like '! foo'), we have to do a full index
121 		 * scan.
122 		 */
123 		if (tsquery_requires_match(item))
124 			*searchMode = GIN_SEARCH_MODE_DEFAULT;
125 		else
126 			*searchMode = GIN_SEARCH_MODE_ALL;
127 
128 		/* count number of VAL items */
129 		j = 0;
130 		for (i = 0; i < query->size; i++)
131 		{
132 			if (item[i].type == QI_VAL)
133 				j++;
134 		}
135 		*nentries = j;
136 
137 		entries = (Datum *) palloc(sizeof(Datum) * j);
138 		partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * j);
139 
140 		/*
141 		 * Make map to convert item's number to corresponding operand's (the
142 		 * same, entry's) number. Entry's number is used in check array in
143 		 * consistent method. We use the same map for each entry.
144 		 */
145 		*extra_data = (Pointer *) palloc(sizeof(Pointer) * j);
146 		map_item_operand = (int *) palloc0(sizeof(int) * query->size);
147 
148 		/* Now rescan the VAL items and fill in the arrays */
149 		j = 0;
150 		for (i = 0; i < query->size; i++)
151 		{
152 			if (item[i].type == QI_VAL)
153 			{
154 				QueryOperand *val = &item[i].qoperand;
155 				text	   *txt;
156 
157 				txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance,
158 											   val->length);
159 				entries[j] = PointerGetDatum(txt);
160 				partialmatch[j] = val->prefix;
161 				(*extra_data)[j] = (Pointer) map_item_operand;
162 				map_item_operand[i] = j;
163 				j++;
164 			}
165 		}
166 	}
167 
168 	PG_FREE_IF_COPY(query, 0);
169 
170 	PG_RETURN_POINTER(entries);
171 }
172 
173 typedef struct
174 {
175 	QueryItem  *first_item;
176 	GinTernaryValue *check;
177 	int		   *map_item_operand;
178 } GinChkVal;
179 
180 /*
181  * TS_execute callback for matching a tsquery operand to GIN index data
182  */
183 static TSTernaryValue
checkcondition_gin(void * checkval,QueryOperand * val,ExecPhraseData * data)184 checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
185 {
186 	GinChkVal  *gcv = (GinChkVal *) checkval;
187 	int			j;
188 	GinTernaryValue result;
189 
190 	/* convert item's number to corresponding entry's (operand's) number */
191 	j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];
192 
193 	/* determine presence of current entry in indexed value */
194 	result = gcv->check[j];
195 
196 	/*
197 	 * If any val requiring a weight is used or caller needs position
198 	 * information then we must recheck, so replace TRUE with MAYBE.
199 	 */
200 	if (result == GIN_TRUE)
201 	{
202 		if (val->weight != 0 || data != NULL)
203 			result = GIN_MAYBE;
204 	}
205 
206 	/*
207 	 * We rely on GinTernaryValue and TSTernaryValue using equivalent value
208 	 * assignments.  We could use a switch statement to map the values if that
209 	 * ever stops being true, but it seems unlikely to happen.
210 	 */
211 	return (TSTernaryValue) result;
212 }
213 
214 Datum
gin_tsquery_consistent(PG_FUNCTION_ARGS)215 gin_tsquery_consistent(PG_FUNCTION_ARGS)
216 {
217 	bool	   *check = (bool *) PG_GETARG_POINTER(0);
218 
219 	/* StrategyNumber strategy = PG_GETARG_UINT16(1); */
220 	TSQuery		query = PG_GETARG_TSQUERY(2);
221 
222 	/* int32	nkeys = PG_GETARG_INT32(3); */
223 	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
224 	bool	   *recheck = (bool *) PG_GETARG_POINTER(5);
225 	bool		res = false;
226 
227 	/* Initially assume query doesn't require recheck */
228 	*recheck = false;
229 
230 	if (query->size > 0)
231 	{
232 		GinChkVal	gcv;
233 
234 		/*
235 		 * check-parameter array has one entry for each value (operand) in the
236 		 * query.
237 		 */
238 		gcv.first_item = GETQUERY(query);
239 		StaticAssertStmt(sizeof(GinTernaryValue) == sizeof(bool),
240 						 "sizes of GinTernaryValue and bool are not equal");
241 		gcv.check = (GinTernaryValue *) check;
242 		gcv.map_item_operand = (int *) (extra_data[0]);
243 
244 		switch (TS_execute_ternary(GETQUERY(query),
245 								   &gcv,
246 								   TS_EXEC_PHRASE_NO_POS,
247 								   checkcondition_gin))
248 		{
249 			case TS_NO:
250 				res = false;
251 				break;
252 			case TS_YES:
253 				res = true;
254 				break;
255 			case TS_MAYBE:
256 				res = true;
257 				*recheck = true;
258 				break;
259 		}
260 	}
261 
262 	PG_RETURN_BOOL(res);
263 }
264 
265 Datum
gin_tsquery_triconsistent(PG_FUNCTION_ARGS)266 gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
267 {
268 	GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
269 
270 	/* StrategyNumber strategy = PG_GETARG_UINT16(1); */
271 	TSQuery		query = PG_GETARG_TSQUERY(2);
272 
273 	/* int32	nkeys = PG_GETARG_INT32(3); */
274 	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
275 	GinTernaryValue res = GIN_FALSE;
276 
277 	if (query->size > 0)
278 	{
279 		GinChkVal	gcv;
280 
281 		/*
282 		 * check-parameter array has one entry for each value (operand) in the
283 		 * query.
284 		 */
285 		gcv.first_item = GETQUERY(query);
286 		gcv.check = check;
287 		gcv.map_item_operand = (int *) (extra_data[0]);
288 
289 		res = TS_execute_ternary(GETQUERY(query),
290 								 &gcv,
291 								 TS_EXEC_PHRASE_NO_POS,
292 								 checkcondition_gin);
293 	}
294 
295 	PG_RETURN_GIN_TERNARY_VALUE(res);
296 }
297 
298 /*
299  * Formerly, gin_extract_tsvector had only two arguments.  Now it has three,
300  * but we still need a pg_proc entry with two args to support reloading
301  * pre-9.1 contrib/tsearch2 opclass declarations.  This compatibility
302  * function should go away eventually.  (Note: you might say "hey, but the
303  * code above is only *using* two args, so let's just declare it that way".
304  * If you try that you'll find the opr_sanity regression test complains.)
305  */
306 Datum
gin_extract_tsvector_2args(PG_FUNCTION_ARGS)307 gin_extract_tsvector_2args(PG_FUNCTION_ARGS)
308 {
309 	if (PG_NARGS() < 3)			/* should not happen */
310 		elog(ERROR, "gin_extract_tsvector requires three arguments");
311 	return gin_extract_tsvector(fcinfo);
312 }
313 
314 /*
315  * Likewise, we need a stub version of gin_extract_tsquery declared with
316  * only five arguments.
317  */
318 Datum
gin_extract_tsquery_5args(PG_FUNCTION_ARGS)319 gin_extract_tsquery_5args(PG_FUNCTION_ARGS)
320 {
321 	if (PG_NARGS() < 7)			/* should not happen */
322 		elog(ERROR, "gin_extract_tsquery requires seven arguments");
323 	return gin_extract_tsquery(fcinfo);
324 }
325 
326 /*
327  * Likewise, we need a stub version of gin_tsquery_consistent declared with
328  * only six arguments.
329  */
330 Datum
gin_tsquery_consistent_6args(PG_FUNCTION_ARGS)331 gin_tsquery_consistent_6args(PG_FUNCTION_ARGS)
332 {
333 	if (PG_NARGS() < 8)			/* should not happen */
334 		elog(ERROR, "gin_tsquery_consistent requires eight arguments");
335 	return gin_tsquery_consistent(fcinfo);
336 }
337 
338 /*
339  * Likewise, a stub version of gin_extract_tsquery declared with argument
340  * types that are no longer considered appropriate.
341  */
342 Datum
gin_extract_tsquery_oldsig(PG_FUNCTION_ARGS)343 gin_extract_tsquery_oldsig(PG_FUNCTION_ARGS)
344 {
345 	return gin_extract_tsquery(fcinfo);
346 }
347 
348 /*
349  * Likewise, a stub version of gin_tsquery_consistent declared with argument
350  * types that are no longer considered appropriate.
351  */
352 Datum
gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS)353 gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS)
354 {
355 	return gin_tsquery_consistent(fcinfo);
356 }
357