1 /*
2  * contrib/pg_trgm/trgm_gin.c
3  */
4 #include "postgres.h"
5 
6 #include "trgm.h"
7 
8 #include "access/gin.h"
9 #include "access/stratnum.h"
10 #include "fmgr.h"
11 
12 
13 PG_FUNCTION_INFO_V1(gin_extract_trgm);
14 PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
15 PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
16 PG_FUNCTION_INFO_V1(gin_trgm_consistent);
17 PG_FUNCTION_INFO_V1(gin_trgm_triconsistent);
18 
19 /*
20  * This function can only be called if a pre-9.1 version of the GIN operator
21  * class definition is present in the catalogs (probably as a consequence
22  * of upgrade-in-place).  Cope.
23  */
24 Datum
25 gin_extract_trgm(PG_FUNCTION_ARGS)
26 {
27 	if (PG_NARGS() == 3)
28 		return gin_extract_value_trgm(fcinfo);
29 	if (PG_NARGS() == 7)
30 		return gin_extract_query_trgm(fcinfo);
31 	elog(ERROR, "unexpected number of arguments to gin_extract_trgm");
32 	PG_RETURN_NULL();
33 }
34 
35 Datum
36 gin_extract_value_trgm(PG_FUNCTION_ARGS)
37 {
38 	text	   *val = (text *) PG_GETARG_TEXT_PP(0);
39 	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
40 	Datum	   *entries = NULL;
41 	TRGM	   *trg;
42 	int32		trglen;
43 
44 	*nentries = 0;
45 
46 	trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
47 	trglen = ARRNELEM(trg);
48 
49 	if (trglen > 0)
50 	{
51 		trgm	   *ptr;
52 		int32		i;
53 
54 		*nentries = trglen;
55 		entries = (Datum *) palloc(sizeof(Datum) * trglen);
56 
57 		ptr = GETARR(trg);
58 		for (i = 0; i < trglen; i++)
59 		{
60 			int32		item = trgm2int(ptr);
61 
62 			entries[i] = Int32GetDatum(item);
63 			ptr++;
64 		}
65 	}
66 
67 	PG_RETURN_POINTER(entries);
68 }
69 
70 Datum
71 gin_extract_query_trgm(PG_FUNCTION_ARGS)
72 {
73 	text	   *val = (text *) PG_GETARG_TEXT_PP(0);
74 	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
75 	StrategyNumber strategy = PG_GETARG_UINT16(2);
76 
77 	/* bool   **pmatch = (bool **) PG_GETARG_POINTER(3); */
78 	Pointer   **extra_data = (Pointer **) PG_GETARG_POINTER(4);
79 
80 	/* bool   **nullFlags = (bool **) PG_GETARG_POINTER(5); */
81 	int32	   *searchMode = (int32 *) PG_GETARG_POINTER(6);
82 	Datum	   *entries = NULL;
83 	TRGM	   *trg;
84 	int32		trglen;
85 	trgm	   *ptr;
86 	TrgmPackedGraph *graph;
87 	int32		i;
88 
89 	switch (strategy)
90 	{
91 		case SimilarityStrategyNumber:
92 		case WordSimilarityStrategyNumber:
93 		case StrictWordSimilarityStrategyNumber:
94 			trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
95 			break;
96 		case ILikeStrategyNumber:
97 #ifndef IGNORECASE
98 			elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
99 #endif
100 			/* FALL THRU */
101 		case LikeStrategyNumber:
102 
103 			/*
104 			 * For wildcard search we extract all the trigrams that every
105 			 * potentially-matching string must include.
106 			 */
107 			trg = generate_wildcard_trgm(VARDATA_ANY(val),
108 										 VARSIZE_ANY_EXHDR(val));
109 			break;
110 		case RegExpICaseStrategyNumber:
111 #ifndef IGNORECASE
112 			elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
113 #endif
114 			/* FALL THRU */
115 		case RegExpStrategyNumber:
116 			trg = createTrgmNFA(val, PG_GET_COLLATION(),
117 								&graph, CurrentMemoryContext);
118 			if (trg && ARRNELEM(trg) > 0)
119 			{
120 				/*
121 				 * Successful regex processing: store NFA-like graph as
122 				 * extra_data.  GIN API requires an array of nentries
123 				 * Pointers, but we just put the same value in each element.
124 				 */
125 				trglen = ARRNELEM(trg);
126 				*extra_data = (Pointer *) palloc(sizeof(Pointer) * trglen);
127 				for (i = 0; i < trglen; i++)
128 					(*extra_data)[i] = (Pointer) graph;
129 			}
130 			else
131 			{
132 				/* No result: have to do full index scan. */
133 				*nentries = 0;
134 				*searchMode = GIN_SEARCH_MODE_ALL;
135 				PG_RETURN_POINTER(entries);
136 			}
137 			break;
138 		default:
139 			elog(ERROR, "unrecognized strategy number: %d", strategy);
140 			trg = NULL;			/* keep compiler quiet */
141 			break;
142 	}
143 
144 	trglen = ARRNELEM(trg);
145 	*nentries = trglen;
146 
147 	if (trglen > 0)
148 	{
149 		entries = (Datum *) palloc(sizeof(Datum) * trglen);
150 		ptr = GETARR(trg);
151 		for (i = 0; i < trglen; i++)
152 		{
153 			int32		item = trgm2int(ptr);
154 
155 			entries[i] = Int32GetDatum(item);
156 			ptr++;
157 		}
158 	}
159 
160 	/*
161 	 * If no trigram was extracted then we have to scan all the index.
162 	 */
163 	if (trglen == 0)
164 		*searchMode = GIN_SEARCH_MODE_ALL;
165 
166 	PG_RETURN_POINTER(entries);
167 }
168 
169 Datum
170 gin_trgm_consistent(PG_FUNCTION_ARGS)
171 {
172 	bool	   *check = (bool *) PG_GETARG_POINTER(0);
173 	StrategyNumber strategy = PG_GETARG_UINT16(1);
174 
175 	/* text    *query = PG_GETARG_TEXT_PP(2); */
176 	int32		nkeys = PG_GETARG_INT32(3);
177 	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
178 	bool	   *recheck = (bool *) PG_GETARG_POINTER(5);
179 	bool		res;
180 	int32		i,
181 				ntrue;
182 	double		nlimit;
183 
184 	/* All cases served by this function are inexact */
185 	*recheck = true;
186 
187 	switch (strategy)
188 	{
189 		case SimilarityStrategyNumber:
190 		case WordSimilarityStrategyNumber:
191 		case StrictWordSimilarityStrategyNumber:
192 			nlimit = index_strategy_get_limit(strategy);
193 
194 			/* Count the matches */
195 			ntrue = 0;
196 			for (i = 0; i < nkeys; i++)
197 			{
198 				if (check[i])
199 					ntrue++;
200 			}
201 
202 			/*--------------------
203 			 * If DIVUNION is defined then similarity formula is:
204 			 * c / (len1 + len2 - c)
205 			 * where c is number of common trigrams and it stands as ntrue in
206 			 * this code.  Here we don't know value of len2 but we can assume
207 			 * that c (ntrue) is a lower bound of len2, so upper bound of
208 			 * similarity is:
209 			 * c / (len1 + c - c)  => c / len1
210 			 * If DIVUNION is not defined then similarity formula is:
211 			 * c / max(len1, len2)
212 			 * And again, c (ntrue) is a lower bound of len2, but c <= len1
213 			 * just by definition and, consequently, upper bound of
214 			 * similarity is just c / len1.
215 			 * So, independently on DIVUNION the upper bound formula is the same.
216 			 */
217 			res = (nkeys == 0) ? false :
218 				(((((float4) ntrue) / ((float4) nkeys))) >= nlimit);
219 			break;
220 		case ILikeStrategyNumber:
221 #ifndef IGNORECASE
222 			elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
223 #endif
224 			/* FALL THRU */
225 		case LikeStrategyNumber:
226 			/* Check if all extracted trigrams are presented. */
227 			res = true;
228 			for (i = 0; i < nkeys; i++)
229 			{
230 				if (!check[i])
231 				{
232 					res = false;
233 					break;
234 				}
235 			}
236 			break;
237 		case RegExpICaseStrategyNumber:
238 #ifndef IGNORECASE
239 			elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
240 #endif
241 			/* FALL THRU */
242 		case RegExpStrategyNumber:
243 			if (nkeys < 1)
244 			{
245 				/* Regex processing gave no result: do full index scan */
246 				res = true;
247 			}
248 			else
249 				res = trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
250 										 check);
251 			break;
252 		default:
253 			elog(ERROR, "unrecognized strategy number: %d", strategy);
254 			res = false;		/* keep compiler quiet */
255 			break;
256 	}
257 
258 	PG_RETURN_BOOL(res);
259 }
260 
261 /*
262  * In all cases, GIN_TRUE is at least as favorable to inclusion as
263  * GIN_MAYBE. If no better option is available, simply treat
264  * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary
265  * consistent function.
266  */
267 Datum
268 gin_trgm_triconsistent(PG_FUNCTION_ARGS)
269 {
270 	GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
271 	StrategyNumber strategy = PG_GETARG_UINT16(1);
272 
273 	/* text    *query = PG_GETARG_TEXT_PP(2); */
274 	int32		nkeys = PG_GETARG_INT32(3);
275 	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
276 	GinTernaryValue res = GIN_MAYBE;
277 	int32		i,
278 				ntrue;
279 	bool	   *boolcheck;
280 	double		nlimit;
281 
282 	switch (strategy)
283 	{
284 		case SimilarityStrategyNumber:
285 		case WordSimilarityStrategyNumber:
286 		case StrictWordSimilarityStrategyNumber:
287 			nlimit = index_strategy_get_limit(strategy);
288 
289 			/* Count the matches */
290 			ntrue = 0;
291 			for (i = 0; i < nkeys; i++)
292 			{
293 				if (check[i] != GIN_FALSE)
294 					ntrue++;
295 			}
296 
297 			/*
298 			 * See comment in gin_trgm_consistent() about * upper bound
299 			 * formula
300 			 */
301 			res = (nkeys == 0)
302 				? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit)
303 							   ? GIN_MAYBE : GIN_FALSE);
304 			break;
305 		case ILikeStrategyNumber:
306 #ifndef IGNORECASE
307 			elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
308 #endif
309 			/* FALL THRU */
310 		case LikeStrategyNumber:
311 			/* Check if all extracted trigrams are presented. */
312 			res = GIN_MAYBE;
313 			for (i = 0; i < nkeys; i++)
314 			{
315 				if (check[i] == GIN_FALSE)
316 				{
317 					res = GIN_FALSE;
318 					break;
319 				}
320 			}
321 			break;
322 		case RegExpICaseStrategyNumber:
323 #ifndef IGNORECASE
324 			elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
325 #endif
326 			/* FALL THRU */
327 		case RegExpStrategyNumber:
328 			if (nkeys < 1)
329 			{
330 				/* Regex processing gave no result: do full index scan */
331 				res = GIN_MAYBE;
332 			}
333 			else
334 			{
335 				/*
336 				 * As trigramsMatchGraph implements a monotonic boolean
337 				 * function, promoting all GIN_MAYBE keys to GIN_TRUE will
338 				 * give a conservative result.
339 				 */
340 				boolcheck = (bool *) palloc(sizeof(bool) * nkeys);
341 				for (i = 0; i < nkeys; i++)
342 					boolcheck[i] = (check[i] != GIN_FALSE);
343 				if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
344 										boolcheck))
345 					res = GIN_FALSE;
346 				pfree(boolcheck);
347 			}
348 			break;
349 		default:
350 			elog(ERROR, "unrecognized strategy number: %d", strategy);
351 			res = GIN_FALSE;	/* keep compiler quiet */
352 			break;
353 	}
354 
355 	/* All cases served by this function are inexact */
356 	Assert(res != GIN_TRUE);
357 	PG_RETURN_GIN_TERNARY_VALUE(res);
358 }
359