1 /*
2  * contrib/pg_trgm/trgm_gin.c
3  */
4 #include "postgres.h"
5 
6 #include "trgm.h"
7 
8 #include "access/gin.h"
9 #include "access/stratnum.h"
10 #include "fmgr.h"
11 
12 
13 PG_FUNCTION_INFO_V1(gin_extract_trgm);
14 PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
15 PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
16 PG_FUNCTION_INFO_V1(gin_trgm_consistent);
17 PG_FUNCTION_INFO_V1(gin_trgm_triconsistent);
18 
19 /*
20  * This function can only be called if a pre-9.1 version of the GIN operator
21  * class definition is present in the catalogs (probably as a consequence
22  * of upgrade-in-place).  Cope.
23  */
24 Datum
gin_extract_trgm(PG_FUNCTION_ARGS)25 gin_extract_trgm(PG_FUNCTION_ARGS)
26 {
27 	if (PG_NARGS() == 3)
28 		return gin_extract_value_trgm(fcinfo);
29 	if (PG_NARGS() == 7)
30 		return gin_extract_query_trgm(fcinfo);
31 	elog(ERROR, "unexpected number of arguments to gin_extract_trgm");
32 	PG_RETURN_NULL();
33 }
34 
35 Datum
gin_extract_value_trgm(PG_FUNCTION_ARGS)36 gin_extract_value_trgm(PG_FUNCTION_ARGS)
37 {
38 	text	   *val = (text *) PG_GETARG_TEXT_P(0);
39 	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
40 	Datum	   *entries = NULL;
41 	TRGM	   *trg;
42 	int32		trglen;
43 
44 	*nentries = 0;
45 
46 	trg = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
47 	trglen = ARRNELEM(trg);
48 
49 	if (trglen > 0)
50 	{
51 		trgm	   *ptr;
52 		int32		i;
53 
54 		*nentries = trglen;
55 		entries = (Datum *) palloc(sizeof(Datum) * trglen);
56 
57 		ptr = GETARR(trg);
58 		for (i = 0; i < trglen; i++)
59 		{
60 			int32		item = trgm2int(ptr);
61 
62 			entries[i] = Int32GetDatum(item);
63 			ptr++;
64 		}
65 	}
66 
67 	PG_RETURN_POINTER(entries);
68 }
69 
70 Datum
gin_extract_query_trgm(PG_FUNCTION_ARGS)71 gin_extract_query_trgm(PG_FUNCTION_ARGS)
72 {
73 	text	   *val = (text *) PG_GETARG_TEXT_P(0);
74 	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
75 	StrategyNumber strategy = PG_GETARG_UINT16(2);
76 
77 	/* bool   **pmatch = (bool **) PG_GETARG_POINTER(3); */
78 	Pointer   **extra_data = (Pointer **) PG_GETARG_POINTER(4);
79 
80 	/* bool   **nullFlags = (bool **) PG_GETARG_POINTER(5); */
81 	int32	   *searchMode = (int32 *) PG_GETARG_POINTER(6);
82 	Datum	   *entries = NULL;
83 	TRGM	   *trg;
84 	int32		trglen;
85 	trgm	   *ptr;
86 	TrgmPackedGraph *graph;
87 	int32		i;
88 
89 	switch (strategy)
90 	{
91 		case SimilarityStrategyNumber:
92 		case WordSimilarityStrategyNumber:
93 			trg = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
94 			break;
95 		case ILikeStrategyNumber:
96 #ifndef IGNORECASE
97 			elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
98 #endif
99 			/* FALL THRU */
100 		case LikeStrategyNumber:
101 
102 			/*
103 			 * For wildcard search we extract all the trigrams that every
104 			 * potentially-matching string must include.
105 			 */
106 			trg = generate_wildcard_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
107 			break;
108 		case RegExpICaseStrategyNumber:
109 #ifndef IGNORECASE
110 			elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
111 #endif
112 			/* FALL THRU */
113 		case RegExpStrategyNumber:
114 			trg = createTrgmNFA(val, PG_GET_COLLATION(),
115 								&graph, CurrentMemoryContext);
116 			if (trg && ARRNELEM(trg) > 0)
117 			{
118 				/*
119 				 * Successful regex processing: store NFA-like graph as
120 				 * extra_data.  GIN API requires an array of nentries
121 				 * Pointers, but we just put the same value in each element.
122 				 */
123 				trglen = ARRNELEM(trg);
124 				*extra_data = (Pointer *) palloc(sizeof(Pointer) * trglen);
125 				for (i = 0; i < trglen; i++)
126 					(*extra_data)[i] = (Pointer) graph;
127 			}
128 			else
129 			{
130 				/* No result: have to do full index scan. */
131 				*nentries = 0;
132 				*searchMode = GIN_SEARCH_MODE_ALL;
133 				PG_RETURN_POINTER(entries);
134 			}
135 			break;
136 		default:
137 			elog(ERROR, "unrecognized strategy number: %d", strategy);
138 			trg = NULL;			/* keep compiler quiet */
139 			break;
140 	}
141 
142 	trglen = ARRNELEM(trg);
143 	*nentries = trglen;
144 
145 	if (trglen > 0)
146 	{
147 		entries = (Datum *) palloc(sizeof(Datum) * trglen);
148 		ptr = GETARR(trg);
149 		for (i = 0; i < trglen; i++)
150 		{
151 			int32		item = trgm2int(ptr);
152 
153 			entries[i] = Int32GetDatum(item);
154 			ptr++;
155 		}
156 	}
157 
158 	/*
159 	 * If no trigram was extracted then we have to scan all the index.
160 	 */
161 	if (trglen == 0)
162 		*searchMode = GIN_SEARCH_MODE_ALL;
163 
164 	PG_RETURN_POINTER(entries);
165 }
166 
167 Datum
gin_trgm_consistent(PG_FUNCTION_ARGS)168 gin_trgm_consistent(PG_FUNCTION_ARGS)
169 {
170 	bool	   *check = (bool *) PG_GETARG_POINTER(0);
171 	StrategyNumber strategy = PG_GETARG_UINT16(1);
172 
173 	/* text    *query = PG_GETARG_TEXT_P(2); */
174 	int32		nkeys = PG_GETARG_INT32(3);
175 	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
176 	bool	   *recheck = (bool *) PG_GETARG_POINTER(5);
177 	bool		res;
178 	int32		i,
179 				ntrue;
180 	double		nlimit;
181 
182 	/* All cases served by this function are inexact */
183 	*recheck = true;
184 
185 	switch (strategy)
186 	{
187 		case SimilarityStrategyNumber:
188 		case WordSimilarityStrategyNumber:
189 			nlimit = (strategy == SimilarityStrategyNumber) ?
190 				similarity_threshold : word_similarity_threshold;
191 
192 			/* Count the matches */
193 			ntrue = 0;
194 			for (i = 0; i < nkeys; i++)
195 			{
196 				if (check[i])
197 					ntrue++;
198 			}
199 
200 			/*--------------------
201 			 * If DIVUNION is defined then similarity formula is:
202 			 * c / (len1 + len2 - c)
203 			 * where c is number of common trigrams and it stands as ntrue in
204 			 * this code.  Here we don't know value of len2 but we can assume
205 			 * that c (ntrue) is a lower bound of len2, so upper bound of
206 			 * similarity is:
207 			 * c / (len1 + c - c)  => c / len1
208 			 * If DIVUNION is not defined then similarity formula is:
209 			 * c / max(len1, len2)
210 			 * And again, c (ntrue) is a lower bound of len2, but c <= len1
211 			 * just by definition and, consequently, upper bound of
212 			 * similarity is just c / len1.
213 			 * So, independently on DIVUNION the upper bound formula is the same.
214 			 */
215 			res = (nkeys == 0) ? false :
216 				(((((float4) ntrue) / ((float4) nkeys))) >= nlimit);
217 			break;
218 		case ILikeStrategyNumber:
219 #ifndef IGNORECASE
220 			elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
221 #endif
222 			/* FALL THRU */
223 		case LikeStrategyNumber:
224 			/* Check if all extracted trigrams are presented. */
225 			res = true;
226 			for (i = 0; i < nkeys; i++)
227 			{
228 				if (!check[i])
229 				{
230 					res = false;
231 					break;
232 				}
233 			}
234 			break;
235 		case RegExpICaseStrategyNumber:
236 #ifndef IGNORECASE
237 			elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
238 #endif
239 			/* FALL THRU */
240 		case RegExpStrategyNumber:
241 			if (nkeys < 1)
242 			{
243 				/* Regex processing gave no result: do full index scan */
244 				res = true;
245 			}
246 			else
247 				res = trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
248 										 check);
249 			break;
250 		default:
251 			elog(ERROR, "unrecognized strategy number: %d", strategy);
252 			res = false;		/* keep compiler quiet */
253 			break;
254 	}
255 
256 	PG_RETURN_BOOL(res);
257 }
258 
259 /*
260  * In all cases, GIN_TRUE is at least as favorable to inclusion as
261  * GIN_MAYBE. If no better option is available, simply treat
262  * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary
263  * consistent function.
264  */
265 Datum
gin_trgm_triconsistent(PG_FUNCTION_ARGS)266 gin_trgm_triconsistent(PG_FUNCTION_ARGS)
267 {
268 	GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
269 	StrategyNumber strategy = PG_GETARG_UINT16(1);
270 
271 	/* text    *query = PG_GETARG_TEXT_P(2); */
272 	int32		nkeys = PG_GETARG_INT32(3);
273 	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
274 	GinTernaryValue res = GIN_MAYBE;
275 	int32		i,
276 				ntrue;
277 	bool	   *boolcheck;
278 	double		nlimit;
279 
280 	switch (strategy)
281 	{
282 		case SimilarityStrategyNumber:
283 		case WordSimilarityStrategyNumber:
284 			nlimit = (strategy == SimilarityStrategyNumber) ?
285 				similarity_threshold : word_similarity_threshold;
286 
287 			/* Count the matches */
288 			ntrue = 0;
289 			for (i = 0; i < nkeys; i++)
290 			{
291 				if (check[i] != GIN_FALSE)
292 					ntrue++;
293 			}
294 
295 			/*
296 			 * See comment in gin_trgm_consistent() about * upper bound
297 			 * formula
298 			 */
299 			res = (nkeys == 0)
300 				? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit)
301 							   ? GIN_MAYBE : GIN_FALSE);
302 			break;
303 		case ILikeStrategyNumber:
304 #ifndef IGNORECASE
305 			elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
306 #endif
307 			/* FALL THRU */
308 		case LikeStrategyNumber:
309 			/* Check if all extracted trigrams are presented. */
310 			res = GIN_MAYBE;
311 			for (i = 0; i < nkeys; i++)
312 			{
313 				if (check[i] == GIN_FALSE)
314 				{
315 					res = GIN_FALSE;
316 					break;
317 				}
318 			}
319 			break;
320 		case RegExpICaseStrategyNumber:
321 #ifndef IGNORECASE
322 			elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
323 #endif
324 			/* FALL THRU */
325 		case RegExpStrategyNumber:
326 			if (nkeys < 1)
327 			{
328 				/* Regex processing gave no result: do full index scan */
329 				res = GIN_MAYBE;
330 			}
331 			else
332 			{
333 				/*
334 				 * As trigramsMatchGraph implements a monotonic boolean
335 				 * function, promoting all GIN_MAYBE keys to GIN_TRUE will
336 				 * give a conservative result.
337 				 */
338 				boolcheck = (bool *) palloc(sizeof(bool) * nkeys);
339 				for (i = 0; i < nkeys; i++)
340 					boolcheck[i] = (check[i] != GIN_FALSE);
341 				if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
342 										boolcheck))
343 					res = GIN_FALSE;
344 				pfree(boolcheck);
345 			}
346 			break;
347 		default:
348 			elog(ERROR, "unrecognized strategy number: %d", strategy);
349 			res = GIN_FALSE;	/* keep compiler quiet */
350 			break;
351 	}
352 
353 	/* All cases served by this function are inexact */
354 	Assert(res != GIN_TRUE);
355 	PG_RETURN_GIN_TERNARY_VALUE(res);
356 }
357