1 /*
2  * contrib/pg_trgm/trgm_gin.c
3  */
4 #include "postgres.h"
5 
6 #include "trgm.h"
7 
8 #include "access/gin.h"
9 #include "access/stratnum.h"
10 #include "fmgr.h"
11 
12 
13 PG_FUNCTION_INFO_V1(gin_extract_trgm);
14 PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
15 PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
16 PG_FUNCTION_INFO_V1(gin_trgm_consistent);
17 PG_FUNCTION_INFO_V1(gin_trgm_triconsistent);
18 
19 /*
20  * This function can only be called if a pre-9.1 version of the GIN operator
21  * class definition is present in the catalogs (probably as a consequence
22  * of upgrade-in-place).  Cope.
23  */
24 Datum
gin_extract_trgm(PG_FUNCTION_ARGS)25 gin_extract_trgm(PG_FUNCTION_ARGS)
26 {
27 	if (PG_NARGS() == 3)
28 		return gin_extract_value_trgm(fcinfo);
29 	if (PG_NARGS() == 7)
30 		return gin_extract_query_trgm(fcinfo);
31 	elog(ERROR, "unexpected number of arguments to gin_extract_trgm");
32 	PG_RETURN_NULL();
33 }
34 
35 Datum
gin_extract_value_trgm(PG_FUNCTION_ARGS)36 gin_extract_value_trgm(PG_FUNCTION_ARGS)
37 {
38 	text	   *val = (text *) PG_GETARG_TEXT_PP(0);
39 	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
40 	Datum	   *entries = NULL;
41 	TRGM	   *trg;
42 	int32		trglen;
43 
44 	*nentries = 0;
45 
46 	trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
47 	trglen = ARRNELEM(trg);
48 
49 	if (trglen > 0)
50 	{
51 		trgm	   *ptr;
52 		int32		i;
53 
54 		*nentries = trglen;
55 		entries = (Datum *) palloc(sizeof(Datum) * trglen);
56 
57 		ptr = GETARR(trg);
58 		for (i = 0; i < trglen; i++)
59 		{
60 			int32		item = trgm2int(ptr);
61 
62 			entries[i] = Int32GetDatum(item);
63 			ptr++;
64 		}
65 	}
66 
67 	PG_RETURN_POINTER(entries);
68 }
69 
70 Datum
gin_extract_query_trgm(PG_FUNCTION_ARGS)71 gin_extract_query_trgm(PG_FUNCTION_ARGS)
72 {
73 	text	   *val = (text *) PG_GETARG_TEXT_PP(0);
74 	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
75 	StrategyNumber strategy = PG_GETARG_UINT16(2);
76 
77 	/* bool   **pmatch = (bool **) PG_GETARG_POINTER(3); */
78 	Pointer   **extra_data = (Pointer **) PG_GETARG_POINTER(4);
79 
80 	/* bool   **nullFlags = (bool **) PG_GETARG_POINTER(5); */
81 	int32	   *searchMode = (int32 *) PG_GETARG_POINTER(6);
82 	Datum	   *entries = NULL;
83 	TRGM	   *trg;
84 	int32		trglen;
85 	trgm	   *ptr;
86 	TrgmPackedGraph *graph;
87 	int32		i;
88 
89 	switch (strategy)
90 	{
91 		case SimilarityStrategyNumber:
92 		case WordSimilarityStrategyNumber:
93 			trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
94 			break;
95 		case ILikeStrategyNumber:
96 #ifndef IGNORECASE
97 			elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
98 #endif
99 			/* FALL THRU */
100 		case LikeStrategyNumber:
101 
102 			/*
103 			 * For wildcard search we extract all the trigrams that every
104 			 * potentially-matching string must include.
105 			 */
106 			trg = generate_wildcard_trgm(VARDATA_ANY(val),
107 										 VARSIZE_ANY_EXHDR(val));
108 			break;
109 		case RegExpICaseStrategyNumber:
110 #ifndef IGNORECASE
111 			elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
112 #endif
113 			/* FALL THRU */
114 		case RegExpStrategyNumber:
115 			trg = createTrgmNFA(val, PG_GET_COLLATION(),
116 								&graph, CurrentMemoryContext);
117 			if (trg && ARRNELEM(trg) > 0)
118 			{
119 				/*
120 				 * Successful regex processing: store NFA-like graph as
121 				 * extra_data.  GIN API requires an array of nentries
122 				 * Pointers, but we just put the same value in each element.
123 				 */
124 				trglen = ARRNELEM(trg);
125 				*extra_data = (Pointer *) palloc(sizeof(Pointer) * trglen);
126 				for (i = 0; i < trglen; i++)
127 					(*extra_data)[i] = (Pointer) graph;
128 			}
129 			else
130 			{
131 				/* No result: have to do full index scan. */
132 				*nentries = 0;
133 				*searchMode = GIN_SEARCH_MODE_ALL;
134 				PG_RETURN_POINTER(entries);
135 			}
136 			break;
137 		default:
138 			elog(ERROR, "unrecognized strategy number: %d", strategy);
139 			trg = NULL;			/* keep compiler quiet */
140 			break;
141 	}
142 
143 	trglen = ARRNELEM(trg);
144 	*nentries = trglen;
145 
146 	if (trglen > 0)
147 	{
148 		entries = (Datum *) palloc(sizeof(Datum) * trglen);
149 		ptr = GETARR(trg);
150 		for (i = 0; i < trglen; i++)
151 		{
152 			int32		item = trgm2int(ptr);
153 
154 			entries[i] = Int32GetDatum(item);
155 			ptr++;
156 		}
157 	}
158 
159 	/*
160 	 * If no trigram was extracted then we have to scan all the index.
161 	 */
162 	if (trglen == 0)
163 		*searchMode = GIN_SEARCH_MODE_ALL;
164 
165 	PG_RETURN_POINTER(entries);
166 }
167 
168 Datum
gin_trgm_consistent(PG_FUNCTION_ARGS)169 gin_trgm_consistent(PG_FUNCTION_ARGS)
170 {
171 	bool	   *check = (bool *) PG_GETARG_POINTER(0);
172 	StrategyNumber strategy = PG_GETARG_UINT16(1);
173 
174 	/* text    *query = PG_GETARG_TEXT_PP(2); */
175 	int32		nkeys = PG_GETARG_INT32(3);
176 	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
177 	bool	   *recheck = (bool *) PG_GETARG_POINTER(5);
178 	bool		res;
179 	int32		i,
180 				ntrue;
181 	double		nlimit;
182 
183 	/* All cases served by this function are inexact */
184 	*recheck = true;
185 
186 	switch (strategy)
187 	{
188 		case SimilarityStrategyNumber:
189 		case WordSimilarityStrategyNumber:
190 			nlimit = (strategy == SimilarityStrategyNumber) ?
191 				similarity_threshold : word_similarity_threshold;
192 
193 			/* Count the matches */
194 			ntrue = 0;
195 			for (i = 0; i < nkeys; i++)
196 			{
197 				if (check[i])
198 					ntrue++;
199 			}
200 
201 			/*--------------------
202 			 * If DIVUNION is defined then similarity formula is:
203 			 * c / (len1 + len2 - c)
204 			 * where c is number of common trigrams and it stands as ntrue in
205 			 * this code.  Here we don't know value of len2 but we can assume
206 			 * that c (ntrue) is a lower bound of len2, so upper bound of
207 			 * similarity is:
208 			 * c / (len1 + c - c)  => c / len1
209 			 * If DIVUNION is not defined then similarity formula is:
210 			 * c / max(len1, len2)
211 			 * And again, c (ntrue) is a lower bound of len2, but c <= len1
212 			 * just by definition and, consequently, upper bound of
213 			 * similarity is just c / len1.
214 			 * So, independently on DIVUNION the upper bound formula is the same.
215 			 */
216 			res = (nkeys == 0) ? false :
217 				(((((float4) ntrue) / ((float4) nkeys))) >= nlimit);
218 			break;
219 		case ILikeStrategyNumber:
220 #ifndef IGNORECASE
221 			elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
222 #endif
223 			/* FALL THRU */
224 		case LikeStrategyNumber:
225 			/* Check if all extracted trigrams are presented. */
226 			res = true;
227 			for (i = 0; i < nkeys; i++)
228 			{
229 				if (!check[i])
230 				{
231 					res = false;
232 					break;
233 				}
234 			}
235 			break;
236 		case RegExpICaseStrategyNumber:
237 #ifndef IGNORECASE
238 			elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
239 #endif
240 			/* FALL THRU */
241 		case RegExpStrategyNumber:
242 			if (nkeys < 1)
243 			{
244 				/* Regex processing gave no result: do full index scan */
245 				res = true;
246 			}
247 			else
248 				res = trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
249 										 check);
250 			break;
251 		default:
252 			elog(ERROR, "unrecognized strategy number: %d", strategy);
253 			res = false;		/* keep compiler quiet */
254 			break;
255 	}
256 
257 	PG_RETURN_BOOL(res);
258 }
259 
260 /*
261  * In all cases, GIN_TRUE is at least as favorable to inclusion as
262  * GIN_MAYBE. If no better option is available, simply treat
263  * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary
264  * consistent function.
265  */
266 Datum
gin_trgm_triconsistent(PG_FUNCTION_ARGS)267 gin_trgm_triconsistent(PG_FUNCTION_ARGS)
268 {
269 	GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
270 	StrategyNumber strategy = PG_GETARG_UINT16(1);
271 
272 	/* text    *query = PG_GETARG_TEXT_PP(2); */
273 	int32		nkeys = PG_GETARG_INT32(3);
274 	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
275 	GinTernaryValue res = GIN_MAYBE;
276 	int32		i,
277 				ntrue;
278 	bool	   *boolcheck;
279 	double		nlimit;
280 
281 	switch (strategy)
282 	{
283 		case SimilarityStrategyNumber:
284 		case WordSimilarityStrategyNumber:
285 			nlimit = (strategy == SimilarityStrategyNumber) ?
286 				similarity_threshold : word_similarity_threshold;
287 
288 			/* Count the matches */
289 			ntrue = 0;
290 			for (i = 0; i < nkeys; i++)
291 			{
292 				if (check[i] != GIN_FALSE)
293 					ntrue++;
294 			}
295 
296 			/*
297 			 * See comment in gin_trgm_consistent() about * upper bound
298 			 * formula
299 			 */
300 			res = (nkeys == 0)
301 				? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit)
302 							   ? GIN_MAYBE : GIN_FALSE);
303 			break;
304 		case ILikeStrategyNumber:
305 #ifndef IGNORECASE
306 			elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
307 #endif
308 			/* FALL THRU */
309 		case LikeStrategyNumber:
310 			/* Check if all extracted trigrams are presented. */
311 			res = GIN_MAYBE;
312 			for (i = 0; i < nkeys; i++)
313 			{
314 				if (check[i] == GIN_FALSE)
315 				{
316 					res = GIN_FALSE;
317 					break;
318 				}
319 			}
320 			break;
321 		case RegExpICaseStrategyNumber:
322 #ifndef IGNORECASE
323 			elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
324 #endif
325 			/* FALL THRU */
326 		case RegExpStrategyNumber:
327 			if (nkeys < 1)
328 			{
329 				/* Regex processing gave no result: do full index scan */
330 				res = GIN_MAYBE;
331 			}
332 			else
333 			{
334 				/*
335 				 * As trigramsMatchGraph implements a monotonic boolean
336 				 * function, promoting all GIN_MAYBE keys to GIN_TRUE will
337 				 * give a conservative result.
338 				 */
339 				boolcheck = (bool *) palloc(sizeof(bool) * nkeys);
340 				for (i = 0; i < nkeys; i++)
341 					boolcheck[i] = (check[i] != GIN_FALSE);
342 				if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
343 										boolcheck))
344 					res = GIN_FALSE;
345 				pfree(boolcheck);
346 			}
347 			break;
348 		default:
349 			elog(ERROR, "unrecognized strategy number: %d", strategy);
350 			res = GIN_FALSE;	/* keep compiler quiet */
351 			break;
352 	}
353 
354 	/* All cases served by this function are inexact */
355 	Assert(res != GIN_TRUE);
356 	PG_RETURN_GIN_TERNARY_VALUE(res);
357 }
358