1 /*
2 * contrib/pg_trgm/trgm_gin.c
3 */
4 #include "postgres.h"
5
6 #include "trgm.h"
7
8 #include "access/gin.h"
9 #include "access/stratnum.h"
10 #include "fmgr.h"
11
12
13 PG_FUNCTION_INFO_V1(gin_extract_trgm);
14 PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
15 PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
16 PG_FUNCTION_INFO_V1(gin_trgm_consistent);
17 PG_FUNCTION_INFO_V1(gin_trgm_triconsistent);
18
19 /*
20 * This function can only be called if a pre-9.1 version of the GIN operator
21 * class definition is present in the catalogs (probably as a consequence
22 * of upgrade-in-place). Cope.
23 */
24 Datum
gin_extract_trgm(PG_FUNCTION_ARGS)25 gin_extract_trgm(PG_FUNCTION_ARGS)
26 {
27 if (PG_NARGS() == 3)
28 return gin_extract_value_trgm(fcinfo);
29 if (PG_NARGS() == 7)
30 return gin_extract_query_trgm(fcinfo);
31 elog(ERROR, "unexpected number of arguments to gin_extract_trgm");
32 PG_RETURN_NULL();
33 }
34
35 Datum
gin_extract_value_trgm(PG_FUNCTION_ARGS)36 gin_extract_value_trgm(PG_FUNCTION_ARGS)
37 {
38 text *val = (text *) PG_GETARG_TEXT_PP(0);
39 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
40 Datum *entries = NULL;
41 TRGM *trg;
42 int32 trglen;
43
44 *nentries = 0;
45
46 trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
47 trglen = ARRNELEM(trg);
48
49 if (trglen > 0)
50 {
51 trgm *ptr;
52 int32 i;
53
54 *nentries = trglen;
55 entries = (Datum *) palloc(sizeof(Datum) * trglen);
56
57 ptr = GETARR(trg);
58 for (i = 0; i < trglen; i++)
59 {
60 int32 item = trgm2int(ptr);
61
62 entries[i] = Int32GetDatum(item);
63 ptr++;
64 }
65 }
66
67 PG_RETURN_POINTER(entries);
68 }
69
70 Datum
gin_extract_query_trgm(PG_FUNCTION_ARGS)71 gin_extract_query_trgm(PG_FUNCTION_ARGS)
72 {
73 text *val = (text *) PG_GETARG_TEXT_PP(0);
74 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
75 StrategyNumber strategy = PG_GETARG_UINT16(2);
76
77 /* bool **pmatch = (bool **) PG_GETARG_POINTER(3); */
78 Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
79
80 /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
81 int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
82 Datum *entries = NULL;
83 TRGM *trg;
84 int32 trglen;
85 trgm *ptr;
86 TrgmPackedGraph *graph;
87 int32 i;
88
89 switch (strategy)
90 {
91 case SimilarityStrategyNumber:
92 case WordSimilarityStrategyNumber:
93 case StrictWordSimilarityStrategyNumber:
94 trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
95 break;
96 case ILikeStrategyNumber:
97 #ifndef IGNORECASE
98 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
99 #endif
100 /* FALL THRU */
101 case LikeStrategyNumber:
102
103 /*
104 * For wildcard search we extract all the trigrams that every
105 * potentially-matching string must include.
106 */
107 trg = generate_wildcard_trgm(VARDATA_ANY(val),
108 VARSIZE_ANY_EXHDR(val));
109 break;
110 case RegExpICaseStrategyNumber:
111 #ifndef IGNORECASE
112 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
113 #endif
114 /* FALL THRU */
115 case RegExpStrategyNumber:
116 trg = createTrgmNFA(val, PG_GET_COLLATION(),
117 &graph, CurrentMemoryContext);
118 if (trg && ARRNELEM(trg) > 0)
119 {
120 /*
121 * Successful regex processing: store NFA-like graph as
122 * extra_data. GIN API requires an array of nentries
123 * Pointers, but we just put the same value in each element.
124 */
125 trglen = ARRNELEM(trg);
126 *extra_data = (Pointer *) palloc(sizeof(Pointer) * trglen);
127 for (i = 0; i < trglen; i++)
128 (*extra_data)[i] = (Pointer) graph;
129 }
130 else
131 {
132 /* No result: have to do full index scan. */
133 *nentries = 0;
134 *searchMode = GIN_SEARCH_MODE_ALL;
135 PG_RETURN_POINTER(entries);
136 }
137 break;
138 default:
139 elog(ERROR, "unrecognized strategy number: %d", strategy);
140 trg = NULL; /* keep compiler quiet */
141 break;
142 }
143
144 trglen = ARRNELEM(trg);
145 *nentries = trglen;
146
147 if (trglen > 0)
148 {
149 entries = (Datum *) palloc(sizeof(Datum) * trglen);
150 ptr = GETARR(trg);
151 for (i = 0; i < trglen; i++)
152 {
153 int32 item = trgm2int(ptr);
154
155 entries[i] = Int32GetDatum(item);
156 ptr++;
157 }
158 }
159
160 /*
161 * If no trigram was extracted then we have to scan all the index.
162 */
163 if (trglen == 0)
164 *searchMode = GIN_SEARCH_MODE_ALL;
165
166 PG_RETURN_POINTER(entries);
167 }
168
169 Datum
gin_trgm_consistent(PG_FUNCTION_ARGS)170 gin_trgm_consistent(PG_FUNCTION_ARGS)
171 {
172 bool *check = (bool *) PG_GETARG_POINTER(0);
173 StrategyNumber strategy = PG_GETARG_UINT16(1);
174
175 /* text *query = PG_GETARG_TEXT_PP(2); */
176 int32 nkeys = PG_GETARG_INT32(3);
177 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
178 bool *recheck = (bool *) PG_GETARG_POINTER(5);
179 bool res;
180 int32 i,
181 ntrue;
182 double nlimit;
183
184 /* All cases served by this function are inexact */
185 *recheck = true;
186
187 switch (strategy)
188 {
189 case SimilarityStrategyNumber:
190 case WordSimilarityStrategyNumber:
191 case StrictWordSimilarityStrategyNumber:
192 nlimit = index_strategy_get_limit(strategy);
193
194 /* Count the matches */
195 ntrue = 0;
196 for (i = 0; i < nkeys; i++)
197 {
198 if (check[i])
199 ntrue++;
200 }
201
202 /*--------------------
203 * If DIVUNION is defined then similarity formula is:
204 * c / (len1 + len2 - c)
205 * where c is number of common trigrams and it stands as ntrue in
206 * this code. Here we don't know value of len2 but we can assume
207 * that c (ntrue) is a lower bound of len2, so upper bound of
208 * similarity is:
209 * c / (len1 + c - c) => c / len1
210 * If DIVUNION is not defined then similarity formula is:
211 * c / max(len1, len2)
212 * And again, c (ntrue) is a lower bound of len2, but c <= len1
213 * just by definition and, consequently, upper bound of
214 * similarity is just c / len1.
215 * So, independently on DIVUNION the upper bound formula is the same.
216 */
217 res = (nkeys == 0) ? false :
218 (((((float4) ntrue) / ((float4) nkeys))) >= nlimit);
219 break;
220 case ILikeStrategyNumber:
221 #ifndef IGNORECASE
222 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
223 #endif
224 /* FALL THRU */
225 case LikeStrategyNumber:
226 /* Check if all extracted trigrams are presented. */
227 res = true;
228 for (i = 0; i < nkeys; i++)
229 {
230 if (!check[i])
231 {
232 res = false;
233 break;
234 }
235 }
236 break;
237 case RegExpICaseStrategyNumber:
238 #ifndef IGNORECASE
239 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
240 #endif
241 /* FALL THRU */
242 case RegExpStrategyNumber:
243 if (nkeys < 1)
244 {
245 /* Regex processing gave no result: do full index scan */
246 res = true;
247 }
248 else
249 res = trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
250 check);
251 break;
252 default:
253 elog(ERROR, "unrecognized strategy number: %d", strategy);
254 res = false; /* keep compiler quiet */
255 break;
256 }
257
258 PG_RETURN_BOOL(res);
259 }
260
261 /*
262 * In all cases, GIN_TRUE is at least as favorable to inclusion as
263 * GIN_MAYBE. If no better option is available, simply treat
264 * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary
265 * consistent function.
266 */
267 Datum
gin_trgm_triconsistent(PG_FUNCTION_ARGS)268 gin_trgm_triconsistent(PG_FUNCTION_ARGS)
269 {
270 GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
271 StrategyNumber strategy = PG_GETARG_UINT16(1);
272
273 /* text *query = PG_GETARG_TEXT_PP(2); */
274 int32 nkeys = PG_GETARG_INT32(3);
275 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
276 GinTernaryValue res = GIN_MAYBE;
277 int32 i,
278 ntrue;
279 bool *boolcheck;
280 double nlimit;
281
282 switch (strategy)
283 {
284 case SimilarityStrategyNumber:
285 case WordSimilarityStrategyNumber:
286 case StrictWordSimilarityStrategyNumber:
287 nlimit = index_strategy_get_limit(strategy);
288
289 /* Count the matches */
290 ntrue = 0;
291 for (i = 0; i < nkeys; i++)
292 {
293 if (check[i] != GIN_FALSE)
294 ntrue++;
295 }
296
297 /*
298 * See comment in gin_trgm_consistent() about * upper bound
299 * formula
300 */
301 res = (nkeys == 0)
302 ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit)
303 ? GIN_MAYBE : GIN_FALSE);
304 break;
305 case ILikeStrategyNumber:
306 #ifndef IGNORECASE
307 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
308 #endif
309 /* FALL THRU */
310 case LikeStrategyNumber:
311 /* Check if all extracted trigrams are presented. */
312 res = GIN_MAYBE;
313 for (i = 0; i < nkeys; i++)
314 {
315 if (check[i] == GIN_FALSE)
316 {
317 res = GIN_FALSE;
318 break;
319 }
320 }
321 break;
322 case RegExpICaseStrategyNumber:
323 #ifndef IGNORECASE
324 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
325 #endif
326 /* FALL THRU */
327 case RegExpStrategyNumber:
328 if (nkeys < 1)
329 {
330 /* Regex processing gave no result: do full index scan */
331 res = GIN_MAYBE;
332 }
333 else
334 {
335 /*
336 * As trigramsMatchGraph implements a monotonic boolean
337 * function, promoting all GIN_MAYBE keys to GIN_TRUE will
338 * give a conservative result.
339 */
340 boolcheck = (bool *) palloc(sizeof(bool) * nkeys);
341 for (i = 0; i < nkeys; i++)
342 boolcheck[i] = (check[i] != GIN_FALSE);
343 if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
344 boolcheck))
345 res = GIN_FALSE;
346 pfree(boolcheck);
347 }
348 break;
349 default:
350 elog(ERROR, "unrecognized strategy number: %d", strategy);
351 res = GIN_FALSE; /* keep compiler quiet */
352 break;
353 }
354
355 /* All cases served by this function are inexact */
356 Assert(res != GIN_TRUE);
357 PG_RETURN_GIN_TERNARY_VALUE(res);
358 }
359