1 /*
2 * contrib/pg_trgm/trgm_gin.c
3 */
4 #include "postgres.h"
5
6 #include "trgm.h"
7
8 #include "access/gin.h"
9 #include "access/stratnum.h"
10 #include "fmgr.h"
11
12
13 PG_FUNCTION_INFO_V1(gin_extract_trgm);
14 PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
15 PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
16 PG_FUNCTION_INFO_V1(gin_trgm_consistent);
17 PG_FUNCTION_INFO_V1(gin_trgm_triconsistent);
18
19 /*
20 * This function can only be called if a pre-9.1 version of the GIN operator
21 * class definition is present in the catalogs (probably as a consequence
22 * of upgrade-in-place). Cope.
23 */
24 Datum
gin_extract_trgm(PG_FUNCTION_ARGS)25 gin_extract_trgm(PG_FUNCTION_ARGS)
26 {
27 if (PG_NARGS() == 3)
28 return gin_extract_value_trgm(fcinfo);
29 if (PG_NARGS() == 7)
30 return gin_extract_query_trgm(fcinfo);
31 elog(ERROR, "unexpected number of arguments to gin_extract_trgm");
32 PG_RETURN_NULL();
33 }
34
35 Datum
gin_extract_value_trgm(PG_FUNCTION_ARGS)36 gin_extract_value_trgm(PG_FUNCTION_ARGS)
37 {
38 text *val = (text *) PG_GETARG_TEXT_P(0);
39 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
40 Datum *entries = NULL;
41 TRGM *trg;
42 int32 trglen;
43
44 *nentries = 0;
45
46 trg = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
47 trglen = ARRNELEM(trg);
48
49 if (trglen > 0)
50 {
51 trgm *ptr;
52 int32 i;
53
54 *nentries = trglen;
55 entries = (Datum *) palloc(sizeof(Datum) * trglen);
56
57 ptr = GETARR(trg);
58 for (i = 0; i < trglen; i++)
59 {
60 int32 item = trgm2int(ptr);
61
62 entries[i] = Int32GetDatum(item);
63 ptr++;
64 }
65 }
66
67 PG_RETURN_POINTER(entries);
68 }
69
70 Datum
gin_extract_query_trgm(PG_FUNCTION_ARGS)71 gin_extract_query_trgm(PG_FUNCTION_ARGS)
72 {
73 text *val = (text *) PG_GETARG_TEXT_P(0);
74 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
75 StrategyNumber strategy = PG_GETARG_UINT16(2);
76
77 /* bool **pmatch = (bool **) PG_GETARG_POINTER(3); */
78 Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
79
80 /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
81 int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
82 Datum *entries = NULL;
83 TRGM *trg;
84 int32 trglen;
85 trgm *ptr;
86 TrgmPackedGraph *graph;
87 int32 i;
88
89 switch (strategy)
90 {
91 case SimilarityStrategyNumber:
92 case WordSimilarityStrategyNumber:
93 trg = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
94 break;
95 case ILikeStrategyNumber:
96 #ifndef IGNORECASE
97 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
98 #endif
99 /* FALL THRU */
100 case LikeStrategyNumber:
101
102 /*
103 * For wildcard search we extract all the trigrams that every
104 * potentially-matching string must include.
105 */
106 trg = generate_wildcard_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
107 break;
108 case RegExpICaseStrategyNumber:
109 #ifndef IGNORECASE
110 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
111 #endif
112 /* FALL THRU */
113 case RegExpStrategyNumber:
114 trg = createTrgmNFA(val, PG_GET_COLLATION(),
115 &graph, CurrentMemoryContext);
116 if (trg && ARRNELEM(trg) > 0)
117 {
118 /*
119 * Successful regex processing: store NFA-like graph as
120 * extra_data. GIN API requires an array of nentries
121 * Pointers, but we just put the same value in each element.
122 */
123 trglen = ARRNELEM(trg);
124 *extra_data = (Pointer *) palloc(sizeof(Pointer) * trglen);
125 for (i = 0; i < trglen; i++)
126 (*extra_data)[i] = (Pointer) graph;
127 }
128 else
129 {
130 /* No result: have to do full index scan. */
131 *nentries = 0;
132 *searchMode = GIN_SEARCH_MODE_ALL;
133 PG_RETURN_POINTER(entries);
134 }
135 break;
136 default:
137 elog(ERROR, "unrecognized strategy number: %d", strategy);
138 trg = NULL; /* keep compiler quiet */
139 break;
140 }
141
142 trglen = ARRNELEM(trg);
143 *nentries = trglen;
144
145 if (trglen > 0)
146 {
147 entries = (Datum *) palloc(sizeof(Datum) * trglen);
148 ptr = GETARR(trg);
149 for (i = 0; i < trglen; i++)
150 {
151 int32 item = trgm2int(ptr);
152
153 entries[i] = Int32GetDatum(item);
154 ptr++;
155 }
156 }
157
158 /*
159 * If no trigram was extracted then we have to scan all the index.
160 */
161 if (trglen == 0)
162 *searchMode = GIN_SEARCH_MODE_ALL;
163
164 PG_RETURN_POINTER(entries);
165 }
166
167 Datum
gin_trgm_consistent(PG_FUNCTION_ARGS)168 gin_trgm_consistent(PG_FUNCTION_ARGS)
169 {
170 bool *check = (bool *) PG_GETARG_POINTER(0);
171 StrategyNumber strategy = PG_GETARG_UINT16(1);
172
173 /* text *query = PG_GETARG_TEXT_P(2); */
174 int32 nkeys = PG_GETARG_INT32(3);
175 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
176 bool *recheck = (bool *) PG_GETARG_POINTER(5);
177 bool res;
178 int32 i,
179 ntrue;
180 double nlimit;
181
182 /* All cases served by this function are inexact */
183 *recheck = true;
184
185 switch (strategy)
186 {
187 case SimilarityStrategyNumber:
188 case WordSimilarityStrategyNumber:
189 nlimit = (strategy == SimilarityStrategyNumber) ?
190 similarity_threshold : word_similarity_threshold;
191
192 /* Count the matches */
193 ntrue = 0;
194 for (i = 0; i < nkeys; i++)
195 {
196 if (check[i])
197 ntrue++;
198 }
199
200 /*--------------------
201 * If DIVUNION is defined then similarity formula is:
202 * c / (len1 + len2 - c)
203 * where c is number of common trigrams and it stands as ntrue in
204 * this code. Here we don't know value of len2 but we can assume
205 * that c (ntrue) is a lower bound of len2, so upper bound of
206 * similarity is:
207 * c / (len1 + c - c) => c / len1
208 * If DIVUNION is not defined then similarity formula is:
209 * c / max(len1, len2)
210 * And again, c (ntrue) is a lower bound of len2, but c <= len1
211 * just by definition and, consequently, upper bound of
212 * similarity is just c / len1.
213 * So, independently on DIVUNION the upper bound formula is the same.
214 */
215 res = (nkeys == 0) ? false :
216 (((((float4) ntrue) / ((float4) nkeys))) >= nlimit);
217 break;
218 case ILikeStrategyNumber:
219 #ifndef IGNORECASE
220 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
221 #endif
222 /* FALL THRU */
223 case LikeStrategyNumber:
224 /* Check if all extracted trigrams are presented. */
225 res = true;
226 for (i = 0; i < nkeys; i++)
227 {
228 if (!check[i])
229 {
230 res = false;
231 break;
232 }
233 }
234 break;
235 case RegExpICaseStrategyNumber:
236 #ifndef IGNORECASE
237 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
238 #endif
239 /* FALL THRU */
240 case RegExpStrategyNumber:
241 if (nkeys < 1)
242 {
243 /* Regex processing gave no result: do full index scan */
244 res = true;
245 }
246 else
247 res = trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
248 check);
249 break;
250 default:
251 elog(ERROR, "unrecognized strategy number: %d", strategy);
252 res = false; /* keep compiler quiet */
253 break;
254 }
255
256 PG_RETURN_BOOL(res);
257 }
258
259 /*
260 * In all cases, GIN_TRUE is at least as favorable to inclusion as
261 * GIN_MAYBE. If no better option is available, simply treat
262 * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary
263 * consistent function.
264 */
265 Datum
gin_trgm_triconsistent(PG_FUNCTION_ARGS)266 gin_trgm_triconsistent(PG_FUNCTION_ARGS)
267 {
268 GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
269 StrategyNumber strategy = PG_GETARG_UINT16(1);
270
271 /* text *query = PG_GETARG_TEXT_P(2); */
272 int32 nkeys = PG_GETARG_INT32(3);
273 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
274 GinTernaryValue res = GIN_MAYBE;
275 int32 i,
276 ntrue;
277 bool *boolcheck;
278 double nlimit;
279
280 switch (strategy)
281 {
282 case SimilarityStrategyNumber:
283 case WordSimilarityStrategyNumber:
284 nlimit = (strategy == SimilarityStrategyNumber) ?
285 similarity_threshold : word_similarity_threshold;
286
287 /* Count the matches */
288 ntrue = 0;
289 for (i = 0; i < nkeys; i++)
290 {
291 if (check[i] != GIN_FALSE)
292 ntrue++;
293 }
294
295 /*
296 * See comment in gin_trgm_consistent() about * upper bound
297 * formula
298 */
299 res = (nkeys == 0)
300 ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit)
301 ? GIN_MAYBE : GIN_FALSE);
302 break;
303 case ILikeStrategyNumber:
304 #ifndef IGNORECASE
305 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
306 #endif
307 /* FALL THRU */
308 case LikeStrategyNumber:
309 /* Check if all extracted trigrams are presented. */
310 res = GIN_MAYBE;
311 for (i = 0; i < nkeys; i++)
312 {
313 if (check[i] == GIN_FALSE)
314 {
315 res = GIN_FALSE;
316 break;
317 }
318 }
319 break;
320 case RegExpICaseStrategyNumber:
321 #ifndef IGNORECASE
322 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
323 #endif
324 /* FALL THRU */
325 case RegExpStrategyNumber:
326 if (nkeys < 1)
327 {
328 /* Regex processing gave no result: do full index scan */
329 res = GIN_MAYBE;
330 }
331 else
332 {
333 /*
334 * As trigramsMatchGraph implements a monotonic boolean
335 * function, promoting all GIN_MAYBE keys to GIN_TRUE will
336 * give a conservative result.
337 */
338 boolcheck = (bool *) palloc(sizeof(bool) * nkeys);
339 for (i = 0; i < nkeys; i++)
340 boolcheck[i] = (check[i] != GIN_FALSE);
341 if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
342 boolcheck))
343 res = GIN_FALSE;
344 pfree(boolcheck);
345 }
346 break;
347 default:
348 elog(ERROR, "unrecognized strategy number: %d", strategy);
349 res = GIN_FALSE; /* keep compiler quiet */
350 break;
351 }
352
353 /* All cases served by this function are inexact */
354 Assert(res != GIN_TRUE);
355 PG_RETURN_GIN_TERNARY_VALUE(res);
356 }
357