1 /*
2 * contrib/pg_trgm/trgm_gin.c
3 */
4 #include "postgres.h"
5
6 #include "trgm.h"
7
8 #include "access/gin.h"
9 #include "access/stratnum.h"
10 #include "fmgr.h"
11
12
13 PG_FUNCTION_INFO_V1(gin_extract_trgm);
14 PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
15 PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
16 PG_FUNCTION_INFO_V1(gin_trgm_consistent);
17 PG_FUNCTION_INFO_V1(gin_trgm_triconsistent);
18
19 /*
20 * This function can only be called if a pre-9.1 version of the GIN operator
21 * class definition is present in the catalogs (probably as a consequence
22 * of upgrade-in-place). Cope.
23 */
24 Datum
gin_extract_trgm(PG_FUNCTION_ARGS)25 gin_extract_trgm(PG_FUNCTION_ARGS)
26 {
27 if (PG_NARGS() == 3)
28 return gin_extract_value_trgm(fcinfo);
29 if (PG_NARGS() == 7)
30 return gin_extract_query_trgm(fcinfo);
31 elog(ERROR, "unexpected number of arguments to gin_extract_trgm");
32 PG_RETURN_NULL();
33 }
34
35 Datum
gin_extract_value_trgm(PG_FUNCTION_ARGS)36 gin_extract_value_trgm(PG_FUNCTION_ARGS)
37 {
38 text *val = (text *) PG_GETARG_TEXT_PP(0);
39 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
40 Datum *entries = NULL;
41 TRGM *trg;
42 int32 trglen;
43
44 *nentries = 0;
45
46 trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
47 trglen = ARRNELEM(trg);
48
49 if (trglen > 0)
50 {
51 trgm *ptr;
52 int32 i;
53
54 *nentries = trglen;
55 entries = (Datum *) palloc(sizeof(Datum) * trglen);
56
57 ptr = GETARR(trg);
58 for (i = 0; i < trglen; i++)
59 {
60 int32 item = trgm2int(ptr);
61
62 entries[i] = Int32GetDatum(item);
63 ptr++;
64 }
65 }
66
67 PG_RETURN_POINTER(entries);
68 }
69
70 Datum
gin_extract_query_trgm(PG_FUNCTION_ARGS)71 gin_extract_query_trgm(PG_FUNCTION_ARGS)
72 {
73 text *val = (text *) PG_GETARG_TEXT_PP(0);
74 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
75 StrategyNumber strategy = PG_GETARG_UINT16(2);
76
77 /* bool **pmatch = (bool **) PG_GETARG_POINTER(3); */
78 Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
79
80 /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
81 int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
82 Datum *entries = NULL;
83 TRGM *trg;
84 int32 trglen;
85 trgm *ptr;
86 TrgmPackedGraph *graph;
87 int32 i;
88
89 switch (strategy)
90 {
91 case SimilarityStrategyNumber:
92 case WordSimilarityStrategyNumber:
93 trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
94 break;
95 case ILikeStrategyNumber:
96 #ifndef IGNORECASE
97 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
98 #endif
99 /* FALL THRU */
100 case LikeStrategyNumber:
101
102 /*
103 * For wildcard search we extract all the trigrams that every
104 * potentially-matching string must include.
105 */
106 trg = generate_wildcard_trgm(VARDATA_ANY(val),
107 VARSIZE_ANY_EXHDR(val));
108 break;
109 case RegExpICaseStrategyNumber:
110 #ifndef IGNORECASE
111 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
112 #endif
113 /* FALL THRU */
114 case RegExpStrategyNumber:
115 trg = createTrgmNFA(val, PG_GET_COLLATION(),
116 &graph, CurrentMemoryContext);
117 if (trg && ARRNELEM(trg) > 0)
118 {
119 /*
120 * Successful regex processing: store NFA-like graph as
121 * extra_data. GIN API requires an array of nentries
122 * Pointers, but we just put the same value in each element.
123 */
124 trglen = ARRNELEM(trg);
125 *extra_data = (Pointer *) palloc(sizeof(Pointer) * trglen);
126 for (i = 0; i < trglen; i++)
127 (*extra_data)[i] = (Pointer) graph;
128 }
129 else
130 {
131 /* No result: have to do full index scan. */
132 *nentries = 0;
133 *searchMode = GIN_SEARCH_MODE_ALL;
134 PG_RETURN_POINTER(entries);
135 }
136 break;
137 default:
138 elog(ERROR, "unrecognized strategy number: %d", strategy);
139 trg = NULL; /* keep compiler quiet */
140 break;
141 }
142
143 trglen = ARRNELEM(trg);
144 *nentries = trglen;
145
146 if (trglen > 0)
147 {
148 entries = (Datum *) palloc(sizeof(Datum) * trglen);
149 ptr = GETARR(trg);
150 for (i = 0; i < trglen; i++)
151 {
152 int32 item = trgm2int(ptr);
153
154 entries[i] = Int32GetDatum(item);
155 ptr++;
156 }
157 }
158
159 /*
160 * If no trigram was extracted then we have to scan all the index.
161 */
162 if (trglen == 0)
163 *searchMode = GIN_SEARCH_MODE_ALL;
164
165 PG_RETURN_POINTER(entries);
166 }
167
168 Datum
gin_trgm_consistent(PG_FUNCTION_ARGS)169 gin_trgm_consistent(PG_FUNCTION_ARGS)
170 {
171 bool *check = (bool *) PG_GETARG_POINTER(0);
172 StrategyNumber strategy = PG_GETARG_UINT16(1);
173
174 /* text *query = PG_GETARG_TEXT_PP(2); */
175 int32 nkeys = PG_GETARG_INT32(3);
176 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
177 bool *recheck = (bool *) PG_GETARG_POINTER(5);
178 bool res;
179 int32 i,
180 ntrue;
181 double nlimit;
182
183 /* All cases served by this function are inexact */
184 *recheck = true;
185
186 switch (strategy)
187 {
188 case SimilarityStrategyNumber:
189 case WordSimilarityStrategyNumber:
190 nlimit = (strategy == SimilarityStrategyNumber) ?
191 similarity_threshold : word_similarity_threshold;
192
193 /* Count the matches */
194 ntrue = 0;
195 for (i = 0; i < nkeys; i++)
196 {
197 if (check[i])
198 ntrue++;
199 }
200
201 /*--------------------
202 * If DIVUNION is defined then similarity formula is:
203 * c / (len1 + len2 - c)
204 * where c is number of common trigrams and it stands as ntrue in
205 * this code. Here we don't know value of len2 but we can assume
206 * that c (ntrue) is a lower bound of len2, so upper bound of
207 * similarity is:
208 * c / (len1 + c - c) => c / len1
209 * If DIVUNION is not defined then similarity formula is:
210 * c / max(len1, len2)
211 * And again, c (ntrue) is a lower bound of len2, but c <= len1
212 * just by definition and, consequently, upper bound of
213 * similarity is just c / len1.
214 * So, independently on DIVUNION the upper bound formula is the same.
215 */
216 res = (nkeys == 0) ? false :
217 (((((float4) ntrue) / ((float4) nkeys))) >= nlimit);
218 break;
219 case ILikeStrategyNumber:
220 #ifndef IGNORECASE
221 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
222 #endif
223 /* FALL THRU */
224 case LikeStrategyNumber:
225 /* Check if all extracted trigrams are presented. */
226 res = true;
227 for (i = 0; i < nkeys; i++)
228 {
229 if (!check[i])
230 {
231 res = false;
232 break;
233 }
234 }
235 break;
236 case RegExpICaseStrategyNumber:
237 #ifndef IGNORECASE
238 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
239 #endif
240 /* FALL THRU */
241 case RegExpStrategyNumber:
242 if (nkeys < 1)
243 {
244 /* Regex processing gave no result: do full index scan */
245 res = true;
246 }
247 else
248 res = trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
249 check);
250 break;
251 default:
252 elog(ERROR, "unrecognized strategy number: %d", strategy);
253 res = false; /* keep compiler quiet */
254 break;
255 }
256
257 PG_RETURN_BOOL(res);
258 }
259
260 /*
261 * In all cases, GIN_TRUE is at least as favorable to inclusion as
262 * GIN_MAYBE. If no better option is available, simply treat
263 * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary
264 * consistent function.
265 */
266 Datum
gin_trgm_triconsistent(PG_FUNCTION_ARGS)267 gin_trgm_triconsistent(PG_FUNCTION_ARGS)
268 {
269 GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
270 StrategyNumber strategy = PG_GETARG_UINT16(1);
271
272 /* text *query = PG_GETARG_TEXT_PP(2); */
273 int32 nkeys = PG_GETARG_INT32(3);
274 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
275 GinTernaryValue res = GIN_MAYBE;
276 int32 i,
277 ntrue;
278 bool *boolcheck;
279 double nlimit;
280
281 switch (strategy)
282 {
283 case SimilarityStrategyNumber:
284 case WordSimilarityStrategyNumber:
285 nlimit = (strategy == SimilarityStrategyNumber) ?
286 similarity_threshold : word_similarity_threshold;
287
288 /* Count the matches */
289 ntrue = 0;
290 for (i = 0; i < nkeys; i++)
291 {
292 if (check[i] != GIN_FALSE)
293 ntrue++;
294 }
295
296 /*
297 * See comment in gin_trgm_consistent() about * upper bound
298 * formula
299 */
300 res = (nkeys == 0)
301 ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit)
302 ? GIN_MAYBE : GIN_FALSE);
303 break;
304 case ILikeStrategyNumber:
305 #ifndef IGNORECASE
306 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
307 #endif
308 /* FALL THRU */
309 case LikeStrategyNumber:
310 /* Check if all extracted trigrams are presented. */
311 res = GIN_MAYBE;
312 for (i = 0; i < nkeys; i++)
313 {
314 if (check[i] == GIN_FALSE)
315 {
316 res = GIN_FALSE;
317 break;
318 }
319 }
320 break;
321 case RegExpICaseStrategyNumber:
322 #ifndef IGNORECASE
323 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
324 #endif
325 /* FALL THRU */
326 case RegExpStrategyNumber:
327 if (nkeys < 1)
328 {
329 /* Regex processing gave no result: do full index scan */
330 res = GIN_MAYBE;
331 }
332 else
333 {
334 /*
335 * As trigramsMatchGraph implements a monotonic boolean
336 * function, promoting all GIN_MAYBE keys to GIN_TRUE will
337 * give a conservative result.
338 */
339 boolcheck = (bool *) palloc(sizeof(bool) * nkeys);
340 for (i = 0; i < nkeys; i++)
341 boolcheck[i] = (check[i] != GIN_FALSE);
342 if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
343 boolcheck))
344 res = GIN_FALSE;
345 pfree(boolcheck);
346 }
347 break;
348 default:
349 elog(ERROR, "unrecognized strategy number: %d", strategy);
350 res = GIN_FALSE; /* keep compiler quiet */
351 break;
352 }
353
354 /* All cases served by this function are inexact */
355 Assert(res != GIN_TRUE);
356 PG_RETURN_GIN_TERNARY_VALUE(res);
357 }
358