1 /*-------------------------------------------------------------------------
2 *
3 * tsginidx.c
4 * GIN support functions for tsvector_ops
5 *
6 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7 *
8 *
9 * IDENTIFICATION
10 * src/backend/utils/adt/tsginidx.c
11 *
12 *-------------------------------------------------------------------------
13 */
14 #include "postgres.h"
15
16 #include "access/gin.h"
17 #include "access/stratnum.h"
18 #include "miscadmin.h"
19 #include "tsearch/ts_type.h"
20 #include "tsearch/ts_utils.h"
21 #include "utils/builtins.h"
22
23
24 Datum
gin_cmp_tslexeme(PG_FUNCTION_ARGS)25 gin_cmp_tslexeme(PG_FUNCTION_ARGS)
26 {
27 text *a = PG_GETARG_TEXT_PP(0);
28 text *b = PG_GETARG_TEXT_PP(1);
29 int cmp;
30
31 cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
32 VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
33 false);
34
35 PG_FREE_IF_COPY(a, 0);
36 PG_FREE_IF_COPY(b, 1);
37 PG_RETURN_INT32(cmp);
38 }
39
40 Datum
gin_cmp_prefix(PG_FUNCTION_ARGS)41 gin_cmp_prefix(PG_FUNCTION_ARGS)
42 {
43 text *a = PG_GETARG_TEXT_PP(0);
44 text *b = PG_GETARG_TEXT_PP(1);
45
46 #ifdef NOT_USED
47 StrategyNumber strategy = PG_GETARG_UINT16(2);
48 Pointer extra_data = PG_GETARG_POINTER(3);
49 #endif
50 int cmp;
51
52 cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
53 VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
54 true);
55
56 if (cmp < 0)
57 cmp = 1; /* prevent continue scan */
58
59 PG_FREE_IF_COPY(a, 0);
60 PG_FREE_IF_COPY(b, 1);
61 PG_RETURN_INT32(cmp);
62 }
63
64 Datum
gin_extract_tsvector(PG_FUNCTION_ARGS)65 gin_extract_tsvector(PG_FUNCTION_ARGS)
66 {
67 TSVector vector = PG_GETARG_TSVECTOR(0);
68 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
69 Datum *entries = NULL;
70
71 *nentries = vector->size;
72 if (vector->size > 0)
73 {
74 int i;
75 WordEntry *we = ARRPTR(vector);
76
77 entries = (Datum *) palloc(sizeof(Datum) * vector->size);
78
79 for (i = 0; i < vector->size; i++)
80 {
81 text *txt;
82
83 txt = cstring_to_text_with_len(STRPTR(vector) + we->pos, we->len);
84 entries[i] = PointerGetDatum(txt);
85
86 we++;
87 }
88 }
89
90 PG_FREE_IF_COPY(vector, 0);
91 PG_RETURN_POINTER(entries);
92 }
93
94 Datum
gin_extract_tsquery(PG_FUNCTION_ARGS)95 gin_extract_tsquery(PG_FUNCTION_ARGS)
96 {
97 TSQuery query = PG_GETARG_TSQUERY(0);
98 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
99
100 /* StrategyNumber strategy = PG_GETARG_UINT16(2); */
101 bool **ptr_partialmatch = (bool **) PG_GETARG_POINTER(3);
102 Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
103
104 /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
105 int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
106 Datum *entries = NULL;
107
108 *nentries = 0;
109
110 if (query->size > 0)
111 {
112 QueryItem *item = GETQUERY(query);
113 int32 i,
114 j;
115 bool *partialmatch;
116 int *map_item_operand;
117
118 /*
119 * If the query doesn't have any required positive matches (for
120 * instance, it's something like '! foo'), we have to do a full index
121 * scan.
122 */
123 if (tsquery_requires_match(item))
124 *searchMode = GIN_SEARCH_MODE_DEFAULT;
125 else
126 *searchMode = GIN_SEARCH_MODE_ALL;
127
128 /* count number of VAL items */
129 j = 0;
130 for (i = 0; i < query->size; i++)
131 {
132 if (item[i].type == QI_VAL)
133 j++;
134 }
135 *nentries = j;
136
137 entries = (Datum *) palloc(sizeof(Datum) * j);
138 partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * j);
139
140 /*
141 * Make map to convert item's number to corresponding operand's (the
142 * same, entry's) number. Entry's number is used in check array in
143 * consistent method. We use the same map for each entry.
144 */
145 *extra_data = (Pointer *) palloc(sizeof(Pointer) * j);
146 map_item_operand = (int *) palloc0(sizeof(int) * query->size);
147
148 /* Now rescan the VAL items and fill in the arrays */
149 j = 0;
150 for (i = 0; i < query->size; i++)
151 {
152 if (item[i].type == QI_VAL)
153 {
154 QueryOperand *val = &item[i].qoperand;
155 text *txt;
156
157 txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance,
158 val->length);
159 entries[j] = PointerGetDatum(txt);
160 partialmatch[j] = val->prefix;
161 (*extra_data)[j] = (Pointer) map_item_operand;
162 map_item_operand[i] = j;
163 j++;
164 }
165 }
166 }
167
168 PG_FREE_IF_COPY(query, 0);
169
170 PG_RETURN_POINTER(entries);
171 }
172
173 typedef struct
174 {
175 QueryItem *first_item;
176 GinTernaryValue *check;
177 int *map_item_operand;
178 bool *need_recheck;
179 } GinChkVal;
180
181 static GinTernaryValue
checkcondition_gin_internal(GinChkVal * gcv,QueryOperand * val,ExecPhraseData * data)182 checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *data)
183 {
184 int j;
185
186 /*
187 * if any val requiring a weight is used or caller needs position
188 * information then set recheck flag
189 */
190 if (val->weight != 0 || data != NULL)
191 *(gcv->need_recheck) = true;
192
193 /* convert item's number to corresponding entry's (operand's) number */
194 j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];
195
196 /* return presence of current entry in indexed value */
197 return gcv->check[j];
198 }
199
200 /*
201 * Wrapper of check condition function for TS_execute.
202 */
203 static bool
checkcondition_gin(void * checkval,QueryOperand * val,ExecPhraseData * data)204 checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
205 {
206 return checkcondition_gin_internal((GinChkVal *) checkval,
207 val,
208 data) != GIN_FALSE;
209 }
210
211 /*
212 * Evaluate tsquery boolean expression using ternary logic.
213 *
214 * Note: the reason we can't use TS_execute() for this is that its API
215 * for the checkcondition callback doesn't allow a MAYBE result to be
216 * returned, but we might have MAYBEs in the gcv->check array.
217 * Perhaps we should change that API.
218 */
219 static GinTernaryValue
TS_execute_ternary(GinChkVal * gcv,QueryItem * curitem,bool in_phrase)220 TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem, bool in_phrase)
221 {
222 GinTernaryValue val1,
223 val2,
224 result;
225
226 /* since this function recurses, it could be driven to stack overflow */
227 check_stack_depth();
228
229 if (curitem->type == QI_VAL)
230 return
231 checkcondition_gin_internal(gcv,
232 (QueryOperand *) curitem,
233 NULL /* don't have position info */ );
234
235 switch (curitem->qoperator.oper)
236 {
237 case OP_NOT:
238
239 /*
240 * Below a phrase search, force NOT's result to MAYBE. We cannot
241 * invert a TRUE result from the subexpression to FALSE, since
242 * TRUE only says that the subexpression matches somewhere, not
243 * that it matches everywhere, so there might be positions where
244 * the NOT will match. We could invert FALSE to TRUE, but there's
245 * little point in distinguishing TRUE from MAYBE, since a recheck
246 * will have been forced already.
247 */
248 if (in_phrase)
249 return GIN_MAYBE;
250
251 result = TS_execute_ternary(gcv, curitem + 1, in_phrase);
252 if (result == GIN_MAYBE)
253 return result;
254 return !result;
255
256 case OP_PHRASE:
257
258 /*
259 * GIN doesn't contain any information about positions, so treat
260 * OP_PHRASE as OP_AND with recheck requirement, and always
261 * reporting MAYBE not TRUE.
262 */
263 *(gcv->need_recheck) = true;
264 /* Pass down in_phrase == true in case there's a NOT below */
265 in_phrase = true;
266
267 /* FALL THRU */
268
269 case OP_AND:
270 val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left,
271 in_phrase);
272 if (val1 == GIN_FALSE)
273 return GIN_FALSE;
274 val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase);
275 if (val2 == GIN_FALSE)
276 return GIN_FALSE;
277 if (val1 == GIN_TRUE && val2 == GIN_TRUE &&
278 curitem->qoperator.oper != OP_PHRASE)
279 return GIN_TRUE;
280 else
281 return GIN_MAYBE;
282
283 case OP_OR:
284 val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left,
285 in_phrase);
286 if (val1 == GIN_TRUE)
287 return GIN_TRUE;
288 val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase);
289 if (val2 == GIN_TRUE)
290 return GIN_TRUE;
291 if (val1 == GIN_FALSE && val2 == GIN_FALSE)
292 return GIN_FALSE;
293 else
294 return GIN_MAYBE;
295
296 default:
297 elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
298 }
299
300 /* not reachable, but keep compiler quiet */
301 return false;
302 }
303
304 Datum
gin_tsquery_consistent(PG_FUNCTION_ARGS)305 gin_tsquery_consistent(PG_FUNCTION_ARGS)
306 {
307 bool *check = (bool *) PG_GETARG_POINTER(0);
308
309 /* StrategyNumber strategy = PG_GETARG_UINT16(1); */
310 TSQuery query = PG_GETARG_TSQUERY(2);
311
312 /* int32 nkeys = PG_GETARG_INT32(3); */
313 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
314 bool *recheck = (bool *) PG_GETARG_POINTER(5);
315 bool res = false;
316
317 /* Initially assume query doesn't require recheck */
318 *recheck = false;
319
320 if (query->size > 0)
321 {
322 GinChkVal gcv;
323
324 /*
325 * check-parameter array has one entry for each value (operand) in the
326 * query.
327 */
328 gcv.first_item = GETQUERY(query);
329 StaticAssertStmt(sizeof(GinTernaryValue) == sizeof(bool),
330 "sizes of GinTernaryValue and bool are not equal");
331 gcv.check = (GinTernaryValue *) check;
332 gcv.map_item_operand = (int *) (extra_data[0]);
333 gcv.need_recheck = recheck;
334
335 res = TS_execute(GETQUERY(query),
336 &gcv,
337 TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_NO_POS,
338 checkcondition_gin);
339 }
340
341 PG_RETURN_BOOL(res);
342 }
343
344 Datum
gin_tsquery_triconsistent(PG_FUNCTION_ARGS)345 gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
346 {
347 GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
348
349 /* StrategyNumber strategy = PG_GETARG_UINT16(1); */
350 TSQuery query = PG_GETARG_TSQUERY(2);
351
352 /* int32 nkeys = PG_GETARG_INT32(3); */
353 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
354 GinTernaryValue res = GIN_FALSE;
355 bool recheck;
356
357 /* Initially assume query doesn't require recheck */
358 recheck = false;
359
360 if (query->size > 0)
361 {
362 GinChkVal gcv;
363
364 /*
365 * check-parameter array has one entry for each value (operand) in the
366 * query.
367 */
368 gcv.first_item = GETQUERY(query);
369 gcv.check = check;
370 gcv.map_item_operand = (int *) (extra_data[0]);
371 gcv.need_recheck = &recheck;
372
373 res = TS_execute_ternary(&gcv, GETQUERY(query), false);
374
375 if (res == GIN_TRUE && recheck)
376 res = GIN_MAYBE;
377 }
378
379 PG_RETURN_GIN_TERNARY_VALUE(res);
380 }
381
382 /*
383 * Formerly, gin_extract_tsvector had only two arguments. Now it has three,
384 * but we still need a pg_proc entry with two args to support reloading
385 * pre-9.1 contrib/tsearch2 opclass declarations. This compatibility
386 * function should go away eventually. (Note: you might say "hey, but the
387 * code above is only *using* two args, so let's just declare it that way".
388 * If you try that you'll find the opr_sanity regression test complains.)
389 */
390 Datum
gin_extract_tsvector_2args(PG_FUNCTION_ARGS)391 gin_extract_tsvector_2args(PG_FUNCTION_ARGS)
392 {
393 if (PG_NARGS() < 3) /* should not happen */
394 elog(ERROR, "gin_extract_tsvector requires three arguments");
395 return gin_extract_tsvector(fcinfo);
396 }
397
398 /*
399 * Likewise, we need a stub version of gin_extract_tsquery declared with
400 * only five arguments.
401 */
402 Datum
gin_extract_tsquery_5args(PG_FUNCTION_ARGS)403 gin_extract_tsquery_5args(PG_FUNCTION_ARGS)
404 {
405 if (PG_NARGS() < 7) /* should not happen */
406 elog(ERROR, "gin_extract_tsquery requires seven arguments");
407 return gin_extract_tsquery(fcinfo);
408 }
409
410 /*
411 * Likewise, we need a stub version of gin_tsquery_consistent declared with
412 * only six arguments.
413 */
414 Datum
gin_tsquery_consistent_6args(PG_FUNCTION_ARGS)415 gin_tsquery_consistent_6args(PG_FUNCTION_ARGS)
416 {
417 if (PG_NARGS() < 8) /* should not happen */
418 elog(ERROR, "gin_tsquery_consistent requires eight arguments");
419 return gin_tsquery_consistent(fcinfo);
420 }
421
422 /*
423 * Likewise, a stub version of gin_extract_tsquery declared with argument
424 * types that are no longer considered appropriate.
425 */
426 Datum
gin_extract_tsquery_oldsig(PG_FUNCTION_ARGS)427 gin_extract_tsquery_oldsig(PG_FUNCTION_ARGS)
428 {
429 return gin_extract_tsquery(fcinfo);
430 }
431
432 /*
433 * Likewise, a stub version of gin_tsquery_consistent declared with argument
434 * types that are no longer considered appropriate.
435 */
436 Datum
gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS)437 gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS)
438 {
439 return gin_tsquery_consistent(fcinfo);
440 }
441