1 /*-------------------------------------------------------------------------
2 *
3 * wparser.c
4 * Standard interface to word parser
5 *
6 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7 *
8 *
9 * IDENTIFICATION
10 * src/backend/tsearch/wparser.c
11 *
12 *-------------------------------------------------------------------------
13 */
14 #include "postgres.h"
15
16 #include "catalog/namespace.h"
17 #include "catalog/pg_type.h"
18 #include "commands/defrem.h"
19 #include "common/jsonapi.h"
20 #include "funcapi.h"
21 #include "tsearch/ts_cache.h"
22 #include "tsearch/ts_utils.h"
23 #include "utils/builtins.h"
24 #include "utils/jsonfuncs.h"
25 #include "utils/varlena.h"
26
27 /******sql-level interface******/
28
/*
 * Cross-call iteration state for the ts_token_type_* set-returning
 * functions.
 */
typedef struct
{
	int			cur;			/* index of the next "list" entry to emit */
	LexDescr   *list;			/* result of the parser's lextype method;
								 * iteration ends at the first entry whose
								 * lexid is zero */
} TSTokenTypeStorage;
34
/*
 * State for ts_headline_json_* and ts_headline_jsonb_*: handed to
 * headline_json_value() as its opaque "_state" argument for every string
 * value that is transformed.
 */
typedef struct HeadlineJsonState
{
	HeadlineParsedText *prs;	/* parse workspace, reused for each value */
	TSConfigCacheEntry *cfg;	/* text search configuration in use */
	TSParserCacheEntry *prsobj; /* parser belonging to that configuration */
	TSQuery		query;			/* query the headline is built for */
	List	   *prsoptions;		/* deserialized options, or NIL if none given */
	bool		transformed;	/* set to true once headline_json_value() has
								 * processed at least one value */
} HeadlineJsonState;
45
/*
 * Forward declaration: the ts_headline_json(b)_byid_opt functions take this
 * function's address before its definition further down in the file.
 */
static text *headline_json_value(void *_state, char *elem_value, int elem_len);
47
48 static void
tt_setup_firstcall(FuncCallContext * funcctx,Oid prsid)49 tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
50 {
51 TupleDesc tupdesc;
52 MemoryContext oldcontext;
53 TSTokenTypeStorage *st;
54 TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid);
55
56 if (!OidIsValid(prs->lextypeOid))
57 elog(ERROR, "method lextype isn't defined for text search parser %u",
58 prsid);
59
60 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
61
62 st = (TSTokenTypeStorage *) palloc(sizeof(TSTokenTypeStorage));
63 st->cur = 0;
64 /* lextype takes one dummy argument */
65 st->list = (LexDescr *) DatumGetPointer(OidFunctionCall1(prs->lextypeOid,
66 (Datum) 0));
67 funcctx->user_fctx = (void *) st;
68
69 tupdesc = CreateTemplateTupleDesc(3);
70 TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid",
71 INT4OID, -1, 0);
72 TupleDescInitEntry(tupdesc, (AttrNumber) 2, "alias",
73 TEXTOID, -1, 0);
74 TupleDescInitEntry(tupdesc, (AttrNumber) 3, "description",
75 TEXTOID, -1, 0);
76
77 funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
78 MemoryContextSwitchTo(oldcontext);
79 }
80
81 static Datum
tt_process_call(FuncCallContext * funcctx)82 tt_process_call(FuncCallContext *funcctx)
83 {
84 TSTokenTypeStorage *st;
85
86 st = (TSTokenTypeStorage *) funcctx->user_fctx;
87 if (st->list && st->list[st->cur].lexid)
88 {
89 Datum result;
90 char *values[3];
91 char txtid[16];
92 HeapTuple tuple;
93
94 sprintf(txtid, "%d", st->list[st->cur].lexid);
95 values[0] = txtid;
96 values[1] = st->list[st->cur].alias;
97 values[2] = st->list[st->cur].descr;
98
99 tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
100 result = HeapTupleGetDatum(tuple);
101
102 pfree(values[1]);
103 pfree(values[2]);
104 st->cur++;
105 return result;
106 }
107 return (Datum) 0;
108 }
109
110 Datum
ts_token_type_byid(PG_FUNCTION_ARGS)111 ts_token_type_byid(PG_FUNCTION_ARGS)
112 {
113 FuncCallContext *funcctx;
114 Datum result;
115
116 if (SRF_IS_FIRSTCALL())
117 {
118 funcctx = SRF_FIRSTCALL_INIT();
119 tt_setup_firstcall(funcctx, PG_GETARG_OID(0));
120 }
121
122 funcctx = SRF_PERCALL_SETUP();
123
124 if ((result = tt_process_call(funcctx)) != (Datum) 0)
125 SRF_RETURN_NEXT(funcctx, result);
126 SRF_RETURN_DONE(funcctx);
127 }
128
129 Datum
ts_token_type_byname(PG_FUNCTION_ARGS)130 ts_token_type_byname(PG_FUNCTION_ARGS)
131 {
132 FuncCallContext *funcctx;
133 Datum result;
134
135 if (SRF_IS_FIRSTCALL())
136 {
137 text *prsname = PG_GETARG_TEXT_PP(0);
138 Oid prsId;
139
140 funcctx = SRF_FIRSTCALL_INIT();
141 prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false);
142 tt_setup_firstcall(funcctx, prsId);
143 }
144
145 funcctx = SRF_PERCALL_SETUP();
146
147 if ((result = tt_process_call(funcctx)) != (Datum) 0)
148 SRF_RETURN_NEXT(funcctx, result);
149 SRF_RETURN_DONE(funcctx);
150 }
151
/*
 * One token produced by the parser, as collected by prs_setup_firstcall().
 */
typedef struct
{
	int			type;			/* token type returned by the parser's
								 * prstoken method */
	char	   *lexeme;			/* NUL-terminated private copy of the token
								 * text */
} LexemeEntry;
157
/*
 * Cross-call iteration state for the ts_parse_* set-returning functions.
 */
typedef struct
{
	int			cur;			/* index of the next entry to emit */
	int			len;			/* allocated size of "list" while collecting;
								 * number of valid entries once setup is
								 * finished */
	LexemeEntry *list;			/* collected tokens */
} PrsStorage;
164
165
166 static void
prs_setup_firstcall(FuncCallContext * funcctx,Oid prsid,text * txt)167 prs_setup_firstcall(FuncCallContext *funcctx, Oid prsid, text *txt)
168 {
169 TupleDesc tupdesc;
170 MemoryContext oldcontext;
171 PrsStorage *st;
172 TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid);
173 char *lex = NULL;
174 int llen = 0,
175 type = 0;
176 void *prsdata;
177
178 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
179
180 st = (PrsStorage *) palloc(sizeof(PrsStorage));
181 st->cur = 0;
182 st->len = 16;
183 st->list = (LexemeEntry *) palloc(sizeof(LexemeEntry) * st->len);
184
185 prsdata = (void *) DatumGetPointer(FunctionCall2(&prs->prsstart,
186 PointerGetDatum(VARDATA_ANY(txt)),
187 Int32GetDatum(VARSIZE_ANY_EXHDR(txt))));
188
189 while ((type = DatumGetInt32(FunctionCall3(&prs->prstoken,
190 PointerGetDatum(prsdata),
191 PointerGetDatum(&lex),
192 PointerGetDatum(&llen)))) != 0)
193 {
194 if (st->cur >= st->len)
195 {
196 st->len = 2 * st->len;
197 st->list = (LexemeEntry *) repalloc(st->list, sizeof(LexemeEntry) * st->len);
198 }
199 st->list[st->cur].lexeme = palloc(llen + 1);
200 memcpy(st->list[st->cur].lexeme, lex, llen);
201 st->list[st->cur].lexeme[llen] = '\0';
202 st->list[st->cur].type = type;
203 st->cur++;
204 }
205
206 FunctionCall1(&prs->prsend, PointerGetDatum(prsdata));
207
208 st->len = st->cur;
209 st->cur = 0;
210
211 funcctx->user_fctx = (void *) st;
212 tupdesc = CreateTemplateTupleDesc(2);
213 TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid",
214 INT4OID, -1, 0);
215 TupleDescInitEntry(tupdesc, (AttrNumber) 2, "token",
216 TEXTOID, -1, 0);
217
218 funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
219 MemoryContextSwitchTo(oldcontext);
220 }
221
222 static Datum
prs_process_call(FuncCallContext * funcctx)223 prs_process_call(FuncCallContext *funcctx)
224 {
225 PrsStorage *st;
226
227 st = (PrsStorage *) funcctx->user_fctx;
228 if (st->cur < st->len)
229 {
230 Datum result;
231 char *values[2];
232 char tid[16];
233 HeapTuple tuple;
234
235 values[0] = tid;
236 sprintf(tid, "%d", st->list[st->cur].type);
237 values[1] = st->list[st->cur].lexeme;
238 tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
239 result = HeapTupleGetDatum(tuple);
240
241 pfree(values[1]);
242 st->cur++;
243 return result;
244 }
245 return (Datum) 0;
246 }
247
248 Datum
ts_parse_byid(PG_FUNCTION_ARGS)249 ts_parse_byid(PG_FUNCTION_ARGS)
250 {
251 FuncCallContext *funcctx;
252 Datum result;
253
254 if (SRF_IS_FIRSTCALL())
255 {
256 text *txt = PG_GETARG_TEXT_PP(1);
257
258 funcctx = SRF_FIRSTCALL_INIT();
259 prs_setup_firstcall(funcctx, PG_GETARG_OID(0), txt);
260 PG_FREE_IF_COPY(txt, 1);
261 }
262
263 funcctx = SRF_PERCALL_SETUP();
264
265 if ((result = prs_process_call(funcctx)) != (Datum) 0)
266 SRF_RETURN_NEXT(funcctx, result);
267 SRF_RETURN_DONE(funcctx);
268 }
269
270 Datum
ts_parse_byname(PG_FUNCTION_ARGS)271 ts_parse_byname(PG_FUNCTION_ARGS)
272 {
273 FuncCallContext *funcctx;
274 Datum result;
275
276 if (SRF_IS_FIRSTCALL())
277 {
278 text *prsname = PG_GETARG_TEXT_PP(0);
279 text *txt = PG_GETARG_TEXT_PP(1);
280 Oid prsId;
281
282 funcctx = SRF_FIRSTCALL_INIT();
283 prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false);
284 prs_setup_firstcall(funcctx, prsId, txt);
285 }
286
287 funcctx = SRF_PERCALL_SETUP();
288
289 if ((result = prs_process_call(funcctx)) != (Datum) 0)
290 SRF_RETURN_NEXT(funcctx, result);
291 SRF_RETURN_DONE(funcctx);
292 }
293
294 Datum
ts_headline_byid_opt(PG_FUNCTION_ARGS)295 ts_headline_byid_opt(PG_FUNCTION_ARGS)
296 {
297 Oid tsconfig = PG_GETARG_OID(0);
298 text *in = PG_GETARG_TEXT_PP(1);
299 TSQuery query = PG_GETARG_TSQUERY(2);
300 text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_PP(3) : NULL;
301 HeadlineParsedText prs;
302 List *prsoptions;
303 text *out;
304 TSConfigCacheEntry *cfg;
305 TSParserCacheEntry *prsobj;
306
307 cfg = lookup_ts_config_cache(tsconfig);
308 prsobj = lookup_ts_parser_cache(cfg->prsId);
309
310 if (!OidIsValid(prsobj->headlineOid))
311 ereport(ERROR,
312 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
313 errmsg("text search parser does not support headline creation")));
314
315 memset(&prs, 0, sizeof(HeadlineParsedText));
316 prs.lenwords = 32;
317 prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
318
319 hlparsetext(cfg->cfgId, &prs, query,
320 VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in));
321
322 if (opt)
323 prsoptions = deserialize_deflist(PointerGetDatum(opt));
324 else
325 prsoptions = NIL;
326
327 FunctionCall3(&(prsobj->prsheadline),
328 PointerGetDatum(&prs),
329 PointerGetDatum(prsoptions),
330 PointerGetDatum(query));
331
332 out = generateHeadline(&prs);
333
334 PG_FREE_IF_COPY(in, 1);
335 PG_FREE_IF_COPY(query, 2);
336 if (opt)
337 PG_FREE_IF_COPY(opt, 3);
338 pfree(prs.words);
339 pfree(prs.startsel);
340 pfree(prs.stopsel);
341
342 PG_RETURN_POINTER(out);
343 }
344
345 Datum
ts_headline_byid(PG_FUNCTION_ARGS)346 ts_headline_byid(PG_FUNCTION_ARGS)
347 {
348 PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt,
349 PG_GETARG_DATUM(0),
350 PG_GETARG_DATUM(1),
351 PG_GETARG_DATUM(2)));
352 }
353
354 Datum
ts_headline(PG_FUNCTION_ARGS)355 ts_headline(PG_FUNCTION_ARGS)
356 {
357 PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt,
358 ObjectIdGetDatum(getTSCurrentConfig(true)),
359 PG_GETARG_DATUM(0),
360 PG_GETARG_DATUM(1)));
361 }
362
363 Datum
ts_headline_opt(PG_FUNCTION_ARGS)364 ts_headline_opt(PG_FUNCTION_ARGS)
365 {
366 PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_byid_opt,
367 ObjectIdGetDatum(getTSCurrentConfig(true)),
368 PG_GETARG_DATUM(0),
369 PG_GETARG_DATUM(1),
370 PG_GETARG_DATUM(2)));
371 }
372
373 Datum
ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)374 ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
375 {
376 Oid tsconfig = PG_GETARG_OID(0);
377 Jsonb *jb = PG_GETARG_JSONB_P(1);
378 TSQuery query = PG_GETARG_TSQUERY(2);
379 text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
380 Jsonb *out;
381 JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
382 HeadlineParsedText prs;
383 HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
384
385 memset(&prs, 0, sizeof(HeadlineParsedText));
386 prs.lenwords = 32;
387 prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
388
389 state->prs = &prs;
390 state->cfg = lookup_ts_config_cache(tsconfig);
391 state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
392 state->query = query;
393 if (opt)
394 state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
395 else
396 state->prsoptions = NIL;
397
398 if (!OidIsValid(state->prsobj->headlineOid))
399 ereport(ERROR,
400 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
401 errmsg("text search parser does not support headline creation")));
402
403 out = transform_jsonb_string_values(jb, state, action);
404
405 PG_FREE_IF_COPY(jb, 1);
406 PG_FREE_IF_COPY(query, 2);
407 if (opt)
408 PG_FREE_IF_COPY(opt, 3);
409
410 pfree(prs.words);
411
412 if (state->transformed)
413 {
414 pfree(prs.startsel);
415 pfree(prs.stopsel);
416 }
417
418 PG_RETURN_JSONB_P(out);
419 }
420
421 Datum
ts_headline_jsonb(PG_FUNCTION_ARGS)422 ts_headline_jsonb(PG_FUNCTION_ARGS)
423 {
424 PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
425 ObjectIdGetDatum(getTSCurrentConfig(true)),
426 PG_GETARG_DATUM(0),
427 PG_GETARG_DATUM(1)));
428 }
429
430 Datum
ts_headline_jsonb_byid(PG_FUNCTION_ARGS)431 ts_headline_jsonb_byid(PG_FUNCTION_ARGS)
432 {
433 PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
434 PG_GETARG_DATUM(0),
435 PG_GETARG_DATUM(1),
436 PG_GETARG_DATUM(2)));
437 }
438
439 Datum
ts_headline_jsonb_opt(PG_FUNCTION_ARGS)440 ts_headline_jsonb_opt(PG_FUNCTION_ARGS)
441 {
442 PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_jsonb_byid_opt,
443 ObjectIdGetDatum(getTSCurrentConfig(true)),
444 PG_GETARG_DATUM(0),
445 PG_GETARG_DATUM(1),
446 PG_GETARG_DATUM(2)));
447 }
448
449 Datum
ts_headline_json_byid_opt(PG_FUNCTION_ARGS)450 ts_headline_json_byid_opt(PG_FUNCTION_ARGS)
451 {
452 Oid tsconfig = PG_GETARG_OID(0);
453 text *json = PG_GETARG_TEXT_P(1);
454 TSQuery query = PG_GETARG_TSQUERY(2);
455 text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
456 text *out;
457 JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
458
459 HeadlineParsedText prs;
460 HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
461
462 memset(&prs, 0, sizeof(HeadlineParsedText));
463 prs.lenwords = 32;
464 prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
465
466 state->prs = &prs;
467 state->cfg = lookup_ts_config_cache(tsconfig);
468 state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
469 state->query = query;
470 if (opt)
471 state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
472 else
473 state->prsoptions = NIL;
474
475 if (!OidIsValid(state->prsobj->headlineOid))
476 ereport(ERROR,
477 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
478 errmsg("text search parser does not support headline creation")));
479
480 out = transform_json_string_values(json, state, action);
481
482 PG_FREE_IF_COPY(json, 1);
483 PG_FREE_IF_COPY(query, 2);
484 if (opt)
485 PG_FREE_IF_COPY(opt, 3);
486 pfree(prs.words);
487
488 if (state->transformed)
489 {
490 pfree(prs.startsel);
491 pfree(prs.stopsel);
492 }
493
494 PG_RETURN_TEXT_P(out);
495 }
496
497 Datum
ts_headline_json(PG_FUNCTION_ARGS)498 ts_headline_json(PG_FUNCTION_ARGS)
499 {
500 PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
501 ObjectIdGetDatum(getTSCurrentConfig(true)),
502 PG_GETARG_DATUM(0),
503 PG_GETARG_DATUM(1)));
504 }
505
506 Datum
ts_headline_json_byid(PG_FUNCTION_ARGS)507 ts_headline_json_byid(PG_FUNCTION_ARGS)
508 {
509 PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
510 PG_GETARG_DATUM(0),
511 PG_GETARG_DATUM(1),
512 PG_GETARG_DATUM(2)));
513 }
514
515 Datum
ts_headline_json_opt(PG_FUNCTION_ARGS)516 ts_headline_json_opt(PG_FUNCTION_ARGS)
517 {
518 PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_json_byid_opt,
519 ObjectIdGetDatum(getTSCurrentConfig(true)),
520 PG_GETARG_DATUM(0),
521 PG_GETARG_DATUM(1),
522 PG_GETARG_DATUM(2)));
523 }
524
525
526 /*
527 * Return headline in text from, generated from a json(b) element
528 */
529 static text *
headline_json_value(void * _state,char * elem_value,int elem_len)530 headline_json_value(void *_state, char *elem_value, int elem_len)
531 {
532 HeadlineJsonState *state = (HeadlineJsonState *) _state;
533
534 HeadlineParsedText *prs = state->prs;
535 TSConfigCacheEntry *cfg = state->cfg;
536 TSParserCacheEntry *prsobj = state->prsobj;
537 TSQuery query = state->query;
538 List *prsoptions = state->prsoptions;
539
540 prs->curwords = 0;
541 hlparsetext(cfg->cfgId, prs, query, elem_value, elem_len);
542 FunctionCall3(&(prsobj->prsheadline),
543 PointerGetDatum(prs),
544 PointerGetDatum(prsoptions),
545 PointerGetDatum(query));
546
547 state->transformed = true;
548 return generateHeadline(prs);
549 }
550