1 /*-------------------------------------------------------------------------
2  *
3  * wparser.c
4  *		Standard interface to word parser
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *	  src/backend/tsearch/wparser.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "catalog/namespace.h"
17 #include "catalog/pg_type.h"
18 #include "commands/defrem.h"
19 #include "common/jsonapi.h"
20 #include "funcapi.h"
21 #include "tsearch/ts_cache.h"
22 #include "tsearch/ts_utils.h"
23 #include "utils/builtins.h"
24 #include "utils/jsonfuncs.h"
25 #include "utils/varlena.h"
26 
27 /******sql-level interface******/
28 
29 typedef struct
30 {
31 	int			cur;
32 	LexDescr   *list;
33 } TSTokenTypeStorage;
34 
35 /* state for ts_headline_json_* */
36 typedef struct HeadlineJsonState
37 {
38 	HeadlineParsedText *prs;
39 	TSConfigCacheEntry *cfg;
40 	TSParserCacheEntry *prsobj;
41 	TSQuery		query;
42 	List	   *prsoptions;
43 	bool		transformed;
44 } HeadlineJsonState;
45 
46 static text *headline_json_value(void *_state, char *elem_value, int elem_len);
47 
48 static void
tt_setup_firstcall(FuncCallContext * funcctx,Oid prsid)49 tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
50 {
51 	TupleDesc	tupdesc;
52 	MemoryContext oldcontext;
53 	TSTokenTypeStorage *st;
54 	TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid);
55 
56 	if (!OidIsValid(prs->lextypeOid))
57 		elog(ERROR, "method lextype isn't defined for text search parser %u",
58 			 prsid);
59 
60 	oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
61 
62 	st = (TSTokenTypeStorage *) palloc(sizeof(TSTokenTypeStorage));
63 	st->cur = 0;
64 	/* lextype takes one dummy argument */
65 	st->list = (LexDescr *) DatumGetPointer(OidFunctionCall1(prs->lextypeOid,
66 															 (Datum) 0));
67 	funcctx->user_fctx = (void *) st;
68 
69 	tupdesc = CreateTemplateTupleDesc(3);
70 	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid",
71 					   INT4OID, -1, 0);
72 	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "alias",
73 					   TEXTOID, -1, 0);
74 	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "description",
75 					   TEXTOID, -1, 0);
76 
77 	funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
78 	MemoryContextSwitchTo(oldcontext);
79 }
80 
81 static Datum
tt_process_call(FuncCallContext * funcctx)82 tt_process_call(FuncCallContext *funcctx)
83 {
84 	TSTokenTypeStorage *st;
85 
86 	st = (TSTokenTypeStorage *) funcctx->user_fctx;
87 	if (st->list && st->list[st->cur].lexid)
88 	{
89 		Datum		result;
90 		char	   *values[3];
91 		char		txtid[16];
92 		HeapTuple	tuple;
93 
94 		sprintf(txtid, "%d", st->list[st->cur].lexid);
95 		values[0] = txtid;
96 		values[1] = st->list[st->cur].alias;
97 		values[2] = st->list[st->cur].descr;
98 
99 		tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
100 		result = HeapTupleGetDatum(tuple);
101 
102 		pfree(values[1]);
103 		pfree(values[2]);
104 		st->cur++;
105 		return result;
106 	}
107 	return (Datum) 0;
108 }
109 
110 Datum
ts_token_type_byid(PG_FUNCTION_ARGS)111 ts_token_type_byid(PG_FUNCTION_ARGS)
112 {
113 	FuncCallContext *funcctx;
114 	Datum		result;
115 
116 	if (SRF_IS_FIRSTCALL())
117 	{
118 		funcctx = SRF_FIRSTCALL_INIT();
119 		tt_setup_firstcall(funcctx, PG_GETARG_OID(0));
120 	}
121 
122 	funcctx = SRF_PERCALL_SETUP();
123 
124 	if ((result = tt_process_call(funcctx)) != (Datum) 0)
125 		SRF_RETURN_NEXT(funcctx, result);
126 	SRF_RETURN_DONE(funcctx);
127 }
128 
129 Datum
ts_token_type_byname(PG_FUNCTION_ARGS)130 ts_token_type_byname(PG_FUNCTION_ARGS)
131 {
132 	FuncCallContext *funcctx;
133 	Datum		result;
134 
135 	if (SRF_IS_FIRSTCALL())
136 	{
137 		text	   *prsname = PG_GETARG_TEXT_PP(0);
138 		Oid			prsId;
139 
140 		funcctx = SRF_FIRSTCALL_INIT();
141 		prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false);
142 		tt_setup_firstcall(funcctx, prsId);
143 	}
144 
145 	funcctx = SRF_PERCALL_SETUP();
146 
147 	if ((result = tt_process_call(funcctx)) != (Datum) 0)
148 		SRF_RETURN_NEXT(funcctx, result);
149 	SRF_RETURN_DONE(funcctx);
150 }
151 
/* One token collected by prs_setup_firstcall() */
typedef struct
{
	int			type;			/* token type reported by the parser */
	char	   *lexeme;			/* NUL-terminated copy of the token text */
} LexemeEntry;
157 
158 typedef struct
159 {
160 	int			cur;
161 	int			len;
162 	LexemeEntry *list;
163 } PrsStorage;
164 
165 
166 static void
prs_setup_firstcall(FuncCallContext * funcctx,Oid prsid,text * txt)167 prs_setup_firstcall(FuncCallContext *funcctx, Oid prsid, text *txt)
168 {
169 	TupleDesc	tupdesc;
170 	MemoryContext oldcontext;
171 	PrsStorage *st;
172 	TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid);
173 	char	   *lex = NULL;
174 	int			llen = 0,
175 				type = 0;
176 	void	   *prsdata;
177 
178 	oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
179 
180 	st = (PrsStorage *) palloc(sizeof(PrsStorage));
181 	st->cur = 0;
182 	st->len = 16;
183 	st->list = (LexemeEntry *) palloc(sizeof(LexemeEntry) * st->len);
184 
185 	prsdata = (void *) DatumGetPointer(FunctionCall2(&prs->prsstart,
186 													 PointerGetDatum(VARDATA_ANY(txt)),
187 													 Int32GetDatum(VARSIZE_ANY_EXHDR(txt))));
188 
189 	while ((type = DatumGetInt32(FunctionCall3(&prs->prstoken,
190 											   PointerGetDatum(prsdata),
191 											   PointerGetDatum(&lex),
192 											   PointerGetDatum(&llen)))) != 0)
193 	{
194 		if (st->cur >= st->len)
195 		{
196 			st->len = 2 * st->len;
197 			st->list = (LexemeEntry *) repalloc(st->list, sizeof(LexemeEntry) * st->len);
198 		}
199 		st->list[st->cur].lexeme = palloc(llen + 1);
200 		memcpy(st->list[st->cur].lexeme, lex, llen);
201 		st->list[st->cur].lexeme[llen] = '\0';
202 		st->list[st->cur].type = type;
203 		st->cur++;
204 	}
205 
206 	FunctionCall1(&prs->prsend, PointerGetDatum(prsdata));
207 
208 	st->len = st->cur;
209 	st->cur = 0;
210 
211 	funcctx->user_fctx = (void *) st;
212 	tupdesc = CreateTemplateTupleDesc(2);
213 	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid",
214 					   INT4OID, -1, 0);
215 	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "token",
216 					   TEXTOID, -1, 0);
217 
218 	funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
219 	MemoryContextSwitchTo(oldcontext);
220 }
221 
222 static Datum
prs_process_call(FuncCallContext * funcctx)223 prs_process_call(FuncCallContext *funcctx)
224 {
225 	PrsStorage *st;
226 
227 	st = (PrsStorage *) funcctx->user_fctx;
228 	if (st->cur < st->len)
229 	{
230 		Datum		result;
231 		char	   *values[2];
232 		char		tid[16];
233 		HeapTuple	tuple;
234 
235 		values[0] = tid;
236 		sprintf(tid, "%d", st->list[st->cur].type);
237 		values[1] = st->list[st->cur].lexeme;
238 		tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
239 		result = HeapTupleGetDatum(tuple);
240 
241 		pfree(values[1]);
242 		st->cur++;
243 		return result;
244 	}
245 	return (Datum) 0;
246 }
247 
248 Datum
ts_parse_byid(PG_FUNCTION_ARGS)249 ts_parse_byid(PG_FUNCTION_ARGS)
250 {
251 	FuncCallContext *funcctx;
252 	Datum		result;
253 
254 	if (SRF_IS_FIRSTCALL())
255 	{
256 		text	   *txt = PG_GETARG_TEXT_PP(1);
257 
258 		funcctx = SRF_FIRSTCALL_INIT();
259 		prs_setup_firstcall(funcctx, PG_GETARG_OID(0), txt);
260 		PG_FREE_IF_COPY(txt, 1);
261 	}
262 
263 	funcctx = SRF_PERCALL_SETUP();
264 
265 	if ((result = prs_process_call(funcctx)) != (Datum) 0)
266 		SRF_RETURN_NEXT(funcctx, result);
267 	SRF_RETURN_DONE(funcctx);
268 }
269 
270 Datum
ts_parse_byname(PG_FUNCTION_ARGS)271 ts_parse_byname(PG_FUNCTION_ARGS)
272 {
273 	FuncCallContext *funcctx;
274 	Datum		result;
275 
276 	if (SRF_IS_FIRSTCALL())
277 	{
278 		text	   *prsname = PG_GETARG_TEXT_PP(0);
279 		text	   *txt = PG_GETARG_TEXT_PP(1);
280 		Oid			prsId;
281 
282 		funcctx = SRF_FIRSTCALL_INIT();
283 		prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false);
284 		prs_setup_firstcall(funcctx, prsId, txt);
285 	}
286 
287 	funcctx = SRF_PERCALL_SETUP();
288 
289 	if ((result = prs_process_call(funcctx)) != (Datum) 0)
290 		SRF_RETURN_NEXT(funcctx, result);
291 	SRF_RETURN_DONE(funcctx);
292 }
293 
294 Datum
ts_headline_byid_opt(PG_FUNCTION_ARGS)295 ts_headline_byid_opt(PG_FUNCTION_ARGS)
296 {
297 	Oid			tsconfig = PG_GETARG_OID(0);
298 	text	   *in = PG_GETARG_TEXT_PP(1);
299 	TSQuery		query = PG_GETARG_TSQUERY(2);
300 	text	   *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_PP(3) : NULL;
301 	HeadlineParsedText prs;
302 	List	   *prsoptions;
303 	text	   *out;
304 	TSConfigCacheEntry *cfg;
305 	TSParserCacheEntry *prsobj;
306 
307 	cfg = lookup_ts_config_cache(tsconfig);
308 	prsobj = lookup_ts_parser_cache(cfg->prsId);
309 
310 	if (!OidIsValid(prsobj->headlineOid))
311 		ereport(ERROR,
312 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
313 				 errmsg("text search parser does not support headline creation")));
314 
315 	memset(&prs, 0, sizeof(HeadlineParsedText));
316 	prs.lenwords = 32;
317 	prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
318 
319 	hlparsetext(cfg->cfgId, &prs, query,
320 				VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in));
321 
322 	if (opt)
323 		prsoptions = deserialize_deflist(PointerGetDatum(opt));
324 	else
325 		prsoptions = NIL;
326 
327 	FunctionCall3(&(prsobj->prsheadline),
328 				  PointerGetDatum(&prs),
329 				  PointerGetDatum(prsoptions),
330 				  PointerGetDatum(query));
331 
332 	out = generateHeadline(&prs);
333 
334 	PG_FREE_IF_COPY(in, 1);
335 	PG_FREE_IF_COPY(query, 2);
336 	if (opt)
337 		PG_FREE_IF_COPY(opt, 3);
338 	pfree(prs.words);
339 	pfree(prs.startsel);
340 	pfree(prs.stopsel);
341 
342 	PG_RETURN_POINTER(out);
343 }
344 
345 Datum
ts_headline_byid(PG_FUNCTION_ARGS)346 ts_headline_byid(PG_FUNCTION_ARGS)
347 {
348 	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt,
349 										PG_GETARG_DATUM(0),
350 										PG_GETARG_DATUM(1),
351 										PG_GETARG_DATUM(2)));
352 }
353 
354 Datum
ts_headline(PG_FUNCTION_ARGS)355 ts_headline(PG_FUNCTION_ARGS)
356 {
357 	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt,
358 										ObjectIdGetDatum(getTSCurrentConfig(true)),
359 										PG_GETARG_DATUM(0),
360 										PG_GETARG_DATUM(1)));
361 }
362 
363 Datum
ts_headline_opt(PG_FUNCTION_ARGS)364 ts_headline_opt(PG_FUNCTION_ARGS)
365 {
366 	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_byid_opt,
367 										ObjectIdGetDatum(getTSCurrentConfig(true)),
368 										PG_GETARG_DATUM(0),
369 										PG_GETARG_DATUM(1),
370 										PG_GETARG_DATUM(2)));
371 }
372 
373 Datum
ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)374 ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
375 {
376 	Oid			tsconfig = PG_GETARG_OID(0);
377 	Jsonb	   *jb = PG_GETARG_JSONB_P(1);
378 	TSQuery		query = PG_GETARG_TSQUERY(2);
379 	text	   *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
380 	Jsonb	   *out;
381 	JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
382 	HeadlineParsedText prs;
383 	HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
384 
385 	memset(&prs, 0, sizeof(HeadlineParsedText));
386 	prs.lenwords = 32;
387 	prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
388 
389 	state->prs = &prs;
390 	state->cfg = lookup_ts_config_cache(tsconfig);
391 	state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
392 	state->query = query;
393 	if (opt)
394 		state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
395 	else
396 		state->prsoptions = NIL;
397 
398 	if (!OidIsValid(state->prsobj->headlineOid))
399 		ereport(ERROR,
400 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
401 				 errmsg("text search parser does not support headline creation")));
402 
403 	out = transform_jsonb_string_values(jb, state, action);
404 
405 	PG_FREE_IF_COPY(jb, 1);
406 	PG_FREE_IF_COPY(query, 2);
407 	if (opt)
408 		PG_FREE_IF_COPY(opt, 3);
409 
410 	pfree(prs.words);
411 
412 	if (state->transformed)
413 	{
414 		pfree(prs.startsel);
415 		pfree(prs.stopsel);
416 	}
417 
418 	PG_RETURN_JSONB_P(out);
419 }
420 
421 Datum
ts_headline_jsonb(PG_FUNCTION_ARGS)422 ts_headline_jsonb(PG_FUNCTION_ARGS)
423 {
424 	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
425 										ObjectIdGetDatum(getTSCurrentConfig(true)),
426 										PG_GETARG_DATUM(0),
427 										PG_GETARG_DATUM(1)));
428 }
429 
430 Datum
ts_headline_jsonb_byid(PG_FUNCTION_ARGS)431 ts_headline_jsonb_byid(PG_FUNCTION_ARGS)
432 {
433 	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
434 										PG_GETARG_DATUM(0),
435 										PG_GETARG_DATUM(1),
436 										PG_GETARG_DATUM(2)));
437 }
438 
439 Datum
ts_headline_jsonb_opt(PG_FUNCTION_ARGS)440 ts_headline_jsonb_opt(PG_FUNCTION_ARGS)
441 {
442 	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_jsonb_byid_opt,
443 										ObjectIdGetDatum(getTSCurrentConfig(true)),
444 										PG_GETARG_DATUM(0),
445 										PG_GETARG_DATUM(1),
446 										PG_GETARG_DATUM(2)));
447 }
448 
449 Datum
ts_headline_json_byid_opt(PG_FUNCTION_ARGS)450 ts_headline_json_byid_opt(PG_FUNCTION_ARGS)
451 {
452 	Oid			tsconfig = PG_GETARG_OID(0);
453 	text	   *json = PG_GETARG_TEXT_P(1);
454 	TSQuery		query = PG_GETARG_TSQUERY(2);
455 	text	   *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
456 	text	   *out;
457 	JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
458 
459 	HeadlineParsedText prs;
460 	HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
461 
462 	memset(&prs, 0, sizeof(HeadlineParsedText));
463 	prs.lenwords = 32;
464 	prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
465 
466 	state->prs = &prs;
467 	state->cfg = lookup_ts_config_cache(tsconfig);
468 	state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
469 	state->query = query;
470 	if (opt)
471 		state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
472 	else
473 		state->prsoptions = NIL;
474 
475 	if (!OidIsValid(state->prsobj->headlineOid))
476 		ereport(ERROR,
477 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
478 				 errmsg("text search parser does not support headline creation")));
479 
480 	out = transform_json_string_values(json, state, action);
481 
482 	PG_FREE_IF_COPY(json, 1);
483 	PG_FREE_IF_COPY(query, 2);
484 	if (opt)
485 		PG_FREE_IF_COPY(opt, 3);
486 	pfree(prs.words);
487 
488 	if (state->transformed)
489 	{
490 		pfree(prs.startsel);
491 		pfree(prs.stopsel);
492 	}
493 
494 	PG_RETURN_TEXT_P(out);
495 }
496 
497 Datum
ts_headline_json(PG_FUNCTION_ARGS)498 ts_headline_json(PG_FUNCTION_ARGS)
499 {
500 	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
501 										ObjectIdGetDatum(getTSCurrentConfig(true)),
502 										PG_GETARG_DATUM(0),
503 										PG_GETARG_DATUM(1)));
504 }
505 
506 Datum
ts_headline_json_byid(PG_FUNCTION_ARGS)507 ts_headline_json_byid(PG_FUNCTION_ARGS)
508 {
509 	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
510 										PG_GETARG_DATUM(0),
511 										PG_GETARG_DATUM(1),
512 										PG_GETARG_DATUM(2)));
513 }
514 
515 Datum
ts_headline_json_opt(PG_FUNCTION_ARGS)516 ts_headline_json_opt(PG_FUNCTION_ARGS)
517 {
518 	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_json_byid_opt,
519 										ObjectIdGetDatum(getTSCurrentConfig(true)),
520 										PG_GETARG_DATUM(0),
521 										PG_GETARG_DATUM(1),
522 										PG_GETARG_DATUM(2)));
523 }
524 
525 
526 /*
527  * Return headline in text from, generated from a json(b) element
528  */
529 static text *
headline_json_value(void * _state,char * elem_value,int elem_len)530 headline_json_value(void *_state, char *elem_value, int elem_len)
531 {
532 	HeadlineJsonState *state = (HeadlineJsonState *) _state;
533 
534 	HeadlineParsedText *prs = state->prs;
535 	TSConfigCacheEntry *cfg = state->cfg;
536 	TSParserCacheEntry *prsobj = state->prsobj;
537 	TSQuery		query = state->query;
538 	List	   *prsoptions = state->prsoptions;
539 
540 	prs->curwords = 0;
541 	hlparsetext(cfg->cfgId, prs, query, elem_value, elem_len);
542 	FunctionCall3(&(prsobj->prsheadline),
543 				  PointerGetDatum(prs),
544 				  PointerGetDatum(prsoptions),
545 				  PointerGetDatum(query));
546 
547 	state->transformed = true;
548 	return generateHeadline(prs);
549 }
550