1 /*-------------------------------------------------------------------------
2  *
3  * wparser.c
4  *		Standard interface to word parser
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *	  src/backend/tsearch/wparser.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "funcapi.h"
17 #include "catalog/namespace.h"
18 #include "catalog/pg_type.h"
19 #include "commands/defrem.h"
20 #include "tsearch/ts_cache.h"
21 #include "tsearch/ts_utils.h"
22 #include "utils/builtins.h"
23 #include "utils/jsonapi.h"
24 #include "utils/varlena.h"
25 
26 
27 /******sql-level interface******/
28 
/*
 * Cross-call state kept in funcctx->user_fctx by the ts_token_type_*
 * set-returning functions.
 */
typedef struct
{
	int			cur;			/* index of the next entry of list to emit */
	LexDescr   *list;			/* array returned by the parser's lextype
								 * method; iteration stops at the first entry
								 * whose lexid is zero */
} TSTokenTypeStorage;
34 
/*
 * state for ts_headline_json_*
 *
 * Handed as the opaque state pointer to headline_json_value for every string
 * value found in the json(b) document.
 */
typedef struct HeadlineJsonState
{
	HeadlineParsedText *prs;	/* parse workspace, reused for each value */
	TSConfigCacheEntry *cfg;	/* text search configuration in use */
	TSParserCacheEntry *prsobj; /* parser looked up from cfg->prsId */
	TSQuery		query;			/* query passed on to the headline method */
	List	   *prsoptions;		/* deserialized options list, or NIL */
	bool		transformed;	/* set to true by headline_json_value; tells
								 * the caller whether startsel/stopsel need
								 * to be freed */
} HeadlineJsonState;
45 
/* callback handed to the json(b) string-value transformers below */
static text *headline_json_value(void *_state, char *elem_value, int elem_len);
47 
48 static void
tt_setup_firstcall(FuncCallContext * funcctx,Oid prsid)49 tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
50 {
51 	TupleDesc	tupdesc;
52 	MemoryContext oldcontext;
53 	TSTokenTypeStorage *st;
54 	TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid);
55 
56 	if (!OidIsValid(prs->lextypeOid))
57 		elog(ERROR, "method lextype isn't defined for text search parser %u",
58 			 prsid);
59 
60 	oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
61 
62 	st = (TSTokenTypeStorage *) palloc(sizeof(TSTokenTypeStorage));
63 	st->cur = 0;
64 	/* lextype takes one dummy argument */
65 	st->list = (LexDescr *) DatumGetPointer(OidFunctionCall1(prs->lextypeOid,
66 															 (Datum) 0));
67 	funcctx->user_fctx = (void *) st;
68 
69 	tupdesc = CreateTemplateTupleDesc(3);
70 	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid",
71 					   INT4OID, -1, 0);
72 	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "alias",
73 					   TEXTOID, -1, 0);
74 	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "description",
75 					   TEXTOID, -1, 0);
76 
77 	funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
78 	MemoryContextSwitchTo(oldcontext);
79 }
80 
81 static Datum
tt_process_call(FuncCallContext * funcctx)82 tt_process_call(FuncCallContext *funcctx)
83 {
84 	TSTokenTypeStorage *st;
85 
86 	st = (TSTokenTypeStorage *) funcctx->user_fctx;
87 	if (st->list && st->list[st->cur].lexid)
88 	{
89 		Datum		result;
90 		char	   *values[3];
91 		char		txtid[16];
92 		HeapTuple	tuple;
93 
94 		sprintf(txtid, "%d", st->list[st->cur].lexid);
95 		values[0] = txtid;
96 		values[1] = st->list[st->cur].alias;
97 		values[2] = st->list[st->cur].descr;
98 
99 		tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
100 		result = HeapTupleGetDatum(tuple);
101 
102 		pfree(values[1]);
103 		pfree(values[2]);
104 		st->cur++;
105 		return result;
106 	}
107 	if (st->list)
108 		pfree(st->list);
109 	pfree(st);
110 	return (Datum) 0;
111 }
112 
113 Datum
ts_token_type_byid(PG_FUNCTION_ARGS)114 ts_token_type_byid(PG_FUNCTION_ARGS)
115 {
116 	FuncCallContext *funcctx;
117 	Datum		result;
118 
119 	if (SRF_IS_FIRSTCALL())
120 	{
121 		funcctx = SRF_FIRSTCALL_INIT();
122 		tt_setup_firstcall(funcctx, PG_GETARG_OID(0));
123 	}
124 
125 	funcctx = SRF_PERCALL_SETUP();
126 
127 	if ((result = tt_process_call(funcctx)) != (Datum) 0)
128 		SRF_RETURN_NEXT(funcctx, result);
129 	SRF_RETURN_DONE(funcctx);
130 }
131 
132 Datum
ts_token_type_byname(PG_FUNCTION_ARGS)133 ts_token_type_byname(PG_FUNCTION_ARGS)
134 {
135 	FuncCallContext *funcctx;
136 	Datum		result;
137 
138 	if (SRF_IS_FIRSTCALL())
139 	{
140 		text	   *prsname = PG_GETARG_TEXT_PP(0);
141 		Oid			prsId;
142 
143 		funcctx = SRF_FIRSTCALL_INIT();
144 		prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false);
145 		tt_setup_firstcall(funcctx, prsId);
146 	}
147 
148 	funcctx = SRF_PERCALL_SETUP();
149 
150 	if ((result = tt_process_call(funcctx)) != (Datum) 0)
151 		SRF_RETURN_NEXT(funcctx, result);
152 	SRF_RETURN_DONE(funcctx);
153 }
154 
/* One token produced by the parser, as collected by prs_setup_firstcall */
typedef struct
{
	int			type;			/* token type returned by the prstoken method */
	char	   *lexeme;			/* NUL-terminated copy of the token text */
} LexemeEntry;
160 
/*
 * Cross-call state kept in funcctx->user_fctx by the ts_parse_* set-returning
 * functions.
 */
typedef struct
{
	int			cur;			/* index of the next entry of list to emit */
	int			len;			/* allocated size while collecting; number of
								 * collected entries once setup is done */
	LexemeEntry *list;			/* collected tokens */
} PrsStorage;
167 
168 
169 static void
prs_setup_firstcall(FuncCallContext * funcctx,Oid prsid,text * txt)170 prs_setup_firstcall(FuncCallContext *funcctx, Oid prsid, text *txt)
171 {
172 	TupleDesc	tupdesc;
173 	MemoryContext oldcontext;
174 	PrsStorage *st;
175 	TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid);
176 	char	   *lex = NULL;
177 	int			llen = 0,
178 				type = 0;
179 	void	   *prsdata;
180 
181 	oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
182 
183 	st = (PrsStorage *) palloc(sizeof(PrsStorage));
184 	st->cur = 0;
185 	st->len = 16;
186 	st->list = (LexemeEntry *) palloc(sizeof(LexemeEntry) * st->len);
187 
188 	prsdata = (void *) DatumGetPointer(FunctionCall2(&prs->prsstart,
189 													 PointerGetDatum(VARDATA_ANY(txt)),
190 													 Int32GetDatum(VARSIZE_ANY_EXHDR(txt))));
191 
192 	while ((type = DatumGetInt32(FunctionCall3(&prs->prstoken,
193 											   PointerGetDatum(prsdata),
194 											   PointerGetDatum(&lex),
195 											   PointerGetDatum(&llen)))) != 0)
196 	{
197 		if (st->cur >= st->len)
198 		{
199 			st->len = 2 * st->len;
200 			st->list = (LexemeEntry *) repalloc(st->list, sizeof(LexemeEntry) * st->len);
201 		}
202 		st->list[st->cur].lexeme = palloc(llen + 1);
203 		memcpy(st->list[st->cur].lexeme, lex, llen);
204 		st->list[st->cur].lexeme[llen] = '\0';
205 		st->list[st->cur].type = type;
206 		st->cur++;
207 	}
208 
209 	FunctionCall1(&prs->prsend, PointerGetDatum(prsdata));
210 
211 	st->len = st->cur;
212 	st->cur = 0;
213 
214 	funcctx->user_fctx = (void *) st;
215 	tupdesc = CreateTemplateTupleDesc(2);
216 	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid",
217 					   INT4OID, -1, 0);
218 	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "token",
219 					   TEXTOID, -1, 0);
220 
221 	funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
222 	MemoryContextSwitchTo(oldcontext);
223 }
224 
225 static Datum
prs_process_call(FuncCallContext * funcctx)226 prs_process_call(FuncCallContext *funcctx)
227 {
228 	PrsStorage *st;
229 
230 	st = (PrsStorage *) funcctx->user_fctx;
231 	if (st->cur < st->len)
232 	{
233 		Datum		result;
234 		char	   *values[2];
235 		char		tid[16];
236 		HeapTuple	tuple;
237 
238 		values[0] = tid;
239 		sprintf(tid, "%d", st->list[st->cur].type);
240 		values[1] = st->list[st->cur].lexeme;
241 		tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
242 		result = HeapTupleGetDatum(tuple);
243 
244 		pfree(values[1]);
245 		st->cur++;
246 		return result;
247 	}
248 	else
249 	{
250 		if (st->list)
251 			pfree(st->list);
252 		pfree(st);
253 	}
254 	return (Datum) 0;
255 }
256 
257 Datum
ts_parse_byid(PG_FUNCTION_ARGS)258 ts_parse_byid(PG_FUNCTION_ARGS)
259 {
260 	FuncCallContext *funcctx;
261 	Datum		result;
262 
263 	if (SRF_IS_FIRSTCALL())
264 	{
265 		text	   *txt = PG_GETARG_TEXT_PP(1);
266 
267 		funcctx = SRF_FIRSTCALL_INIT();
268 		prs_setup_firstcall(funcctx, PG_GETARG_OID(0), txt);
269 		PG_FREE_IF_COPY(txt, 1);
270 	}
271 
272 	funcctx = SRF_PERCALL_SETUP();
273 
274 	if ((result = prs_process_call(funcctx)) != (Datum) 0)
275 		SRF_RETURN_NEXT(funcctx, result);
276 	SRF_RETURN_DONE(funcctx);
277 }
278 
279 Datum
ts_parse_byname(PG_FUNCTION_ARGS)280 ts_parse_byname(PG_FUNCTION_ARGS)
281 {
282 	FuncCallContext *funcctx;
283 	Datum		result;
284 
285 	if (SRF_IS_FIRSTCALL())
286 	{
287 		text	   *prsname = PG_GETARG_TEXT_PP(0);
288 		text	   *txt = PG_GETARG_TEXT_PP(1);
289 		Oid			prsId;
290 
291 		funcctx = SRF_FIRSTCALL_INIT();
292 		prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false);
293 		prs_setup_firstcall(funcctx, prsId, txt);
294 	}
295 
296 	funcctx = SRF_PERCALL_SETUP();
297 
298 	if ((result = prs_process_call(funcctx)) != (Datum) 0)
299 		SRF_RETURN_NEXT(funcctx, result);
300 	SRF_RETURN_DONE(funcctx);
301 }
302 
303 Datum
ts_headline_byid_opt(PG_FUNCTION_ARGS)304 ts_headline_byid_opt(PG_FUNCTION_ARGS)
305 {
306 	Oid			tsconfig = PG_GETARG_OID(0);
307 	text	   *in = PG_GETARG_TEXT_PP(1);
308 	TSQuery		query = PG_GETARG_TSQUERY(2);
309 	text	   *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_PP(3) : NULL;
310 	HeadlineParsedText prs;
311 	List	   *prsoptions;
312 	text	   *out;
313 	TSConfigCacheEntry *cfg;
314 	TSParserCacheEntry *prsobj;
315 
316 	cfg = lookup_ts_config_cache(tsconfig);
317 	prsobj = lookup_ts_parser_cache(cfg->prsId);
318 
319 	if (!OidIsValid(prsobj->headlineOid))
320 		ereport(ERROR,
321 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
322 				 errmsg("text search parser does not support headline creation")));
323 
324 	memset(&prs, 0, sizeof(HeadlineParsedText));
325 	prs.lenwords = 32;
326 	prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
327 
328 	hlparsetext(cfg->cfgId, &prs, query,
329 				VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in));
330 
331 	if (opt)
332 		prsoptions = deserialize_deflist(PointerGetDatum(opt));
333 	else
334 		prsoptions = NIL;
335 
336 	FunctionCall3(&(prsobj->prsheadline),
337 				  PointerGetDatum(&prs),
338 				  PointerGetDatum(prsoptions),
339 				  PointerGetDatum(query));
340 
341 	out = generateHeadline(&prs);
342 
343 	PG_FREE_IF_COPY(in, 1);
344 	PG_FREE_IF_COPY(query, 2);
345 	if (opt)
346 		PG_FREE_IF_COPY(opt, 3);
347 	pfree(prs.words);
348 	pfree(prs.startsel);
349 	pfree(prs.stopsel);
350 
351 	PG_RETURN_POINTER(out);
352 }
353 
354 Datum
ts_headline_byid(PG_FUNCTION_ARGS)355 ts_headline_byid(PG_FUNCTION_ARGS)
356 {
357 	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt,
358 										PG_GETARG_DATUM(0),
359 										PG_GETARG_DATUM(1),
360 										PG_GETARG_DATUM(2)));
361 }
362 
363 Datum
ts_headline(PG_FUNCTION_ARGS)364 ts_headline(PG_FUNCTION_ARGS)
365 {
366 	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt,
367 										ObjectIdGetDatum(getTSCurrentConfig(true)),
368 										PG_GETARG_DATUM(0),
369 										PG_GETARG_DATUM(1)));
370 }
371 
372 Datum
ts_headline_opt(PG_FUNCTION_ARGS)373 ts_headline_opt(PG_FUNCTION_ARGS)
374 {
375 	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_byid_opt,
376 										ObjectIdGetDatum(getTSCurrentConfig(true)),
377 										PG_GETARG_DATUM(0),
378 										PG_GETARG_DATUM(1),
379 										PG_GETARG_DATUM(2)));
380 }
381 
382 Datum
ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)383 ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
384 {
385 	Oid			tsconfig = PG_GETARG_OID(0);
386 	Jsonb	   *jb = PG_GETARG_JSONB_P(1);
387 	TSQuery		query = PG_GETARG_TSQUERY(2);
388 	text	   *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
389 	Jsonb	   *out;
390 	JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
391 	HeadlineParsedText prs;
392 	HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
393 
394 	memset(&prs, 0, sizeof(HeadlineParsedText));
395 	prs.lenwords = 32;
396 	prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
397 
398 	state->prs = &prs;
399 	state->cfg = lookup_ts_config_cache(tsconfig);
400 	state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
401 	state->query = query;
402 	if (opt)
403 		state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
404 	else
405 		state->prsoptions = NIL;
406 
407 	if (!OidIsValid(state->prsobj->headlineOid))
408 		ereport(ERROR,
409 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
410 				 errmsg("text search parser does not support headline creation")));
411 
412 	out = transform_jsonb_string_values(jb, state, action);
413 
414 	PG_FREE_IF_COPY(jb, 1);
415 	PG_FREE_IF_COPY(query, 2);
416 	if (opt)
417 		PG_FREE_IF_COPY(opt, 3);
418 
419 	pfree(prs.words);
420 
421 	if (state->transformed)
422 	{
423 		pfree(prs.startsel);
424 		pfree(prs.stopsel);
425 	}
426 
427 	PG_RETURN_JSONB_P(out);
428 }
429 
430 Datum
ts_headline_jsonb(PG_FUNCTION_ARGS)431 ts_headline_jsonb(PG_FUNCTION_ARGS)
432 {
433 	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
434 										ObjectIdGetDatum(getTSCurrentConfig(true)),
435 										PG_GETARG_DATUM(0),
436 										PG_GETARG_DATUM(1)));
437 }
438 
439 Datum
ts_headline_jsonb_byid(PG_FUNCTION_ARGS)440 ts_headline_jsonb_byid(PG_FUNCTION_ARGS)
441 {
442 	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
443 										PG_GETARG_DATUM(0),
444 										PG_GETARG_DATUM(1),
445 										PG_GETARG_DATUM(2)));
446 }
447 
448 Datum
ts_headline_jsonb_opt(PG_FUNCTION_ARGS)449 ts_headline_jsonb_opt(PG_FUNCTION_ARGS)
450 {
451 	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_jsonb_byid_opt,
452 										ObjectIdGetDatum(getTSCurrentConfig(true)),
453 										PG_GETARG_DATUM(0),
454 										PG_GETARG_DATUM(1),
455 										PG_GETARG_DATUM(2)));
456 }
457 
458 Datum
ts_headline_json_byid_opt(PG_FUNCTION_ARGS)459 ts_headline_json_byid_opt(PG_FUNCTION_ARGS)
460 {
461 	Oid			tsconfig = PG_GETARG_OID(0);
462 	text	   *json = PG_GETARG_TEXT_P(1);
463 	TSQuery		query = PG_GETARG_TSQUERY(2);
464 	text	   *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
465 	text	   *out;
466 	JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
467 
468 	HeadlineParsedText prs;
469 	HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
470 
471 	memset(&prs, 0, sizeof(HeadlineParsedText));
472 	prs.lenwords = 32;
473 	prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
474 
475 	state->prs = &prs;
476 	state->cfg = lookup_ts_config_cache(tsconfig);
477 	state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
478 	state->query = query;
479 	if (opt)
480 		state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
481 	else
482 		state->prsoptions = NIL;
483 
484 	if (!OidIsValid(state->prsobj->headlineOid))
485 		ereport(ERROR,
486 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
487 				 errmsg("text search parser does not support headline creation")));
488 
489 	out = transform_json_string_values(json, state, action);
490 
491 	PG_FREE_IF_COPY(json, 1);
492 	PG_FREE_IF_COPY(query, 2);
493 	if (opt)
494 		PG_FREE_IF_COPY(opt, 3);
495 	pfree(prs.words);
496 
497 	if (state->transformed)
498 	{
499 		pfree(prs.startsel);
500 		pfree(prs.stopsel);
501 	}
502 
503 	PG_RETURN_TEXT_P(out);
504 }
505 
506 Datum
ts_headline_json(PG_FUNCTION_ARGS)507 ts_headline_json(PG_FUNCTION_ARGS)
508 {
509 	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
510 										ObjectIdGetDatum(getTSCurrentConfig(true)),
511 										PG_GETARG_DATUM(0),
512 										PG_GETARG_DATUM(1)));
513 }
514 
515 Datum
ts_headline_json_byid(PG_FUNCTION_ARGS)516 ts_headline_json_byid(PG_FUNCTION_ARGS)
517 {
518 	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
519 										PG_GETARG_DATUM(0),
520 										PG_GETARG_DATUM(1),
521 										PG_GETARG_DATUM(2)));
522 }
523 
524 Datum
ts_headline_json_opt(PG_FUNCTION_ARGS)525 ts_headline_json_opt(PG_FUNCTION_ARGS)
526 {
527 	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_json_byid_opt,
528 										ObjectIdGetDatum(getTSCurrentConfig(true)),
529 										PG_GETARG_DATUM(0),
530 										PG_GETARG_DATUM(1),
531 										PG_GETARG_DATUM(2)));
532 }
533 
534 
535 /*
536  * Return headline in text from, generated from a json(b) element
537  */
538 static text *
headline_json_value(void * _state,char * elem_value,int elem_len)539 headline_json_value(void *_state, char *elem_value, int elem_len)
540 {
541 	HeadlineJsonState *state = (HeadlineJsonState *) _state;
542 
543 	HeadlineParsedText *prs = state->prs;
544 	TSConfigCacheEntry *cfg = state->cfg;
545 	TSParserCacheEntry *prsobj = state->prsobj;
546 	TSQuery		query = state->query;
547 	List	   *prsoptions = state->prsoptions;
548 
549 	prs->curwords = 0;
550 	hlparsetext(cfg->cfgId, prs, query, elem_value, elem_len);
551 	FunctionCall3(&(prsobj->prsheadline),
552 				  PointerGetDatum(prs),
553 				  PointerGetDatum(prsoptions),
554 				  PointerGetDatum(query));
555 
556 	state->transformed = true;
557 	return generateHeadline(prs);
558 }
559