1 /*-------------------------------------------------------------------------
2  *
3  * wparser.c
4  *		Standard interface to word parser
5  *
6  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *	  src/backend/tsearch/wparser.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "funcapi.h"
17 #include "catalog/namespace.h"
18 #include "catalog/pg_type.h"
19 #include "commands/defrem.h"
20 #include "tsearch/ts_cache.h"
21 #include "tsearch/ts_utils.h"
22 #include "utils/builtins.h"
23 
24 
25 /******sql-level interface******/
26 
27 typedef struct
28 {
29 	int			cur;
30 	LexDescr   *list;
31 } TSTokenTypeStorage;
32 
33 static void
tt_setup_firstcall(FuncCallContext * funcctx,Oid prsid)34 tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
35 {
36 	TupleDesc	tupdesc;
37 	MemoryContext oldcontext;
38 	TSTokenTypeStorage *st;
39 	TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid);
40 
41 	if (!OidIsValid(prs->lextypeOid))
42 		elog(ERROR, "method lextype isn't defined for text search parser %u",
43 			 prsid);
44 
45 	oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
46 
47 	st = (TSTokenTypeStorage *) palloc(sizeof(TSTokenTypeStorage));
48 	st->cur = 0;
49 	/* lextype takes one dummy argument */
50 	st->list = (LexDescr *) DatumGetPointer(OidFunctionCall1(prs->lextypeOid,
51 															 (Datum) 0));
52 	funcctx->user_fctx = (void *) st;
53 
54 	tupdesc = CreateTemplateTupleDesc(3, false);
55 	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid",
56 					   INT4OID, -1, 0);
57 	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "alias",
58 					   TEXTOID, -1, 0);
59 	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "description",
60 					   TEXTOID, -1, 0);
61 
62 	funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
63 	MemoryContextSwitchTo(oldcontext);
64 }
65 
66 static Datum
tt_process_call(FuncCallContext * funcctx)67 tt_process_call(FuncCallContext *funcctx)
68 {
69 	TSTokenTypeStorage *st;
70 
71 	st = (TSTokenTypeStorage *) funcctx->user_fctx;
72 	if (st->list && st->list[st->cur].lexid)
73 	{
74 		Datum		result;
75 		char	   *values[3];
76 		char		txtid[16];
77 		HeapTuple	tuple;
78 
79 		sprintf(txtid, "%d", st->list[st->cur].lexid);
80 		values[0] = txtid;
81 		values[1] = st->list[st->cur].alias;
82 		values[2] = st->list[st->cur].descr;
83 
84 		tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
85 		result = HeapTupleGetDatum(tuple);
86 
87 		pfree(values[1]);
88 		pfree(values[2]);
89 		st->cur++;
90 		return result;
91 	}
92 	if (st->list)
93 		pfree(st->list);
94 	pfree(st);
95 	return (Datum) 0;
96 }
97 
98 Datum
ts_token_type_byid(PG_FUNCTION_ARGS)99 ts_token_type_byid(PG_FUNCTION_ARGS)
100 {
101 	FuncCallContext *funcctx;
102 	Datum		result;
103 
104 	if (SRF_IS_FIRSTCALL())
105 	{
106 		funcctx = SRF_FIRSTCALL_INIT();
107 		tt_setup_firstcall(funcctx, PG_GETARG_OID(0));
108 	}
109 
110 	funcctx = SRF_PERCALL_SETUP();
111 
112 	if ((result = tt_process_call(funcctx)) != (Datum) 0)
113 		SRF_RETURN_NEXT(funcctx, result);
114 	SRF_RETURN_DONE(funcctx);
115 }
116 
117 Datum
ts_token_type_byname(PG_FUNCTION_ARGS)118 ts_token_type_byname(PG_FUNCTION_ARGS)
119 {
120 	FuncCallContext *funcctx;
121 	Datum		result;
122 
123 	if (SRF_IS_FIRSTCALL())
124 	{
125 		text	   *prsname = PG_GETARG_TEXT_P(0);
126 		Oid			prsId;
127 
128 		funcctx = SRF_FIRSTCALL_INIT();
129 		prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false);
130 		tt_setup_firstcall(funcctx, prsId);
131 	}
132 
133 	funcctx = SRF_PERCALL_SETUP();
134 
135 	if ((result = tt_process_call(funcctx)) != (Datum) 0)
136 		SRF_RETURN_NEXT(funcctx, result);
137 	SRF_RETURN_DONE(funcctx);
138 }
139 
/*
 * One token produced by a parser run: its integer token type and a
 * NUL-terminated copy of its text.
 */
typedef struct LexemeEntry
{
	int			type;			/* token type returned by the prstoken method */
	char	   *lexeme;			/* palloc'd, NUL-terminated token text */
} LexemeEntry;
145 
146 typedef struct
147 {
148 	int			cur;
149 	int			len;
150 	LexemeEntry *list;
151 } PrsStorage;
152 
153 
154 static void
prs_setup_firstcall(FuncCallContext * funcctx,Oid prsid,text * txt)155 prs_setup_firstcall(FuncCallContext *funcctx, Oid prsid, text *txt)
156 {
157 	TupleDesc	tupdesc;
158 	MemoryContext oldcontext;
159 	PrsStorage *st;
160 	TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid);
161 	char	   *lex = NULL;
162 	int			llen = 0,
163 				type = 0;
164 	void	   *prsdata;
165 
166 	oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
167 
168 	st = (PrsStorage *) palloc(sizeof(PrsStorage));
169 	st->cur = 0;
170 	st->len = 16;
171 	st->list = (LexemeEntry *) palloc(sizeof(LexemeEntry) * st->len);
172 
173 	prsdata = (void *) DatumGetPointer(FunctionCall2(&prs->prsstart,
174 											   PointerGetDatum(VARDATA(txt)),
175 									Int32GetDatum(VARSIZE(txt) - VARHDRSZ)));
176 
177 	while ((type = DatumGetInt32(FunctionCall3(&prs->prstoken,
178 											   PointerGetDatum(prsdata),
179 											   PointerGetDatum(&lex),
180 											   PointerGetDatum(&llen)))) != 0)
181 	{
182 		if (st->cur >= st->len)
183 		{
184 			st->len = 2 * st->len;
185 			st->list = (LexemeEntry *) repalloc(st->list, sizeof(LexemeEntry) * st->len);
186 		}
187 		st->list[st->cur].lexeme = palloc(llen + 1);
188 		memcpy(st->list[st->cur].lexeme, lex, llen);
189 		st->list[st->cur].lexeme[llen] = '\0';
190 		st->list[st->cur].type = type;
191 		st->cur++;
192 	}
193 
194 	FunctionCall1(&prs->prsend, PointerGetDatum(prsdata));
195 
196 	st->len = st->cur;
197 	st->cur = 0;
198 
199 	funcctx->user_fctx = (void *) st;
200 	tupdesc = CreateTemplateTupleDesc(2, false);
201 	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid",
202 					   INT4OID, -1, 0);
203 	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "token",
204 					   TEXTOID, -1, 0);
205 
206 	funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
207 	MemoryContextSwitchTo(oldcontext);
208 }
209 
210 static Datum
prs_process_call(FuncCallContext * funcctx)211 prs_process_call(FuncCallContext *funcctx)
212 {
213 	PrsStorage *st;
214 
215 	st = (PrsStorage *) funcctx->user_fctx;
216 	if (st->cur < st->len)
217 	{
218 		Datum		result;
219 		char	   *values[2];
220 		char		tid[16];
221 		HeapTuple	tuple;
222 
223 		values[0] = tid;
224 		sprintf(tid, "%d", st->list[st->cur].type);
225 		values[1] = st->list[st->cur].lexeme;
226 		tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
227 		result = HeapTupleGetDatum(tuple);
228 
229 		pfree(values[1]);
230 		st->cur++;
231 		return result;
232 	}
233 	else
234 	{
235 		if (st->list)
236 			pfree(st->list);
237 		pfree(st);
238 	}
239 	return (Datum) 0;
240 }
241 
242 Datum
ts_parse_byid(PG_FUNCTION_ARGS)243 ts_parse_byid(PG_FUNCTION_ARGS)
244 {
245 	FuncCallContext *funcctx;
246 	Datum		result;
247 
248 	if (SRF_IS_FIRSTCALL())
249 	{
250 		text	   *txt = PG_GETARG_TEXT_P(1);
251 
252 		funcctx = SRF_FIRSTCALL_INIT();
253 		prs_setup_firstcall(funcctx, PG_GETARG_OID(0), txt);
254 		PG_FREE_IF_COPY(txt, 1);
255 	}
256 
257 	funcctx = SRF_PERCALL_SETUP();
258 
259 	if ((result = prs_process_call(funcctx)) != (Datum) 0)
260 		SRF_RETURN_NEXT(funcctx, result);
261 	SRF_RETURN_DONE(funcctx);
262 }
263 
264 Datum
ts_parse_byname(PG_FUNCTION_ARGS)265 ts_parse_byname(PG_FUNCTION_ARGS)
266 {
267 	FuncCallContext *funcctx;
268 	Datum		result;
269 
270 	if (SRF_IS_FIRSTCALL())
271 	{
272 		text	   *prsname = PG_GETARG_TEXT_P(0);
273 		text	   *txt = PG_GETARG_TEXT_P(1);
274 		Oid			prsId;
275 
276 		funcctx = SRF_FIRSTCALL_INIT();
277 		prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false);
278 		prs_setup_firstcall(funcctx, prsId, txt);
279 	}
280 
281 	funcctx = SRF_PERCALL_SETUP();
282 
283 	if ((result = prs_process_call(funcctx)) != (Datum) 0)
284 		SRF_RETURN_NEXT(funcctx, result);
285 	SRF_RETURN_DONE(funcctx);
286 }
287 
288 Datum
ts_headline_byid_opt(PG_FUNCTION_ARGS)289 ts_headline_byid_opt(PG_FUNCTION_ARGS)
290 {
291 	text	   *in = PG_GETARG_TEXT_P(1);
292 	TSQuery		query = PG_GETARG_TSQUERY(2);
293 	text	   *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
294 	HeadlineParsedText prs;
295 	List	   *prsoptions;
296 	text	   *out;
297 	TSConfigCacheEntry *cfg;
298 	TSParserCacheEntry *prsobj;
299 
300 	cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
301 	prsobj = lookup_ts_parser_cache(cfg->prsId);
302 
303 	if (!OidIsValid(prsobj->headlineOid))
304 		ereport(ERROR,
305 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
306 		   errmsg("text search parser does not support headline creation")));
307 
308 	memset(&prs, 0, sizeof(HeadlineParsedText));
309 	prs.lenwords = 32;
310 	prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
311 
312 	hlparsetext(cfg->cfgId, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
313 
314 	if (opt)
315 		prsoptions = deserialize_deflist(PointerGetDatum(opt));
316 	else
317 		prsoptions = NIL;
318 
319 	FunctionCall3(&(prsobj->prsheadline),
320 				  PointerGetDatum(&prs),
321 				  PointerGetDatum(prsoptions),
322 				  PointerGetDatum(query));
323 
324 	out = generateHeadline(&prs);
325 
326 	PG_FREE_IF_COPY(in, 1);
327 	PG_FREE_IF_COPY(query, 2);
328 	if (opt)
329 		PG_FREE_IF_COPY(opt, 3);
330 	pfree(prs.words);
331 	pfree(prs.startsel);
332 	pfree(prs.stopsel);
333 
334 	PG_RETURN_POINTER(out);
335 }
336 
337 Datum
ts_headline_byid(PG_FUNCTION_ARGS)338 ts_headline_byid(PG_FUNCTION_ARGS)
339 {
340 	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt,
341 										PG_GETARG_DATUM(0),
342 										PG_GETARG_DATUM(1),
343 										PG_GETARG_DATUM(2)));
344 }
345 
346 Datum
ts_headline(PG_FUNCTION_ARGS)347 ts_headline(PG_FUNCTION_ARGS)
348 {
349 	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt,
350 								  ObjectIdGetDatum(getTSCurrentConfig(true)),
351 										PG_GETARG_DATUM(0),
352 										PG_GETARG_DATUM(1)));
353 }
354 
355 Datum
ts_headline_opt(PG_FUNCTION_ARGS)356 ts_headline_opt(PG_FUNCTION_ARGS)
357 {
358 	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_byid_opt,
359 								  ObjectIdGetDatum(getTSCurrentConfig(true)),
360 										PG_GETARG_DATUM(0),
361 										PG_GETARG_DATUM(1),
362 										PG_GETARG_DATUM(2)));
363 }
364