1 /*
2  * txtquery io
3  * Teodor Sigaev <teodor@stack.net>
4  * contrib/ltree/ltxtquery_io.c
5  */
6 #include "postgres.h"
7 
8 #include <ctype.h>
9 
10 #include "crc32.h"
11 #include "ltree.h"
12 #include "miscadmin.h"
13 
14 PG_FUNCTION_INFO_V1(ltxtq_in);
15 PG_FUNCTION_INFO_V1(ltxtq_out);
16 
17 
/*
 * States of the tokenizer in gettoken_query().  The numeric values are the
 * same as before; only the spelling (enum instead of macros) differs.
 */
enum
{
	WAITOPERAND = 1,			/* expecting '!', '(' or the start of an operand */
	INOPERAND = 2,				/* inside an operand or its modifiers */
	WAITOPERATOR = 3			/* expecting '&', '|', ')' or end of input */
};
22 
23 /*
24  * node of query tree, also used
25  * for storing polish notation in parser
26  */
27 typedef struct NODE
28 {
29 	int32		type;
30 	int32		val;
31 	int16		distance;
32 	int16		length;
33 	uint16		flag;
34 	struct NODE *next;
35 } NODE;
36 
37 typedef struct
38 {
39 	char	   *buf;
40 	int32		state;
41 	int32		count;
42 	/* reverse polish notation in list (for temporary usage) */
43 	NODE	   *str;
44 	/* number in str */
45 	int32		num;
46 
47 	/* user-friendly operand */
48 	int32		lenop;
49 	int32		sumlen;
50 	char	   *op;
51 	char	   *curop;
52 } QPRS_STATE;
53 
54 /*
55  * get token from query string
56  */
57 static int32
gettoken_query(QPRS_STATE * state,int32 * val,int32 * lenval,char ** strval,uint16 * flag)58 gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint16 *flag)
59 {
60 	int			charlen;
61 
62 	for (;;)
63 	{
64 		charlen = pg_mblen(state->buf);
65 
66 		switch (state->state)
67 		{
68 			case WAITOPERAND:
69 				if (charlen == 1 && t_iseq(state->buf, '!'))
70 				{
71 					(state->buf)++;
72 					*val = (int32) '!';
73 					return OPR;
74 				}
75 				else if (charlen == 1 && t_iseq(state->buf, '('))
76 				{
77 					state->count++;
78 					(state->buf)++;
79 					return OPEN;
80 				}
81 				else if (ISALNUM(state->buf))
82 				{
83 					state->state = INOPERAND;
84 					*strval = state->buf;
85 					*lenval = charlen;
86 					*flag = 0;
87 				}
88 				else if (!t_isspace(state->buf))
89 					ereport(ERROR,
90 							(errcode(ERRCODE_SYNTAX_ERROR),
91 							 errmsg("operand syntax error")));
92 				break;
93 			case INOPERAND:
94 				if (ISALNUM(state->buf))
95 				{
96 					if (*flag)
97 						ereport(ERROR,
98 								(errcode(ERRCODE_SYNTAX_ERROR),
99 								 errmsg("modifiers syntax error")));
100 					*lenval += charlen;
101 				}
102 				else if (charlen == 1 && t_iseq(state->buf, '%'))
103 					*flag |= LVAR_SUBLEXEME;
104 				else if (charlen == 1 && t_iseq(state->buf, '@'))
105 					*flag |= LVAR_INCASE;
106 				else if (charlen == 1 && t_iseq(state->buf, '*'))
107 					*flag |= LVAR_ANYEND;
108 				else
109 				{
110 					state->state = WAITOPERATOR;
111 					return VAL;
112 				}
113 				break;
114 			case WAITOPERATOR:
115 				if (charlen == 1 && (t_iseq(state->buf, '&') || t_iseq(state->buf, '|')))
116 				{
117 					state->state = WAITOPERAND;
118 					*val = (int32) *(state->buf);
119 					(state->buf)++;
120 					return OPR;
121 				}
122 				else if (charlen == 1 && t_iseq(state->buf, ')'))
123 				{
124 					(state->buf)++;
125 					state->count--;
126 					return (state->count < 0) ? ERR : CLOSE;
127 				}
128 				else if (*(state->buf) == '\0')
129 					return (state->count) ? ERR : END;
130 				else if (charlen == 1 && !t_iseq(state->buf, ' '))
131 					return ERR;
132 				break;
133 			default:
134 				return ERR;
135 				break;
136 		}
137 
138 		state->buf += charlen;
139 	}
140 }
141 
142 /*
143  * push new one in polish notation reverse view
144  */
145 static void
pushquery(QPRS_STATE * state,int32 type,int32 val,int32 distance,int32 lenval,uint16 flag)146 pushquery(QPRS_STATE *state, int32 type, int32 val, int32 distance, int32 lenval, uint16 flag)
147 {
148 	NODE	   *tmp = (NODE *) palloc(sizeof(NODE));
149 
150 	tmp->type = type;
151 	tmp->val = val;
152 	tmp->flag = flag;
153 	if (distance > 0xffff)
154 		ereport(ERROR,
155 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
156 				 errmsg("value is too big")));
157 	if (lenval > 0xff)
158 		ereport(ERROR,
159 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
160 				 errmsg("operand is too long")));
161 	tmp->distance = distance;
162 	tmp->length = lenval;
163 	tmp->next = state->str;
164 	state->str = tmp;
165 	state->num++;
166 }
167 
168 /*
169  * This function is used for query_txt parsing
170  */
171 static void
pushval_asis(QPRS_STATE * state,int type,char * strval,int lenval,uint16 flag)172 pushval_asis(QPRS_STATE *state, int type, char *strval, int lenval, uint16 flag)
173 {
174 	if (lenval > 0xffff)
175 		ereport(ERROR,
176 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
177 				 errmsg("word is too long")));
178 
179 	pushquery(state, type, ltree_crc32_sz(strval, lenval),
180 			  state->curop - state->op, lenval, flag);
181 
182 	while (state->curop - state->op + lenval + 1 >= state->lenop)
183 	{
184 		int32		tmp = state->curop - state->op;
185 
186 		state->lenop *= 2;
187 		state->op = (char *) repalloc((void *) state->op, state->lenop);
188 		state->curop = state->op + tmp;
189 	}
190 	memcpy((void *) state->curop, (void *) strval, lenval);
191 	state->curop += lenval;
192 	*(state->curop) = '\0';
193 	state->curop++;
194 	state->sumlen += lenval + 1;
195 	return;
196 }
197 
198 #define STACKDEPTH		32
199 /*
200  * make polish notation of query
201  */
202 static int32
makepol(QPRS_STATE * state)203 makepol(QPRS_STATE *state)
204 {
205 	int32		val = 0,
206 				type;
207 	int32		lenval = 0;
208 	char	   *strval = NULL;
209 	int32		stack[STACKDEPTH];
210 	int32		lenstack = 0;
211 	uint16		flag = 0;
212 
213 	/* since this function recurses, it could be driven to stack overflow */
214 	check_stack_depth();
215 
216 	while ((type = gettoken_query(state, &val, &lenval, &strval, &flag)) != END)
217 	{
218 		switch (type)
219 		{
220 			case VAL:
221 				pushval_asis(state, VAL, strval, lenval, flag);
222 				while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
223 									stack[lenstack - 1] == (int32) '!'))
224 				{
225 					lenstack--;
226 					pushquery(state, OPR, stack[lenstack], 0, 0, 0);
227 				}
228 				break;
229 			case OPR:
230 				if (lenstack && val == (int32) '|')
231 					pushquery(state, OPR, val, 0, 0, 0);
232 				else
233 				{
234 					if (lenstack == STACKDEPTH)
235 						/* internal error */
236 						elog(ERROR, "stack too short");
237 					stack[lenstack] = val;
238 					lenstack++;
239 				}
240 				break;
241 			case OPEN:
242 				if (makepol(state) == ERR)
243 					return ERR;
244 				while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
245 									stack[lenstack - 1] == (int32) '!'))
246 				{
247 					lenstack--;
248 					pushquery(state, OPR, stack[lenstack], 0, 0, 0);
249 				}
250 				break;
251 			case CLOSE:
252 				while (lenstack)
253 				{
254 					lenstack--;
255 					pushquery(state, OPR, stack[lenstack], 0, 0, 0);
256 				};
257 				return END;
258 				break;
259 			case ERR:
260 			default:
261 				ereport(ERROR,
262 						(errcode(ERRCODE_SYNTAX_ERROR),
263 						 errmsg("syntax error")));
264 
265 				return ERR;
266 
267 		}
268 	}
269 	while (lenstack)
270 	{
271 		lenstack--;
272 		pushquery(state, OPR, stack[lenstack], 0, 0, 0);
273 	};
274 	return END;
275 }
276 
277 static void
findoprnd(ITEM * ptr,int32 * pos)278 findoprnd(ITEM *ptr, int32 *pos)
279 {
280 	/* since this function recurses, it could be driven to stack overflow. */
281 	check_stack_depth();
282 
283 	if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
284 	{
285 		ptr[*pos].left = 0;
286 		(*pos)++;
287 	}
288 	else if (ptr[*pos].val == (int32) '!')
289 	{
290 		ptr[*pos].left = 1;
291 		(*pos)++;
292 		findoprnd(ptr, pos);
293 	}
294 	else
295 	{
296 		ITEM	   *curitem = &ptr[*pos];
297 		int32		tmp = *pos;
298 
299 		(*pos)++;
300 		findoprnd(ptr, pos);
301 		curitem->left = *pos - tmp;
302 		findoprnd(ptr, pos);
303 	}
304 }
305 
306 
307 /*
308  * input
309  */
310 static ltxtquery *
queryin(char * buf)311 queryin(char *buf)
312 {
313 	QPRS_STATE	state;
314 	int32		i;
315 	ltxtquery  *query;
316 	int32		commonlen;
317 	ITEM	   *ptr;
318 	NODE	   *tmp;
319 	int32		pos = 0;
320 
321 #ifdef BS_DEBUG
322 	char		pbuf[16384],
323 			   *cur;
324 #endif
325 
326 	/* init state */
327 	state.buf = buf;
328 	state.state = WAITOPERAND;
329 	state.count = 0;
330 	state.num = 0;
331 	state.str = NULL;
332 
333 	/* init list of operand */
334 	state.sumlen = 0;
335 	state.lenop = 64;
336 	state.curop = state.op = (char *) palloc(state.lenop);
337 	*(state.curop) = '\0';
338 
339 	/* parse query & make polish notation (postfix, but in reverse order) */
340 	makepol(&state);
341 	if (!state.num)
342 		ereport(ERROR,
343 				(errcode(ERRCODE_SYNTAX_ERROR),
344 				 errmsg("syntax error"),
345 				 errdetail("Empty query.")));
346 
347 	if (LTXTQUERY_TOO_BIG(state.num, state.sumlen))
348 		ereport(ERROR,
349 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
350 				 errmsg("ltxtquery is too large")));
351 	commonlen = COMPUTESIZE(state.num, state.sumlen);
352 
353 	query = (ltxtquery *) palloc0(commonlen);
354 	SET_VARSIZE(query, commonlen);
355 	query->size = state.num;
356 	ptr = GETQUERY(query);
357 
358 	/* set item in polish notation */
359 	for (i = 0; i < state.num; i++)
360 	{
361 		ptr[i].type = state.str->type;
362 		ptr[i].val = state.str->val;
363 		ptr[i].distance = state.str->distance;
364 		ptr[i].length = state.str->length;
365 		ptr[i].flag = state.str->flag;
366 		tmp = state.str->next;
367 		pfree(state.str);
368 		state.str = tmp;
369 	}
370 
371 	/* set user friendly-operand view */
372 	memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
373 	pfree(state.op);
374 
375 	/* set left operand's position for every operator */
376 	pos = 0;
377 	findoprnd(ptr, &pos);
378 
379 	return query;
380 }
381 
382 /*
383  * in without morphology
384  */
385 Datum
ltxtq_in(PG_FUNCTION_ARGS)386 ltxtq_in(PG_FUNCTION_ARGS)
387 {
388 	PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0)));
389 }
390 
391 /*
392  * out function
393  */
394 typedef struct
395 {
396 	ITEM	   *curpol;
397 	char	   *buf;
398 	char	   *cur;
399 	char	   *op;
400 	int32		buflen;
401 } INFIX;
402 
/*
 * Make sure the buffer of "inf" can take "addsize" more bytes plus a
 * terminating NUL after its current write position.  The buffer is doubled
 * with repalloc() until that holds, and "cur" is re-based on the new address.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement everywhere, including as the unbraced body of an if that
 * has an else.  Note that both arguments are evaluated more than once.
 */
#define RESIZEBUF(inf, addsize) \
do { \
	while (((inf)->cur - (inf)->buf) + (addsize) + 1 >= (inf)->buflen) \
	{ \
		int32		len = (inf)->cur - (inf)->buf; \
		(inf)->buflen *= 2; \
		(inf)->buf = (char *) repalloc((void *) (inf)->buf, (inf)->buflen); \
		(inf)->cur = (inf)->buf + len; \
	} \
} while (0)
411 
412 /*
413  * recursive walk on tree and print it in
414  * infix (human-readable) view
415  */
416 static void
infix(INFIX * in,bool first)417 infix(INFIX *in, bool first)
418 {
419 	/* since this function recurses, it could be driven to stack overflow. */
420 	check_stack_depth();
421 
422 	if (in->curpol->type == VAL)
423 	{
424 		char	   *op = in->op + in->curpol->distance;
425 
426 		RESIZEBUF(in, in->curpol->length * 2 + 5);
427 		while (*op)
428 		{
429 			*(in->cur) = *op;
430 			op++;
431 			in->cur++;
432 		}
433 		if (in->curpol->flag & LVAR_SUBLEXEME)
434 		{
435 			*(in->cur) = '%';
436 			in->cur++;
437 		}
438 		if (in->curpol->flag & LVAR_INCASE)
439 		{
440 			*(in->cur) = '@';
441 			in->cur++;
442 		}
443 		if (in->curpol->flag & LVAR_ANYEND)
444 		{
445 			*(in->cur) = '*';
446 			in->cur++;
447 		}
448 		*(in->cur) = '\0';
449 		in->curpol++;
450 	}
451 	else if (in->curpol->val == (int32) '!')
452 	{
453 		bool		isopr = false;
454 
455 		RESIZEBUF(in, 1);
456 		*(in->cur) = '!';
457 		in->cur++;
458 		*(in->cur) = '\0';
459 		in->curpol++;
460 		if (in->curpol->type == OPR)
461 		{
462 			isopr = true;
463 			RESIZEBUF(in, 2);
464 			sprintf(in->cur, "( ");
465 			in->cur = strchr(in->cur, '\0');
466 		}
467 		infix(in, isopr);
468 		if (isopr)
469 		{
470 			RESIZEBUF(in, 2);
471 			sprintf(in->cur, " )");
472 			in->cur = strchr(in->cur, '\0');
473 		}
474 	}
475 	else
476 	{
477 		int32		op = in->curpol->val;
478 		INFIX		nrm;
479 
480 		in->curpol++;
481 		if (op == (int32) '|' && !first)
482 		{
483 			RESIZEBUF(in, 2);
484 			sprintf(in->cur, "( ");
485 			in->cur = strchr(in->cur, '\0');
486 		}
487 
488 		nrm.curpol = in->curpol;
489 		nrm.op = in->op;
490 		nrm.buflen = 16;
491 		nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
492 
493 		/* get right operand */
494 		infix(&nrm, false);
495 
496 		/* get & print left operand */
497 		in->curpol = nrm.curpol;
498 		infix(in, false);
499 
500 		/* print operator & right operand */
501 		RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
502 		sprintf(in->cur, " %c %s", op, nrm.buf);
503 		in->cur = strchr(in->cur, '\0');
504 		pfree(nrm.buf);
505 
506 		if (op == (int32) '|' && !first)
507 		{
508 			RESIZEBUF(in, 2);
509 			sprintf(in->cur, " )");
510 			in->cur = strchr(in->cur, '\0');
511 		}
512 	}
513 }
514 
515 Datum
ltxtq_out(PG_FUNCTION_ARGS)516 ltxtq_out(PG_FUNCTION_ARGS)
517 {
518 	ltxtquery  *query = PG_GETARG_LTXTQUERY_P(0);
519 	INFIX		nrm;
520 
521 	if (query->size == 0)
522 		ereport(ERROR,
523 				(errcode(ERRCODE_SYNTAX_ERROR),
524 				 errmsg("syntax error"),
525 				 errdetail("Empty query.")));
526 
527 	nrm.curpol = GETQUERY(query);
528 	nrm.buflen = 32;
529 	nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
530 	*(nrm.cur) = '\0';
531 	nrm.op = GETOPERAND(query);
532 	infix(&nrm, true);
533 
534 	PG_FREE_IF_COPY(query, 0);
535 	PG_RETURN_POINTER(nrm.buf);
536 }
537