/*
 * txtquery io
 * Teodor Sigaev <teodor@stack.net>
 * contrib/ltree/ltxtquery_io.c
 */
6 #include "postgres.h"
7 
8 #include <ctype.h>
9 
10 #include "crc32.h"
11 #include "libpq/pqformat.h"
12 #include "ltree.h"
13 #include "miscadmin.h"
14 
15 
/*
 * Tokenizer states used by gettoken_query().  The current state is kept in
 * QPRS_STATE.state and determines which characters are acceptable next.
 */
#define WAITOPERAND		1		/* expecting '!', '(' or the start of an operand */
#define INOPERAND		2		/* inside an operand or its trailing modifiers */
#define WAITOPERATOR	3		/* expecting '&', '|', ')' or end of input */
20 
21 /*
22  * node of query tree, also used
23  * for storing polish notation in parser
24  */
25 typedef struct NODE
26 {
27 	int32		type;
28 	int32		val;
29 	int16		distance;
30 	int16		length;
31 	uint16		flag;
32 	struct NODE *next;
33 } NODE;
34 
35 typedef struct
36 {
37 	char	   *buf;
38 	int32		state;
39 	int32		count;
40 	/* reverse polish notation in list (for temporary usage) */
41 	NODE	   *str;
42 	/* number in str */
43 	int32		num;
44 
45 	/* user-friendly operand */
46 	int32		lenop;
47 	int32		sumlen;
48 	char	   *op;
49 	char	   *curop;
50 } QPRS_STATE;
51 
52 /*
53  * get token from query string
54  */
55 static int32
gettoken_query(QPRS_STATE * state,int32 * val,int32 * lenval,char ** strval,uint16 * flag)56 gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint16 *flag)
57 {
58 	int			charlen;
59 
60 	for (;;)
61 	{
62 		charlen = pg_mblen(state->buf);
63 
64 		switch (state->state)
65 		{
66 			case WAITOPERAND:
67 				if (charlen == 1 && t_iseq(state->buf, '!'))
68 				{
69 					(state->buf)++;
70 					*val = (int32) '!';
71 					return OPR;
72 				}
73 				else if (charlen == 1 && t_iseq(state->buf, '('))
74 				{
75 					state->count++;
76 					(state->buf)++;
77 					return OPEN;
78 				}
79 				else if (ISALNUM(state->buf))
80 				{
81 					state->state = INOPERAND;
82 					*strval = state->buf;
83 					*lenval = charlen;
84 					*flag = 0;
85 				}
86 				else if (!t_isspace(state->buf))
87 					ereport(ERROR,
88 							(errcode(ERRCODE_SYNTAX_ERROR),
89 							 errmsg("operand syntax error")));
90 				break;
91 			case INOPERAND:
92 				if (ISALNUM(state->buf))
93 				{
94 					if (*flag)
95 						ereport(ERROR,
96 								(errcode(ERRCODE_SYNTAX_ERROR),
97 								 errmsg("modifiers syntax error")));
98 					*lenval += charlen;
99 				}
100 				else if (charlen == 1 && t_iseq(state->buf, '%'))
101 					*flag |= LVAR_SUBLEXEME;
102 				else if (charlen == 1 && t_iseq(state->buf, '@'))
103 					*flag |= LVAR_INCASE;
104 				else if (charlen == 1 && t_iseq(state->buf, '*'))
105 					*flag |= LVAR_ANYEND;
106 				else
107 				{
108 					state->state = WAITOPERATOR;
109 					return VAL;
110 				}
111 				break;
112 			case WAITOPERATOR:
113 				if (charlen == 1 && (t_iseq(state->buf, '&') || t_iseq(state->buf, '|')))
114 				{
115 					state->state = WAITOPERAND;
116 					*val = (int32) *(state->buf);
117 					(state->buf)++;
118 					return OPR;
119 				}
120 				else if (charlen == 1 && t_iseq(state->buf, ')'))
121 				{
122 					(state->buf)++;
123 					state->count--;
124 					return (state->count < 0) ? ERR : CLOSE;
125 				}
126 				else if (*(state->buf) == '\0')
127 					return (state->count) ? ERR : END;
128 				else if (charlen == 1 && !t_iseq(state->buf, ' '))
129 					return ERR;
130 				break;
131 			default:
132 				return ERR;
133 				break;
134 		}
135 
136 		state->buf += charlen;
137 	}
138 }
139 
140 /*
141  * push new one in polish notation reverse view
142  */
143 static void
pushquery(QPRS_STATE * state,int32 type,int32 val,int32 distance,int32 lenval,uint16 flag)144 pushquery(QPRS_STATE *state, int32 type, int32 val, int32 distance, int32 lenval, uint16 flag)
145 {
146 	NODE	   *tmp = (NODE *) palloc(sizeof(NODE));
147 
148 	tmp->type = type;
149 	tmp->val = val;
150 	tmp->flag = flag;
151 	if (distance > 0xffff)
152 		ereport(ERROR,
153 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
154 				 errmsg("value is too big")));
155 	if (lenval > 0xff)
156 		ereport(ERROR,
157 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
158 				 errmsg("operand is too long")));
159 	tmp->distance = distance;
160 	tmp->length = lenval;
161 	tmp->next = state->str;
162 	state->str = tmp;
163 	state->num++;
164 }
165 
166 /*
167  * This function is used for query text parsing
168  */
169 static void
pushval_asis(QPRS_STATE * state,int type,char * strval,int lenval,uint16 flag)170 pushval_asis(QPRS_STATE *state, int type, char *strval, int lenval, uint16 flag)
171 {
172 	if (lenval > 0xffff)
173 		ereport(ERROR,
174 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
175 				 errmsg("word is too long")));
176 
177 	pushquery(state, type, ltree_crc32_sz(strval, lenval),
178 			  state->curop - state->op, lenval, flag);
179 
180 	while (state->curop - state->op + lenval + 1 >= state->lenop)
181 	{
182 		int32		tmp = state->curop - state->op;
183 
184 		state->lenop *= 2;
185 		state->op = (char *) repalloc((void *) state->op, state->lenop);
186 		state->curop = state->op + tmp;
187 	}
188 	memcpy((void *) state->curop, (void *) strval, lenval);
189 	state->curop += lenval;
190 	*(state->curop) = '\0';
191 	state->curop++;
192 	state->sumlen += lenval + 1;
193 }
194 
195 #define STACKDEPTH		32
196 /*
197  * make polish notation of query
198  */
199 static int32
makepol(QPRS_STATE * state)200 makepol(QPRS_STATE *state)
201 {
202 	int32		val = 0,
203 				type;
204 	int32		lenval = 0;
205 	char	   *strval = NULL;
206 	int32		stack[STACKDEPTH];
207 	int32		lenstack = 0;
208 	uint16		flag = 0;
209 
210 	/* since this function recurses, it could be driven to stack overflow */
211 	check_stack_depth();
212 
213 	while ((type = gettoken_query(state, &val, &lenval, &strval, &flag)) != END)
214 	{
215 		switch (type)
216 		{
217 			case VAL:
218 				pushval_asis(state, VAL, strval, lenval, flag);
219 				while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
220 									stack[lenstack - 1] == (int32) '!'))
221 				{
222 					lenstack--;
223 					pushquery(state, OPR, stack[lenstack], 0, 0, 0);
224 				}
225 				break;
226 			case OPR:
227 				if (lenstack && val == (int32) '|')
228 					pushquery(state, OPR, val, 0, 0, 0);
229 				else
230 				{
231 					if (lenstack == STACKDEPTH)
232 						/* internal error */
233 						elog(ERROR, "stack too short");
234 					stack[lenstack] = val;
235 					lenstack++;
236 				}
237 				break;
238 			case OPEN:
239 				if (makepol(state) == ERR)
240 					return ERR;
241 				while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
242 									stack[lenstack - 1] == (int32) '!'))
243 				{
244 					lenstack--;
245 					pushquery(state, OPR, stack[lenstack], 0, 0, 0);
246 				}
247 				break;
248 			case CLOSE:
249 				while (lenstack)
250 				{
251 					lenstack--;
252 					pushquery(state, OPR, stack[lenstack], 0, 0, 0);
253 				};
254 				return END;
255 				break;
256 			case ERR:
257 			default:
258 				ereport(ERROR,
259 						(errcode(ERRCODE_SYNTAX_ERROR),
260 						 errmsg("syntax error")));
261 
262 				return ERR;
263 
264 		}
265 	}
266 	while (lenstack)
267 	{
268 		lenstack--;
269 		pushquery(state, OPR, stack[lenstack], 0, 0, 0);
270 	};
271 	return END;
272 }
273 
274 static void
findoprnd(ITEM * ptr,int32 * pos)275 findoprnd(ITEM *ptr, int32 *pos)
276 {
277 	/* since this function recurses, it could be driven to stack overflow. */
278 	check_stack_depth();
279 
280 	if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
281 	{
282 		ptr[*pos].left = 0;
283 		(*pos)++;
284 	}
285 	else if (ptr[*pos].val == (int32) '!')
286 	{
287 		ptr[*pos].left = 1;
288 		(*pos)++;
289 		findoprnd(ptr, pos);
290 	}
291 	else
292 	{
293 		ITEM	   *curitem = &ptr[*pos];
294 		int32		tmp = *pos;
295 
296 		(*pos)++;
297 		findoprnd(ptr, pos);
298 		curitem->left = *pos - tmp;
299 		findoprnd(ptr, pos);
300 	}
301 }
302 
303 
304 /*
305  * input
306  */
307 static ltxtquery *
queryin(char * buf)308 queryin(char *buf)
309 {
310 	QPRS_STATE	state;
311 	int32		i;
312 	ltxtquery  *query;
313 	int32		commonlen;
314 	ITEM	   *ptr;
315 	NODE	   *tmp;
316 	int32		pos = 0;
317 
318 #ifdef BS_DEBUG
319 	char		pbuf[16384],
320 			   *cur;
321 #endif
322 
323 	/* init state */
324 	state.buf = buf;
325 	state.state = WAITOPERAND;
326 	state.count = 0;
327 	state.num = 0;
328 	state.str = NULL;
329 
330 	/* init list of operand */
331 	state.sumlen = 0;
332 	state.lenop = 64;
333 	state.curop = state.op = (char *) palloc(state.lenop);
334 	*(state.curop) = '\0';
335 
336 	/* parse query & make polish notation (postfix, but in reverse order) */
337 	makepol(&state);
338 	if (!state.num)
339 		ereport(ERROR,
340 				(errcode(ERRCODE_SYNTAX_ERROR),
341 				 errmsg("syntax error"),
342 				 errdetail("Empty query.")));
343 
344 	if (LTXTQUERY_TOO_BIG(state.num, state.sumlen))
345 		ereport(ERROR,
346 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
347 				 errmsg("ltxtquery is too large")));
348 	commonlen = COMPUTESIZE(state.num, state.sumlen);
349 
350 	query = (ltxtquery *) palloc0(commonlen);
351 	SET_VARSIZE(query, commonlen);
352 	query->size = state.num;
353 	ptr = GETQUERY(query);
354 
355 	/* set item in polish notation */
356 	for (i = 0; i < state.num; i++)
357 	{
358 		ptr[i].type = state.str->type;
359 		ptr[i].val = state.str->val;
360 		ptr[i].distance = state.str->distance;
361 		ptr[i].length = state.str->length;
362 		ptr[i].flag = state.str->flag;
363 		tmp = state.str->next;
364 		pfree(state.str);
365 		state.str = tmp;
366 	}
367 
368 	/* set user-friendly operand view */
369 	memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
370 	pfree(state.op);
371 
372 	/* set left operand's position for every operator */
373 	pos = 0;
374 	findoprnd(ptr, &pos);
375 
376 	return query;
377 }
378 
379 /*
380  * in without morphology
381  */
382 PG_FUNCTION_INFO_V1(ltxtq_in);
383 Datum
ltxtq_in(PG_FUNCTION_ARGS)384 ltxtq_in(PG_FUNCTION_ARGS)
385 {
386 	PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0)));
387 }
388 
389 /*
390  * ltxtquery type recv function
391  *
392  * The type is sent as text in binary mode, so this is almost the same
393  * as the input function, but it's prefixed with a version number so we
394  * can change the binary format sent in future if necessary. For now,
395  * only version 1 is supported.
396  */
397 PG_FUNCTION_INFO_V1(ltxtq_recv);
398 Datum
ltxtq_recv(PG_FUNCTION_ARGS)399 ltxtq_recv(PG_FUNCTION_ARGS)
400 {
401 	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
402 	int			version = pq_getmsgint(buf, 1);
403 	char	   *str;
404 	int			nbytes;
405 	ltxtquery  *res;
406 
407 	if (version != 1)
408 		elog(ERROR, "unsupported ltxtquery version number %d", version);
409 
410 	str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
411 	res = queryin(str);
412 	pfree(str);
413 
414 	PG_RETURN_POINTER(res);
415 }
416 
417 /*
418  * out function
419  */
420 typedef struct
421 {
422 	ITEM	   *curpol;
423 	char	   *buf;
424 	char	   *cur;
425 	char	   *op;
426 	int32		buflen;
427 } INFIX;
428 
/*
 * Make sure (inf)->buf has room for addsize more bytes plus a terminator
 * after the current write position, doubling the buffer as often as needed.
 * (inf)->cur is kept at the same offset when the buffer moves.
 */
#define RESIZEBUF(inf, addsize) \
	do { \
		while (((inf)->cur - (inf)->buf) + (addsize) + 1 >= (inf)->buflen) \
		{ \
			int32		resizebuf_used = (inf)->cur - (inf)->buf; \
			(inf)->buflen *= 2; \
			(inf)->buf = (char *) repalloc((void *) (inf)->buf, (inf)->buflen); \
			(inf)->cur = (inf)->buf + resizebuf_used; \
		} \
	} while (0)
437 
438 /*
439  * recursive walk on tree and print it in
440  * infix (human-readable) view
441  */
442 static void
infix(INFIX * in,bool first)443 infix(INFIX *in, bool first)
444 {
445 	/* since this function recurses, it could be driven to stack overflow. */
446 	check_stack_depth();
447 
448 	if (in->curpol->type == VAL)
449 	{
450 		char	   *op = in->op + in->curpol->distance;
451 
452 		RESIZEBUF(in, in->curpol->length * 2 + 5);
453 		while (*op)
454 		{
455 			*(in->cur) = *op;
456 			op++;
457 			in->cur++;
458 		}
459 		if (in->curpol->flag & LVAR_SUBLEXEME)
460 		{
461 			*(in->cur) = '%';
462 			in->cur++;
463 		}
464 		if (in->curpol->flag & LVAR_INCASE)
465 		{
466 			*(in->cur) = '@';
467 			in->cur++;
468 		}
469 		if (in->curpol->flag & LVAR_ANYEND)
470 		{
471 			*(in->cur) = '*';
472 			in->cur++;
473 		}
474 		*(in->cur) = '\0';
475 		in->curpol++;
476 	}
477 	else if (in->curpol->val == (int32) '!')
478 	{
479 		bool		isopr = false;
480 
481 		RESIZEBUF(in, 1);
482 		*(in->cur) = '!';
483 		in->cur++;
484 		*(in->cur) = '\0';
485 		in->curpol++;
486 		if (in->curpol->type == OPR)
487 		{
488 			isopr = true;
489 			RESIZEBUF(in, 2);
490 			sprintf(in->cur, "( ");
491 			in->cur = strchr(in->cur, '\0');
492 		}
493 		infix(in, isopr);
494 		if (isopr)
495 		{
496 			RESIZEBUF(in, 2);
497 			sprintf(in->cur, " )");
498 			in->cur = strchr(in->cur, '\0');
499 		}
500 	}
501 	else
502 	{
503 		int32		op = in->curpol->val;
504 		INFIX		nrm;
505 
506 		in->curpol++;
507 		if (op == (int32) '|' && !first)
508 		{
509 			RESIZEBUF(in, 2);
510 			sprintf(in->cur, "( ");
511 			in->cur = strchr(in->cur, '\0');
512 		}
513 
514 		nrm.curpol = in->curpol;
515 		nrm.op = in->op;
516 		nrm.buflen = 16;
517 		nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
518 
519 		/* get right operand */
520 		infix(&nrm, false);
521 
522 		/* get & print left operand */
523 		in->curpol = nrm.curpol;
524 		infix(in, false);
525 
526 		/* print operator & right operand */
527 		RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
528 		sprintf(in->cur, " %c %s", op, nrm.buf);
529 		in->cur = strchr(in->cur, '\0');
530 		pfree(nrm.buf);
531 
532 		if (op == (int32) '|' && !first)
533 		{
534 			RESIZEBUF(in, 2);
535 			sprintf(in->cur, " )");
536 			in->cur = strchr(in->cur, '\0');
537 		}
538 	}
539 }
540 
541 PG_FUNCTION_INFO_V1(ltxtq_out);
542 Datum
ltxtq_out(PG_FUNCTION_ARGS)543 ltxtq_out(PG_FUNCTION_ARGS)
544 {
545 	ltxtquery  *query = PG_GETARG_LTXTQUERY_P(0);
546 	INFIX		nrm;
547 
548 	if (query->size == 0)
549 		ereport(ERROR,
550 				(errcode(ERRCODE_SYNTAX_ERROR),
551 				 errmsg("syntax error"),
552 				 errdetail("Empty query.")));
553 
554 	nrm.curpol = GETQUERY(query);
555 	nrm.buflen = 32;
556 	nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
557 	*(nrm.cur) = '\0';
558 	nrm.op = GETOPERAND(query);
559 	infix(&nrm, true);
560 
561 	PG_RETURN_POINTER(nrm.buf);
562 }
563 
564 /*
565  * ltxtquery type send function
566  *
567  * The type is sent as text in binary mode, so this is almost the same
568  * as the output function, but it's prefixed with a version number so we
569  * can change the binary format sent in future if necessary. For now,
570  * only version 1 is supported.
571  */
572 PG_FUNCTION_INFO_V1(ltxtq_send);
573 Datum
ltxtq_send(PG_FUNCTION_ARGS)574 ltxtq_send(PG_FUNCTION_ARGS)
575 {
576 	ltxtquery  *query = PG_GETARG_LTXTQUERY_P(0);
577 	StringInfoData buf;
578 	int			version = 1;
579 	INFIX		nrm;
580 
581 	if (query->size == 0)
582 		ereport(ERROR,
583 				(errcode(ERRCODE_SYNTAX_ERROR),
584 				 errmsg("syntax error"),
585 				 errdetail("Empty query.")));
586 
587 	nrm.curpol = GETQUERY(query);
588 	nrm.buflen = 32;
589 	nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
590 	*(nrm.cur) = '\0';
591 	nrm.op = GETOPERAND(query);
592 	infix(&nrm, true);
593 
594 	pq_begintypsend(&buf);
595 	pq_sendint8(&buf, version);
596 	pq_sendtext(&buf, nrm.buf, strlen(nrm.buf));
597 	pfree(nrm.buf);
598 
599 	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
600 }
601