/*
 * Input/output functions for the ltree and lquery data types
 * Teodor Sigaev <teodor@stack.net>
 * contrib/ltree/ltree_io.c
 */
6 #include "postgres.h"
7 
8 #include <ctype.h>
9 
10 #include "ltree.h"
11 #include "utils/memutils.h"
12 #include "crc32.h"
13 
/*
 * Declare the four SQL-callable entry points defined in this file: the
 * text input and output functions for ltree and for lquery.
 */
PG_FUNCTION_INFO_V1(ltree_in);
PG_FUNCTION_INFO_V1(ltree_out);
PG_FUNCTION_INFO_V1(lquery_in);
PG_FUNCTION_INFO_V1(lquery_out);
18 
19 
/*
 * UNCHAR - report a syntax error at the current parse position.
 *
 * The expansion refers to a variable named "pos", which must be an int in
 * scope wherever the macro is used.
 *
 * The expansion intentionally carries no trailing semicolon: the caller
 * writes "UNCHAR;", so the macro behaves as exactly one statement and is
 * safe in an unbraced if/else arm.
 */
#define UNCHAR ereport(ERROR, \
					   (errcode(ERRCODE_SYNTAX_ERROR), \
						errmsg("syntax error at position %d", \
						pos)))
24 
25 
/*
 * Parse-time description of one label: a level of an ltree, or one variant
 * of an lquery level.  The label text is not copied while parsing; "start"
 * points into the input string and the bytes are copied only when the
 * result value is built.
 */
typedef struct
{
	char	   *start;			/* first byte of the label in the input */
	int			len;			/* length in bytes */
	int			flag;			/* LVAR_* modifier bits; only lquery_in sets
								 * this */
	int			wlen;			/* length in characters */
} nodeitem;
33 
/* Parser states used by ltree_in */
#define LTPRS_WAITNAME	0		/* expecting the first character of a label */
#define LTPRS_WAITDELIM 1		/* inside a label: more label chars or '.' */
36 
37 Datum
ltree_in(PG_FUNCTION_ARGS)38 ltree_in(PG_FUNCTION_ARGS)
39 {
40 	char	   *buf = (char *) PG_GETARG_POINTER(0);
41 	char	   *ptr;
42 	nodeitem   *list,
43 			   *lptr;
44 	int			num = 0,
45 				totallen = 0;
46 	int			state = LTPRS_WAITNAME;
47 	ltree	   *result;
48 	ltree_level *curlevel;
49 	int			charlen;
50 	int			pos = 0;
51 
52 	ptr = buf;
53 	while (*ptr)
54 	{
55 		charlen = pg_mblen(ptr);
56 		if (charlen == 1 && t_iseq(ptr, '.'))
57 			num++;
58 		ptr += charlen;
59 	}
60 
61 	if (num + 1 > LTREE_MAX_LEVELS)
62 		ereport(ERROR,
63 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
64 				 errmsg("number of ltree levels (%d) exceeds the maximum allowed (%d)",
65 						num + 1, LTREE_MAX_LEVELS)));
66 	list = lptr = (nodeitem *) palloc(sizeof(nodeitem) * (num + 1));
67 	ptr = buf;
68 	while (*ptr)
69 	{
70 		charlen = pg_mblen(ptr);
71 
72 		if (state == LTPRS_WAITNAME)
73 		{
74 			if (ISALNUM(ptr))
75 			{
76 				lptr->start = ptr;
77 				lptr->wlen = 0;
78 				state = LTPRS_WAITDELIM;
79 			}
80 			else
81 				UNCHAR;
82 		}
83 		else if (state == LTPRS_WAITDELIM)
84 		{
85 			if (charlen == 1 && t_iseq(ptr, '.'))
86 			{
87 				lptr->len = ptr - lptr->start;
88 				if (lptr->wlen > 255)
89 					ereport(ERROR,
90 							(errcode(ERRCODE_NAME_TOO_LONG),
91 							 errmsg("name of level is too long"),
92 							 errdetail("Name length is %d, must "
93 									   "be < 256, in position %d.",
94 									   lptr->wlen, pos)));
95 
96 				totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE);
97 				lptr++;
98 				state = LTPRS_WAITNAME;
99 			}
100 			else if (!ISALNUM(ptr))
101 				UNCHAR;
102 		}
103 		else
104 			/* internal error */
105 			elog(ERROR, "internal error in parser");
106 
107 		ptr += charlen;
108 		lptr->wlen++;
109 		pos++;
110 	}
111 
112 	if (state == LTPRS_WAITDELIM)
113 	{
114 		lptr->len = ptr - lptr->start;
115 		if (lptr->wlen > 255)
116 			ereport(ERROR,
117 					(errcode(ERRCODE_NAME_TOO_LONG),
118 					 errmsg("name of level is too long"),
119 					 errdetail("Name length is %d, must "
120 							   "be < 256, in position %d.",
121 							   lptr->wlen, pos)));
122 
123 		totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE);
124 		lptr++;
125 	}
126 	else if (!(state == LTPRS_WAITNAME && lptr == list))
127 		ereport(ERROR,
128 				(errcode(ERRCODE_SYNTAX_ERROR),
129 				 errmsg("syntax error"),
130 				 errdetail("Unexpected end of line.")));
131 
132 	result = (ltree *) palloc0(LTREE_HDRSIZE + totallen);
133 	SET_VARSIZE(result, LTREE_HDRSIZE + totallen);
134 	result->numlevel = lptr - list;
135 	curlevel = LTREE_FIRST(result);
136 	lptr = list;
137 	while (lptr - list < result->numlevel)
138 	{
139 		curlevel->len = (uint16) lptr->len;
140 		memcpy(curlevel->name, lptr->start, lptr->len);
141 		curlevel = LEVEL_NEXT(curlevel);
142 		lptr++;
143 	}
144 
145 	pfree(list);
146 	PG_RETURN_POINTER(result);
147 }
148 
149 Datum
ltree_out(PG_FUNCTION_ARGS)150 ltree_out(PG_FUNCTION_ARGS)
151 {
152 	ltree	   *in = PG_GETARG_LTREE(0);
153 	char	   *buf,
154 			   *ptr;
155 	int			i;
156 	ltree_level *curlevel;
157 
158 	ptr = buf = (char *) palloc(VARSIZE(in));
159 	curlevel = LTREE_FIRST(in);
160 	for (i = 0; i < in->numlevel; i++)
161 	{
162 		if (i != 0)
163 		{
164 			*ptr = '.';
165 			ptr++;
166 		}
167 		memcpy(ptr, curlevel->name, curlevel->len);
168 		ptr += curlevel->len;
169 		curlevel = LEVEL_NEXT(curlevel);
170 	}
171 
172 	*ptr = '\0';
173 	PG_FREE_IF_COPY(in, 0);
174 
175 	PG_RETURN_POINTER(buf);
176 }
177 
/* Parser states used by lquery_in */
#define LQPRS_WAITLEVEL 0		/* start of a level: label char, '!' or '*' */
#define LQPRS_WAITDELIM 1		/* inside a label: label char, modifier
								 * ('@', '*', '%'), '|' or '.' */
#define LQPRS_WAITOPEN	2		/* after '*': expecting '{' or '.' */
#define LQPRS_WAITFNUM	3		/* after '{': expecting low bound or ',' */
#define LQPRS_WAITSNUM	4		/* after ',': expecting high bound or '}' */
#define LQPRS_WAITND	5		/* in low bound: more digits, ',' or '}' */
#define LQPRS_WAITCLOSE 6		/* in high bound: more digits or '}' */
#define LQPRS_WAITEND	7		/* after '}': expecting '.' */
#define LQPRS_WAITVAR	8		/* after '|': expecting start of next variant */
187 
188 
/*
 * While parsing, lquery_in keeps a temporary array with one fixed-size slot
 * per level.  Each slot is an lquery_level header with room for a single
 * pointer to that level's array of nodeitems (its variants).
 *
 * GETVAR(x)	the nodeitem pointer stored in slot x, at the address
 *				LQL_FIRST(x) yields
 * ITEMSIZE		size in bytes of one slot
 * NEXTLEV(x)	address of the slot following slot x
 */
#define GETVAR(x) ( *((nodeitem**)LQL_FIRST(x)) )
#define ITEMSIZE	MAXALIGN(LQL_HDRSIZE+sizeof(nodeitem*))
#define NEXTLEV(x) ( (lquery_level*)( ((char*)(x)) + ITEMSIZE) )
192 
193 Datum
lquery_in(PG_FUNCTION_ARGS)194 lquery_in(PG_FUNCTION_ARGS)
195 {
196 	char	   *buf = (char *) PG_GETARG_POINTER(0);
197 	char	   *ptr;
198 	int			num = 0,
199 				totallen = 0,
200 				numOR = 0;
201 	int			state = LQPRS_WAITLEVEL;
202 	lquery	   *result;
203 	nodeitem   *lptr = NULL;
204 	lquery_level *cur,
205 			   *curqlevel,
206 			   *tmpql;
207 	lquery_variant *lrptr = NULL;
208 	bool		hasnot = false;
209 	bool		wasbad = false;
210 	int			charlen;
211 	int			pos = 0;
212 
213 	ptr = buf;
214 	while (*ptr)
215 	{
216 		charlen = pg_mblen(ptr);
217 
218 		if (charlen == 1)
219 		{
220 			if (t_iseq(ptr, '.'))
221 				num++;
222 			else if (t_iseq(ptr, '|'))
223 				numOR++;
224 		}
225 
226 		ptr += charlen;
227 	}
228 
229 	num++;
230 	if (num > LQUERY_MAX_LEVELS)
231 		ereport(ERROR,
232 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
233 				 errmsg("number of lquery levels (%d) exceeds the maximum allowed (%d)",
234 						num, LQUERY_MAX_LEVELS)));
235 	curqlevel = tmpql = (lquery_level *) palloc0(ITEMSIZE * num);
236 	ptr = buf;
237 	while (*ptr)
238 	{
239 		charlen = pg_mblen(ptr);
240 
241 		if (state == LQPRS_WAITLEVEL)
242 		{
243 			if (ISALNUM(ptr))
244 			{
245 				GETVAR(curqlevel) = lptr = (nodeitem *) palloc0(sizeof(nodeitem) * (numOR + 1));
246 				lptr->start = ptr;
247 				state = LQPRS_WAITDELIM;
248 				curqlevel->numvar = 1;
249 			}
250 			else if (charlen == 1 && t_iseq(ptr, '!'))
251 			{
252 				GETVAR(curqlevel) = lptr = (nodeitem *) palloc0(sizeof(nodeitem) * (numOR + 1));
253 				lptr->start = ptr + 1;
254 				state = LQPRS_WAITDELIM;
255 				curqlevel->numvar = 1;
256 				curqlevel->flag |= LQL_NOT;
257 				hasnot = true;
258 			}
259 			else if (charlen == 1 && t_iseq(ptr, '*'))
260 				state = LQPRS_WAITOPEN;
261 			else
262 				UNCHAR;
263 		}
264 		else if (state == LQPRS_WAITVAR)
265 		{
266 			if (ISALNUM(ptr))
267 			{
268 				lptr++;
269 				lptr->start = ptr;
270 				state = LQPRS_WAITDELIM;
271 				curqlevel->numvar++;
272 			}
273 			else
274 				UNCHAR;
275 		}
276 		else if (state == LQPRS_WAITDELIM)
277 		{
278 			if (charlen == 1 && t_iseq(ptr, '@'))
279 			{
280 				if (lptr->start == ptr)
281 					UNCHAR;
282 				lptr->flag |= LVAR_INCASE;
283 				curqlevel->flag |= LVAR_INCASE;
284 			}
285 			else if (charlen == 1 && t_iseq(ptr, '*'))
286 			{
287 				if (lptr->start == ptr)
288 					UNCHAR;
289 				lptr->flag |= LVAR_ANYEND;
290 				curqlevel->flag |= LVAR_ANYEND;
291 			}
292 			else if (charlen == 1 && t_iseq(ptr, '%'))
293 			{
294 				if (lptr->start == ptr)
295 					UNCHAR;
296 				lptr->flag |= LVAR_SUBLEXEME;
297 				curqlevel->flag |= LVAR_SUBLEXEME;
298 			}
299 			else if (charlen == 1 && t_iseq(ptr, '|'))
300 			{
301 				lptr->len = ptr - lptr->start -
302 					((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
303 					((lptr->flag & LVAR_INCASE) ? 1 : 0) -
304 					((lptr->flag & LVAR_ANYEND) ? 1 : 0);
305 				if (lptr->wlen > 255)
306 					ereport(ERROR,
307 							(errcode(ERRCODE_NAME_TOO_LONG),
308 							 errmsg("name of level is too long"),
309 							 errdetail("Name length is %d, must "
310 									   "be < 256, in position %d.",
311 									   lptr->wlen, pos)));
312 
313 				state = LQPRS_WAITVAR;
314 			}
315 			else if (charlen == 1 && t_iseq(ptr, '.'))
316 			{
317 				lptr->len = ptr - lptr->start -
318 					((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
319 					((lptr->flag & LVAR_INCASE) ? 1 : 0) -
320 					((lptr->flag & LVAR_ANYEND) ? 1 : 0);
321 				if (lptr->wlen > 255)
322 					ereport(ERROR,
323 							(errcode(ERRCODE_NAME_TOO_LONG),
324 							 errmsg("name of level is too long"),
325 							 errdetail("Name length is %d, must "
326 									   "be < 256, in position %d.",
327 									   lptr->wlen, pos)));
328 
329 				state = LQPRS_WAITLEVEL;
330 				curqlevel = NEXTLEV(curqlevel);
331 			}
332 			else if (ISALNUM(ptr))
333 			{
334 				if (lptr->flag)
335 					UNCHAR;
336 			}
337 			else
338 				UNCHAR;
339 		}
340 		else if (state == LQPRS_WAITOPEN)
341 		{
342 			if (charlen == 1 && t_iseq(ptr, '{'))
343 				state = LQPRS_WAITFNUM;
344 			else if (charlen == 1 && t_iseq(ptr, '.'))
345 			{
346 				curqlevel->low = 0;
347 				curqlevel->high = LTREE_MAX_LEVELS;
348 				curqlevel = NEXTLEV(curqlevel);
349 				state = LQPRS_WAITLEVEL;
350 			}
351 			else
352 				UNCHAR;
353 		}
354 		else if (state == LQPRS_WAITFNUM)
355 		{
356 			if (charlen == 1 && t_iseq(ptr, ','))
357 				state = LQPRS_WAITSNUM;
358 			else if (t_isdigit(ptr))
359 			{
360 				int			low = atoi(ptr);
361 
362 				if (low < 0 || low > LTREE_MAX_LEVELS)
363 					ereport(ERROR,
364 							(errcode(ERRCODE_SYNTAX_ERROR),
365 							 errmsg("lquery syntax error"),
366 							 errdetail("Low limit (%d) exceeds the maximum allowed (%d).",
367 									   low, LTREE_MAX_LEVELS)));
368 
369 				curqlevel->low = (uint16) low;
370 				state = LQPRS_WAITND;
371 			}
372 			else
373 				UNCHAR;
374 		}
375 		else if (state == LQPRS_WAITSNUM)
376 		{
377 			if (t_isdigit(ptr))
378 			{
379 				int			high = atoi(ptr);
380 
381 				if (high < 0 || high > LTREE_MAX_LEVELS)
382 					ereport(ERROR,
383 							(errcode(ERRCODE_SYNTAX_ERROR),
384 							 errmsg("lquery syntax error"),
385 							 errdetail("High limit (%d) exceeds the maximum allowed (%d).",
386 									   high, LTREE_MAX_LEVELS)));
387 
388 				curqlevel->high = (uint16) high;
389 				state = LQPRS_WAITCLOSE;
390 			}
391 			else if (charlen == 1 && t_iseq(ptr, '}'))
392 			{
393 				curqlevel->high = LTREE_MAX_LEVELS;
394 				state = LQPRS_WAITEND;
395 			}
396 			else
397 				UNCHAR;
398 		}
399 		else if (state == LQPRS_WAITCLOSE)
400 		{
401 			if (charlen == 1 && t_iseq(ptr, '}'))
402 				state = LQPRS_WAITEND;
403 			else if (!t_isdigit(ptr))
404 				UNCHAR;
405 		}
406 		else if (state == LQPRS_WAITND)
407 		{
408 			if (charlen == 1 && t_iseq(ptr, '}'))
409 			{
410 				curqlevel->high = curqlevel->low;
411 				state = LQPRS_WAITEND;
412 			}
413 			else if (charlen == 1 && t_iseq(ptr, ','))
414 				state = LQPRS_WAITSNUM;
415 			else if (!t_isdigit(ptr))
416 				UNCHAR;
417 		}
418 		else if (state == LQPRS_WAITEND)
419 		{
420 			if (charlen == 1 && t_iseq(ptr, '.'))
421 			{
422 				state = LQPRS_WAITLEVEL;
423 				curqlevel = NEXTLEV(curqlevel);
424 			}
425 			else
426 				UNCHAR;
427 		}
428 		else
429 			/* internal error */
430 			elog(ERROR, "internal error in parser");
431 
432 		ptr += charlen;
433 		if (state == LQPRS_WAITDELIM)
434 			lptr->wlen++;
435 		pos++;
436 	}
437 
438 	if (state == LQPRS_WAITDELIM)
439 	{
440 		if (lptr->start == ptr)
441 			ereport(ERROR,
442 					(errcode(ERRCODE_SYNTAX_ERROR),
443 					 errmsg("lquery syntax error"),
444 					 errdetail("Unexpected end of line.")));
445 
446 		lptr->len = ptr - lptr->start -
447 			((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
448 			((lptr->flag & LVAR_INCASE) ? 1 : 0) -
449 			((lptr->flag & LVAR_ANYEND) ? 1 : 0);
450 		if (lptr->len == 0)
451 			ereport(ERROR,
452 					(errcode(ERRCODE_SYNTAX_ERROR),
453 					 errmsg("lquery syntax error"),
454 					 errdetail("Unexpected end of line.")));
455 
456 		if (lptr->wlen > 255)
457 			ereport(ERROR,
458 					(errcode(ERRCODE_NAME_TOO_LONG),
459 					 errmsg("name of level is too long"),
460 					 errdetail("Name length is %d, must "
461 							   "be < 256, in position %d.",
462 							   lptr->wlen, pos)));
463 	}
464 	else if (state == LQPRS_WAITOPEN)
465 		curqlevel->high = LTREE_MAX_LEVELS;
466 	else if (state != LQPRS_WAITEND)
467 		ereport(ERROR,
468 				(errcode(ERRCODE_SYNTAX_ERROR),
469 				 errmsg("lquery syntax error"),
470 				 errdetail("Unexpected end of line.")));
471 
472 	curqlevel = tmpql;
473 	totallen = LQUERY_HDRSIZE;
474 	while ((char *) curqlevel - (char *) tmpql < num * ITEMSIZE)
475 	{
476 		totallen += LQL_HDRSIZE;
477 		if (curqlevel->numvar)
478 		{
479 			lptr = GETVAR(curqlevel);
480 			while (lptr - GETVAR(curqlevel) < curqlevel->numvar)
481 			{
482 				totallen += MAXALIGN(LVAR_HDRSIZE + lptr->len);
483 				lptr++;
484 			}
485 		}
486 		else if (curqlevel->low > curqlevel->high)
487 			ereport(ERROR,
488 					(errcode(ERRCODE_SYNTAX_ERROR),
489 					 errmsg("lquery syntax error"),
490 					 errdetail("Low limit (%d) is greater than upper (%d).",
491 							   curqlevel->low, curqlevel->high)));
492 
493 		curqlevel = NEXTLEV(curqlevel);
494 	}
495 
496 	result = (lquery *) palloc0(totallen);
497 	SET_VARSIZE(result, totallen);
498 	result->numlevel = num;
499 	result->firstgood = 0;
500 	result->flag = 0;
501 	if (hasnot)
502 		result->flag |= LQUERY_HASNOT;
503 	cur = LQUERY_FIRST(result);
504 	curqlevel = tmpql;
505 	while ((char *) curqlevel - (char *) tmpql < num * ITEMSIZE)
506 	{
507 		memcpy(cur, curqlevel, LQL_HDRSIZE);
508 		cur->totallen = LQL_HDRSIZE;
509 		if (curqlevel->numvar)
510 		{
511 			lrptr = LQL_FIRST(cur);
512 			lptr = GETVAR(curqlevel);
513 			while (lptr - GETVAR(curqlevel) < curqlevel->numvar)
514 			{
515 				cur->totallen += MAXALIGN(LVAR_HDRSIZE + lptr->len);
516 				lrptr->len = lptr->len;
517 				lrptr->flag = lptr->flag;
518 				lrptr->val = ltree_crc32_sz(lptr->start, lptr->len);
519 				memcpy(lrptr->name, lptr->start, lptr->len);
520 				lptr++;
521 				lrptr = LVAR_NEXT(lrptr);
522 			}
523 			pfree(GETVAR(curqlevel));
524 			if (cur->numvar > 1 || cur->flag != 0)
525 				wasbad = true;
526 			else if (wasbad == false)
527 				(result->firstgood)++;
528 		}
529 		else
530 			wasbad = true;
531 		curqlevel = NEXTLEV(curqlevel);
532 		cur = LQL_NEXT(cur);
533 	}
534 
535 	pfree(tmpql);
536 	PG_RETURN_POINTER(result);
537 }
538 
539 Datum
lquery_out(PG_FUNCTION_ARGS)540 lquery_out(PG_FUNCTION_ARGS)
541 {
542 	lquery	   *in = PG_GETARG_LQUERY(0);
543 	char	   *buf,
544 			   *ptr;
545 	int			i,
546 				j,
547 				totallen = 1;
548 	lquery_level *curqlevel;
549 	lquery_variant *curtlevel;
550 
551 	curqlevel = LQUERY_FIRST(in);
552 	for (i = 0; i < in->numlevel; i++)
553 	{
554 		totallen++;
555 		if (curqlevel->numvar)
556 			totallen += 1 + (curqlevel->numvar * 4) + curqlevel->totallen;
557 		else
558 			totallen += 2 * 11 + 4;
559 		curqlevel = LQL_NEXT(curqlevel);
560 	}
561 
562 	ptr = buf = (char *) palloc(totallen);
563 	curqlevel = LQUERY_FIRST(in);
564 	for (i = 0; i < in->numlevel; i++)
565 	{
566 		if (i != 0)
567 		{
568 			*ptr = '.';
569 			ptr++;
570 		}
571 		if (curqlevel->numvar)
572 		{
573 			if (curqlevel->flag & LQL_NOT)
574 			{
575 				*ptr = '!';
576 				ptr++;
577 			}
578 			curtlevel = LQL_FIRST(curqlevel);
579 			for (j = 0; j < curqlevel->numvar; j++)
580 			{
581 				if (j != 0)
582 				{
583 					*ptr = '|';
584 					ptr++;
585 				}
586 				memcpy(ptr, curtlevel->name, curtlevel->len);
587 				ptr += curtlevel->len;
588 				if ((curtlevel->flag & LVAR_SUBLEXEME))
589 				{
590 					*ptr = '%';
591 					ptr++;
592 				}
593 				if ((curtlevel->flag & LVAR_INCASE))
594 				{
595 					*ptr = '@';
596 					ptr++;
597 				}
598 				if ((curtlevel->flag & LVAR_ANYEND))
599 				{
600 					*ptr = '*';
601 					ptr++;
602 				}
603 				curtlevel = LVAR_NEXT(curtlevel);
604 			}
605 		}
606 		else
607 		{
608 			if (curqlevel->low == curqlevel->high)
609 			{
610 				sprintf(ptr, "*{%d}", curqlevel->low);
611 			}
612 			else if (curqlevel->low == 0)
613 			{
614 				if (curqlevel->high == LTREE_MAX_LEVELS)
615 				{
616 					*ptr = '*';
617 					*(ptr + 1) = '\0';
618 				}
619 				else
620 					sprintf(ptr, "*{,%d}", curqlevel->high);
621 			}
622 			else if (curqlevel->high == LTREE_MAX_LEVELS)
623 			{
624 				sprintf(ptr, "*{%d,}", curqlevel->low);
625 			}
626 			else
627 				sprintf(ptr, "*{%d,%d}", curqlevel->low, curqlevel->high);
628 			ptr = strchr(ptr, '\0');
629 		}
630 
631 		curqlevel = LQL_NEXT(curqlevel);
632 	}
633 
634 	*ptr = '\0';
635 	PG_FREE_IF_COPY(in, 0);
636 
637 	PG_RETURN_POINTER(buf);
638 }
639