1 /*
2 * txtquery io
3 * Teodor Sigaev <teodor@stack.net>
4 * contrib/ltree/ltxtquery_io.c
5 */
6 #include "postgres.h"
7
8 #include <ctype.h>
9
10 #include "crc32.h"
11 #include "libpq/pqformat.h"
12 #include "ltree.h"
13 #include "miscadmin.h"
14
15
/* states of the query tokenizer, kept in QPRS_STATE.state */
enum
{
	WAITOPERAND = 1,			/* expecting '!', '(' or the start of an
								 * operand */
	INOPERAND = 2,				/* inside an operand or its modifiers */
	WAITOPERATOR = 3			/* expecting '&', '|', ')' or end of input */
};
20
21 /*
22 * node of query tree, also used
23 * for storing polish notation in parser
24 */
25 typedef struct NODE
26 {
27 int32 type;
28 int32 val;
29 int16 distance;
30 int16 length;
31 uint16 flag;
32 struct NODE *next;
33 } NODE;
34
35 typedef struct
36 {
37 char *buf;
38 int32 state;
39 int32 count;
40 /* reverse polish notation in list (for temporary usage) */
41 NODE *str;
42 /* number in str */
43 int32 num;
44
45 /* user-friendly operand */
46 int32 lenop;
47 int32 sumlen;
48 char *op;
49 char *curop;
50 } QPRS_STATE;
51
52 /*
53 * get token from query string
54 */
55 static int32
gettoken_query(QPRS_STATE * state,int32 * val,int32 * lenval,char ** strval,uint16 * flag)56 gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint16 *flag)
57 {
58 int charlen;
59
60 for (;;)
61 {
62 charlen = pg_mblen(state->buf);
63
64 switch (state->state)
65 {
66 case WAITOPERAND:
67 if (charlen == 1 && t_iseq(state->buf, '!'))
68 {
69 (state->buf)++;
70 *val = (int32) '!';
71 return OPR;
72 }
73 else if (charlen == 1 && t_iseq(state->buf, '('))
74 {
75 state->count++;
76 (state->buf)++;
77 return OPEN;
78 }
79 else if (ISALNUM(state->buf))
80 {
81 state->state = INOPERAND;
82 *strval = state->buf;
83 *lenval = charlen;
84 *flag = 0;
85 }
86 else if (!t_isspace(state->buf))
87 ereport(ERROR,
88 (errcode(ERRCODE_SYNTAX_ERROR),
89 errmsg("operand syntax error")));
90 break;
91 case INOPERAND:
92 if (ISALNUM(state->buf))
93 {
94 if (*flag)
95 ereport(ERROR,
96 (errcode(ERRCODE_SYNTAX_ERROR),
97 errmsg("modifiers syntax error")));
98 *lenval += charlen;
99 }
100 else if (charlen == 1 && t_iseq(state->buf, '%'))
101 *flag |= LVAR_SUBLEXEME;
102 else if (charlen == 1 && t_iseq(state->buf, '@'))
103 *flag |= LVAR_INCASE;
104 else if (charlen == 1 && t_iseq(state->buf, '*'))
105 *flag |= LVAR_ANYEND;
106 else
107 {
108 state->state = WAITOPERATOR;
109 return VAL;
110 }
111 break;
112 case WAITOPERATOR:
113 if (charlen == 1 && (t_iseq(state->buf, '&') || t_iseq(state->buf, '|')))
114 {
115 state->state = WAITOPERAND;
116 *val = (int32) *(state->buf);
117 (state->buf)++;
118 return OPR;
119 }
120 else if (charlen == 1 && t_iseq(state->buf, ')'))
121 {
122 (state->buf)++;
123 state->count--;
124 return (state->count < 0) ? ERR : CLOSE;
125 }
126 else if (*(state->buf) == '\0')
127 return (state->count) ? ERR : END;
128 else if (charlen == 1 && !t_iseq(state->buf, ' '))
129 return ERR;
130 break;
131 default:
132 return ERR;
133 break;
134 }
135
136 state->buf += charlen;
137 }
138 }
139
140 /*
141 * push new one in polish notation reverse view
142 */
143 static void
pushquery(QPRS_STATE * state,int32 type,int32 val,int32 distance,int32 lenval,uint16 flag)144 pushquery(QPRS_STATE *state, int32 type, int32 val, int32 distance, int32 lenval, uint16 flag)
145 {
146 NODE *tmp = (NODE *) palloc(sizeof(NODE));
147
148 tmp->type = type;
149 tmp->val = val;
150 tmp->flag = flag;
151 if (distance > 0xffff)
152 ereport(ERROR,
153 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
154 errmsg("value is too big")));
155 if (lenval > 0xff)
156 ereport(ERROR,
157 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
158 errmsg("operand is too long")));
159 tmp->distance = distance;
160 tmp->length = lenval;
161 tmp->next = state->str;
162 state->str = tmp;
163 state->num++;
164 }
165
166 /*
167 * This function is used for query text parsing
168 */
169 static void
pushval_asis(QPRS_STATE * state,int type,char * strval,int lenval,uint16 flag)170 pushval_asis(QPRS_STATE *state, int type, char *strval, int lenval, uint16 flag)
171 {
172 if (lenval > 0xffff)
173 ereport(ERROR,
174 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
175 errmsg("word is too long")));
176
177 pushquery(state, type, ltree_crc32_sz(strval, lenval),
178 state->curop - state->op, lenval, flag);
179
180 while (state->curop - state->op + lenval + 1 >= state->lenop)
181 {
182 int32 tmp = state->curop - state->op;
183
184 state->lenop *= 2;
185 state->op = (char *) repalloc((void *) state->op, state->lenop);
186 state->curop = state->op + tmp;
187 }
188 memcpy((void *) state->curop, (void *) strval, lenval);
189 state->curop += lenval;
190 *(state->curop) = '\0';
191 state->curop++;
192 state->sumlen += lenval + 1;
193 }
194
195 #define STACKDEPTH 32
196 /*
197 * make polish notation of query
198 */
199 static int32
makepol(QPRS_STATE * state)200 makepol(QPRS_STATE *state)
201 {
202 int32 val = 0,
203 type;
204 int32 lenval = 0;
205 char *strval = NULL;
206 int32 stack[STACKDEPTH];
207 int32 lenstack = 0;
208 uint16 flag = 0;
209
210 /* since this function recurses, it could be driven to stack overflow */
211 check_stack_depth();
212
213 while ((type = gettoken_query(state, &val, &lenval, &strval, &flag)) != END)
214 {
215 switch (type)
216 {
217 case VAL:
218 pushval_asis(state, VAL, strval, lenval, flag);
219 while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
220 stack[lenstack - 1] == (int32) '!'))
221 {
222 lenstack--;
223 pushquery(state, OPR, stack[lenstack], 0, 0, 0);
224 }
225 break;
226 case OPR:
227 if (lenstack && val == (int32) '|')
228 pushquery(state, OPR, val, 0, 0, 0);
229 else
230 {
231 if (lenstack == STACKDEPTH)
232 /* internal error */
233 elog(ERROR, "stack too short");
234 stack[lenstack] = val;
235 lenstack++;
236 }
237 break;
238 case OPEN:
239 if (makepol(state) == ERR)
240 return ERR;
241 while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
242 stack[lenstack - 1] == (int32) '!'))
243 {
244 lenstack--;
245 pushquery(state, OPR, stack[lenstack], 0, 0, 0);
246 }
247 break;
248 case CLOSE:
249 while (lenstack)
250 {
251 lenstack--;
252 pushquery(state, OPR, stack[lenstack], 0, 0, 0);
253 };
254 return END;
255 break;
256 case ERR:
257 default:
258 ereport(ERROR,
259 (errcode(ERRCODE_SYNTAX_ERROR),
260 errmsg("syntax error")));
261
262 return ERR;
263
264 }
265 }
266 while (lenstack)
267 {
268 lenstack--;
269 pushquery(state, OPR, stack[lenstack], 0, 0, 0);
270 };
271 return END;
272 }
273
274 static void
findoprnd(ITEM * ptr,int32 * pos)275 findoprnd(ITEM *ptr, int32 *pos)
276 {
277 /* since this function recurses, it could be driven to stack overflow. */
278 check_stack_depth();
279
280 if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
281 {
282 ptr[*pos].left = 0;
283 (*pos)++;
284 }
285 else if (ptr[*pos].val == (int32) '!')
286 {
287 ptr[*pos].left = 1;
288 (*pos)++;
289 findoprnd(ptr, pos);
290 }
291 else
292 {
293 ITEM *curitem = &ptr[*pos];
294 int32 tmp = *pos;
295
296 (*pos)++;
297 findoprnd(ptr, pos);
298 curitem->left = *pos - tmp;
299 findoprnd(ptr, pos);
300 }
301 }
302
303
304 /*
305 * input
306 */
307 static ltxtquery *
queryin(char * buf)308 queryin(char *buf)
309 {
310 QPRS_STATE state;
311 int32 i;
312 ltxtquery *query;
313 int32 commonlen;
314 ITEM *ptr;
315 NODE *tmp;
316 int32 pos = 0;
317
318 #ifdef BS_DEBUG
319 char pbuf[16384],
320 *cur;
321 #endif
322
323 /* init state */
324 state.buf = buf;
325 state.state = WAITOPERAND;
326 state.count = 0;
327 state.num = 0;
328 state.str = NULL;
329
330 /* init list of operand */
331 state.sumlen = 0;
332 state.lenop = 64;
333 state.curop = state.op = (char *) palloc(state.lenop);
334 *(state.curop) = '\0';
335
336 /* parse query & make polish notation (postfix, but in reverse order) */
337 makepol(&state);
338 if (!state.num)
339 ereport(ERROR,
340 (errcode(ERRCODE_SYNTAX_ERROR),
341 errmsg("syntax error"),
342 errdetail("Empty query.")));
343
344 if (LTXTQUERY_TOO_BIG(state.num, state.sumlen))
345 ereport(ERROR,
346 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
347 errmsg("ltxtquery is too large")));
348 commonlen = COMPUTESIZE(state.num, state.sumlen);
349
350 query = (ltxtquery *) palloc0(commonlen);
351 SET_VARSIZE(query, commonlen);
352 query->size = state.num;
353 ptr = GETQUERY(query);
354
355 /* set item in polish notation */
356 for (i = 0; i < state.num; i++)
357 {
358 ptr[i].type = state.str->type;
359 ptr[i].val = state.str->val;
360 ptr[i].distance = state.str->distance;
361 ptr[i].length = state.str->length;
362 ptr[i].flag = state.str->flag;
363 tmp = state.str->next;
364 pfree(state.str);
365 state.str = tmp;
366 }
367
368 /* set user-friendly operand view */
369 memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
370 pfree(state.op);
371
372 /* set left operand's position for every operator */
373 pos = 0;
374 findoprnd(ptr, &pos);
375
376 return query;
377 }
378
379 /*
380 * in without morphology
381 */
382 PG_FUNCTION_INFO_V1(ltxtq_in);
383 Datum
ltxtq_in(PG_FUNCTION_ARGS)384 ltxtq_in(PG_FUNCTION_ARGS)
385 {
386 PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0)));
387 }
388
389 /*
390 * ltxtquery type recv function
391 *
392 * The type is sent as text in binary mode, so this is almost the same
393 * as the input function, but it's prefixed with a version number so we
394 * can change the binary format sent in future if necessary. For now,
395 * only version 1 is supported.
396 */
397 PG_FUNCTION_INFO_V1(ltxtq_recv);
398 Datum
ltxtq_recv(PG_FUNCTION_ARGS)399 ltxtq_recv(PG_FUNCTION_ARGS)
400 {
401 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
402 int version = pq_getmsgint(buf, 1);
403 char *str;
404 int nbytes;
405 ltxtquery *res;
406
407 if (version != 1)
408 elog(ERROR, "unsupported ltxtquery version number %d", version);
409
410 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
411 res = queryin(str);
412 pfree(str);
413
414 PG_RETURN_POINTER(res);
415 }
416
417 /*
418 * out function
419 */
420 typedef struct
421 {
422 ITEM *curpol;
423 char *buf;
424 char *cur;
425 char *op;
426 int32 buflen;
427 } INFIX;
428
/*
 * Make sure the output buffer of "inf" has room for "addsize" more bytes
 * plus a terminator, doubling it as often as necessary.  The write
 * position is kept at the same offset when the buffer moves.
 *
 * The body is wrapped in do { } while (0) so the macro acts as a single
 * statement, including inside an unbraced if/else.  "addsize" is evaluated
 * on every pass of the loop.
 */
#define RESIZEBUF(inf, addsize) \
	do { \
		while (((inf)->cur - (inf)->buf) + (addsize) + 1 >= (inf)->buflen) \
		{ \
			int32		resizebuf_used_ = (inf)->cur - (inf)->buf; \
			(inf)->buflen *= 2; \
			(inf)->buf = (char *) repalloc((void *) (inf)->buf, (inf)->buflen); \
			(inf)->cur = (inf)->buf + resizebuf_used_; \
		} \
	} while (0)
437
438 /*
439 * recursive walk on tree and print it in
440 * infix (human-readable) view
441 */
442 static void
infix(INFIX * in,bool first)443 infix(INFIX *in, bool first)
444 {
445 /* since this function recurses, it could be driven to stack overflow. */
446 check_stack_depth();
447
448 if (in->curpol->type == VAL)
449 {
450 char *op = in->op + in->curpol->distance;
451
452 RESIZEBUF(in, in->curpol->length * 2 + 5);
453 while (*op)
454 {
455 *(in->cur) = *op;
456 op++;
457 in->cur++;
458 }
459 if (in->curpol->flag & LVAR_SUBLEXEME)
460 {
461 *(in->cur) = '%';
462 in->cur++;
463 }
464 if (in->curpol->flag & LVAR_INCASE)
465 {
466 *(in->cur) = '@';
467 in->cur++;
468 }
469 if (in->curpol->flag & LVAR_ANYEND)
470 {
471 *(in->cur) = '*';
472 in->cur++;
473 }
474 *(in->cur) = '\0';
475 in->curpol++;
476 }
477 else if (in->curpol->val == (int32) '!')
478 {
479 bool isopr = false;
480
481 RESIZEBUF(in, 1);
482 *(in->cur) = '!';
483 in->cur++;
484 *(in->cur) = '\0';
485 in->curpol++;
486 if (in->curpol->type == OPR)
487 {
488 isopr = true;
489 RESIZEBUF(in, 2);
490 sprintf(in->cur, "( ");
491 in->cur = strchr(in->cur, '\0');
492 }
493 infix(in, isopr);
494 if (isopr)
495 {
496 RESIZEBUF(in, 2);
497 sprintf(in->cur, " )");
498 in->cur = strchr(in->cur, '\0');
499 }
500 }
501 else
502 {
503 int32 op = in->curpol->val;
504 INFIX nrm;
505
506 in->curpol++;
507 if (op == (int32) '|' && !first)
508 {
509 RESIZEBUF(in, 2);
510 sprintf(in->cur, "( ");
511 in->cur = strchr(in->cur, '\0');
512 }
513
514 nrm.curpol = in->curpol;
515 nrm.op = in->op;
516 nrm.buflen = 16;
517 nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
518
519 /* get right operand */
520 infix(&nrm, false);
521
522 /* get & print left operand */
523 in->curpol = nrm.curpol;
524 infix(in, false);
525
526 /* print operator & right operand */
527 RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
528 sprintf(in->cur, " %c %s", op, nrm.buf);
529 in->cur = strchr(in->cur, '\0');
530 pfree(nrm.buf);
531
532 if (op == (int32) '|' && !first)
533 {
534 RESIZEBUF(in, 2);
535 sprintf(in->cur, " )");
536 in->cur = strchr(in->cur, '\0');
537 }
538 }
539 }
540
541 PG_FUNCTION_INFO_V1(ltxtq_out);
542 Datum
ltxtq_out(PG_FUNCTION_ARGS)543 ltxtq_out(PG_FUNCTION_ARGS)
544 {
545 ltxtquery *query = PG_GETARG_LTXTQUERY_P(0);
546 INFIX nrm;
547
548 if (query->size == 0)
549 ereport(ERROR,
550 (errcode(ERRCODE_SYNTAX_ERROR),
551 errmsg("syntax error"),
552 errdetail("Empty query.")));
553
554 nrm.curpol = GETQUERY(query);
555 nrm.buflen = 32;
556 nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
557 *(nrm.cur) = '\0';
558 nrm.op = GETOPERAND(query);
559 infix(&nrm, true);
560
561 PG_RETURN_POINTER(nrm.buf);
562 }
563
564 /*
565 * ltxtquery type send function
566 *
567 * The type is sent as text in binary mode, so this is almost the same
568 * as the output function, but it's prefixed with a version number so we
569 * can change the binary format sent in future if necessary. For now,
570 * only version 1 is supported.
571 */
572 PG_FUNCTION_INFO_V1(ltxtq_send);
573 Datum
ltxtq_send(PG_FUNCTION_ARGS)574 ltxtq_send(PG_FUNCTION_ARGS)
575 {
576 ltxtquery *query = PG_GETARG_LTXTQUERY_P(0);
577 StringInfoData buf;
578 int version = 1;
579 INFIX nrm;
580
581 if (query->size == 0)
582 ereport(ERROR,
583 (errcode(ERRCODE_SYNTAX_ERROR),
584 errmsg("syntax error"),
585 errdetail("Empty query.")));
586
587 nrm.curpol = GETQUERY(query);
588 nrm.buflen = 32;
589 nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
590 *(nrm.cur) = '\0';
591 nrm.op = GETOPERAND(query);
592 infix(&nrm, true);
593
594 pq_begintypsend(&buf);
595 pq_sendint8(&buf, version);
596 pq_sendtext(&buf, nrm.buf, strlen(nrm.buf));
597 pfree(nrm.buf);
598
599 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
600 }
601