1 /*
2 * txtquery io
3 * Teodor Sigaev <teodor@stack.net>
4 * contrib/ltree/ltxtquery_io.c
5 */
6 #include "postgres.h"
7
8 #include <ctype.h>
9
10 #include "crc32.h"
11 #include "ltree.h"
12 #include "miscadmin.h"
13
/*
 * Function-manager declarations for the two SQL-callable entry points
 * defined further down in this file: ltxtq_in and ltxtq_out.
 */
PG_FUNCTION_INFO_V1(ltxtq_in);
PG_FUNCTION_INFO_V1(ltxtq_out);
16
17
/*
 * States of the query tokenizer (stored in QPRS_STATE.state):
 * waiting for an operand, inside an operand, waiting for an operator.
 */
enum
{
	WAITOPERAND = 1,
	INOPERAND = 2,
	WAITOPERATOR = 3
};
22
23 /*
24 * node of query tree, also used
25 * for storing polish notation in parser
26 */
27 typedef struct NODE
28 {
29 int32 type;
30 int32 val;
31 int16 distance;
32 int16 length;
33 uint16 flag;
34 struct NODE *next;
35 } NODE;
36
/*
 * Working state of the ltxtquery parser.
 */
typedef struct
{
	/* current read position in the input string */
	char	   *buf;
	/* tokenizer state: WAITOPERAND, INOPERAND or WAITOPERATOR */
	int32		state;
	/* number of parentheses opened and not yet closed */
	int32		count;
	/* reverse polish notation in list (for temporary usage) */
	NODE	   *str;
	/* number in str */
	int32		num;

	/* user-friendly operand */
	/* allocated size of the operand buffer 'op' */
	int32		lenop;
	/* total bytes of operand text stored so far, including terminators */
	int32		sumlen;
	/* start of the operand buffer */
	char	   *op;
	/* position in 'op' where the next operand will be written */
	char	   *curop;
} QPRS_STATE;
53
54 /*
55 * get token from query string
56 */
57 static int32
gettoken_query(QPRS_STATE * state,int32 * val,int32 * lenval,char ** strval,uint16 * flag)58 gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint16 *flag)
59 {
60 int charlen;
61
62 for (;;)
63 {
64 charlen = pg_mblen(state->buf);
65
66 switch (state->state)
67 {
68 case WAITOPERAND:
69 if (charlen == 1 && t_iseq(state->buf, '!'))
70 {
71 (state->buf)++;
72 *val = (int32) '!';
73 return OPR;
74 }
75 else if (charlen == 1 && t_iseq(state->buf, '('))
76 {
77 state->count++;
78 (state->buf)++;
79 return OPEN;
80 }
81 else if (ISALNUM(state->buf))
82 {
83 state->state = INOPERAND;
84 *strval = state->buf;
85 *lenval = charlen;
86 *flag = 0;
87 }
88 else if (!t_isspace(state->buf))
89 ereport(ERROR,
90 (errcode(ERRCODE_SYNTAX_ERROR),
91 errmsg("operand syntax error")));
92 break;
93 case INOPERAND:
94 if (ISALNUM(state->buf))
95 {
96 if (*flag)
97 ereport(ERROR,
98 (errcode(ERRCODE_SYNTAX_ERROR),
99 errmsg("modifiers syntax error")));
100 *lenval += charlen;
101 }
102 else if (charlen == 1 && t_iseq(state->buf, '%'))
103 *flag |= LVAR_SUBLEXEME;
104 else if (charlen == 1 && t_iseq(state->buf, '@'))
105 *flag |= LVAR_INCASE;
106 else if (charlen == 1 && t_iseq(state->buf, '*'))
107 *flag |= LVAR_ANYEND;
108 else
109 {
110 state->state = WAITOPERATOR;
111 return VAL;
112 }
113 break;
114 case WAITOPERATOR:
115 if (charlen == 1 && (t_iseq(state->buf, '&') || t_iseq(state->buf, '|')))
116 {
117 state->state = WAITOPERAND;
118 *val = (int32) *(state->buf);
119 (state->buf)++;
120 return OPR;
121 }
122 else if (charlen == 1 && t_iseq(state->buf, ')'))
123 {
124 (state->buf)++;
125 state->count--;
126 return (state->count < 0) ? ERR : CLOSE;
127 }
128 else if (*(state->buf) == '\0')
129 return (state->count) ? ERR : END;
130 else if (charlen == 1 && !t_iseq(state->buf, ' '))
131 return ERR;
132 break;
133 default:
134 return ERR;
135 break;
136 }
137
138 state->buf += charlen;
139 }
140 }
141
142 /*
143 * push new one in polish notation reverse view
144 */
145 static void
pushquery(QPRS_STATE * state,int32 type,int32 val,int32 distance,int32 lenval,uint16 flag)146 pushquery(QPRS_STATE *state, int32 type, int32 val, int32 distance, int32 lenval, uint16 flag)
147 {
148 NODE *tmp = (NODE *) palloc(sizeof(NODE));
149
150 tmp->type = type;
151 tmp->val = val;
152 tmp->flag = flag;
153 if (distance > 0xffff)
154 ereport(ERROR,
155 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
156 errmsg("value is too big")));
157 if (lenval > 0xff)
158 ereport(ERROR,
159 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
160 errmsg("operand is too long")));
161 tmp->distance = distance;
162 tmp->length = lenval;
163 tmp->next = state->str;
164 state->str = tmp;
165 state->num++;
166 }
167
168 /*
169 * This function is used for query_txt parsing
170 */
171 static void
pushval_asis(QPRS_STATE * state,int type,char * strval,int lenval,uint16 flag)172 pushval_asis(QPRS_STATE *state, int type, char *strval, int lenval, uint16 flag)
173 {
174 if (lenval > 0xffff)
175 ereport(ERROR,
176 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
177 errmsg("word is too long")));
178
179 pushquery(state, type, ltree_crc32_sz(strval, lenval),
180 state->curop - state->op, lenval, flag);
181
182 while (state->curop - state->op + lenval + 1 >= state->lenop)
183 {
184 int32 tmp = state->curop - state->op;
185
186 state->lenop *= 2;
187 state->op = (char *) repalloc((void *) state->op, state->lenop);
188 state->curop = state->op + tmp;
189 }
190 memcpy((void *) state->curop, (void *) strval, lenval);
191 state->curop += lenval;
192 *(state->curop) = '\0';
193 state->curop++;
194 state->sumlen += lenval + 1;
195 return;
196 }
197
198 #define STACKDEPTH 32
199 /*
200 * make polish notation of query
201 */
202 static int32
makepol(QPRS_STATE * state)203 makepol(QPRS_STATE *state)
204 {
205 int32 val = 0,
206 type;
207 int32 lenval = 0;
208 char *strval = NULL;
209 int32 stack[STACKDEPTH];
210 int32 lenstack = 0;
211 uint16 flag = 0;
212
213 /* since this function recurses, it could be driven to stack overflow */
214 check_stack_depth();
215
216 while ((type = gettoken_query(state, &val, &lenval, &strval, &flag)) != END)
217 {
218 switch (type)
219 {
220 case VAL:
221 pushval_asis(state, VAL, strval, lenval, flag);
222 while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
223 stack[lenstack - 1] == (int32) '!'))
224 {
225 lenstack--;
226 pushquery(state, OPR, stack[lenstack], 0, 0, 0);
227 }
228 break;
229 case OPR:
230 if (lenstack && val == (int32) '|')
231 pushquery(state, OPR, val, 0, 0, 0);
232 else
233 {
234 if (lenstack == STACKDEPTH)
235 /* internal error */
236 elog(ERROR, "stack too short");
237 stack[lenstack] = val;
238 lenstack++;
239 }
240 break;
241 case OPEN:
242 if (makepol(state) == ERR)
243 return ERR;
244 while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
245 stack[lenstack - 1] == (int32) '!'))
246 {
247 lenstack--;
248 pushquery(state, OPR, stack[lenstack], 0, 0, 0);
249 }
250 break;
251 case CLOSE:
252 while (lenstack)
253 {
254 lenstack--;
255 pushquery(state, OPR, stack[lenstack], 0, 0, 0);
256 };
257 return END;
258 break;
259 case ERR:
260 default:
261 ereport(ERROR,
262 (errcode(ERRCODE_SYNTAX_ERROR),
263 errmsg("syntax error")));
264
265 return ERR;
266
267 }
268 }
269 while (lenstack)
270 {
271 lenstack--;
272 pushquery(state, OPR, stack[lenstack], 0, 0, 0);
273 };
274 return END;
275 }
276
277 static void
findoprnd(ITEM * ptr,int32 * pos)278 findoprnd(ITEM *ptr, int32 *pos)
279 {
280 /* since this function recurses, it could be driven to stack overflow. */
281 check_stack_depth();
282
283 if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
284 {
285 ptr[*pos].left = 0;
286 (*pos)++;
287 }
288 else if (ptr[*pos].val == (int32) '!')
289 {
290 ptr[*pos].left = 1;
291 (*pos)++;
292 findoprnd(ptr, pos);
293 }
294 else
295 {
296 ITEM *curitem = &ptr[*pos];
297 int32 tmp = *pos;
298
299 (*pos)++;
300 findoprnd(ptr, pos);
301 curitem->left = *pos - tmp;
302 findoprnd(ptr, pos);
303 }
304 }
305
306
307 /*
308 * input
309 */
310 static ltxtquery *
queryin(char * buf)311 queryin(char *buf)
312 {
313 QPRS_STATE state;
314 int32 i;
315 ltxtquery *query;
316 int32 commonlen;
317 ITEM *ptr;
318 NODE *tmp;
319 int32 pos = 0;
320
321 #ifdef BS_DEBUG
322 char pbuf[16384],
323 *cur;
324 #endif
325
326 /* init state */
327 state.buf = buf;
328 state.state = WAITOPERAND;
329 state.count = 0;
330 state.num = 0;
331 state.str = NULL;
332
333 /* init list of operand */
334 state.sumlen = 0;
335 state.lenop = 64;
336 state.curop = state.op = (char *) palloc(state.lenop);
337 *(state.curop) = '\0';
338
339 /* parse query & make polish notation (postfix, but in reverse order) */
340 makepol(&state);
341 if (!state.num)
342 ereport(ERROR,
343 (errcode(ERRCODE_SYNTAX_ERROR),
344 errmsg("syntax error"),
345 errdetail("Empty query.")));
346
347 if (LTXTQUERY_TOO_BIG(state.num, state.sumlen))
348 ereport(ERROR,
349 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
350 errmsg("ltxtquery is too large")));
351 commonlen = COMPUTESIZE(state.num, state.sumlen);
352
353 query = (ltxtquery *) palloc0(commonlen);
354 SET_VARSIZE(query, commonlen);
355 query->size = state.num;
356 ptr = GETQUERY(query);
357
358 /* set item in polish notation */
359 for (i = 0; i < state.num; i++)
360 {
361 ptr[i].type = state.str->type;
362 ptr[i].val = state.str->val;
363 ptr[i].distance = state.str->distance;
364 ptr[i].length = state.str->length;
365 ptr[i].flag = state.str->flag;
366 tmp = state.str->next;
367 pfree(state.str);
368 state.str = tmp;
369 }
370
371 /* set user friendly-operand view */
372 memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
373 pfree(state.op);
374
375 /* set left operand's position for every operator */
376 pos = 0;
377 findoprnd(ptr, &pos);
378
379 return query;
380 }
381
382 /*
383 * in without morphology
384 */
385 Datum
ltxtq_in(PG_FUNCTION_ARGS)386 ltxtq_in(PG_FUNCTION_ARGS)
387 {
388 PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0)));
389 }
390
/*
 * out function
 *
 * INFIX is the working state used while printing a query in infix form.
 */
typedef struct
{
	/* item currently being printed; infix() advances it */
	ITEM	   *curpol;
	/* start of the output buffer */
	char	   *buf;
	/* current write position inside buf */
	char	   *cur;
	/* base of the operand strings; an item's 'distance' is added to it */
	char	   *op;
	/* allocated size of buf */
	int32		buflen;
} INFIX;
402
/*
 * Make sure the output buffer of INFIX *inf has room for 'addsize' more
 * bytes plus a terminating NUL, doubling the allocation as often as
 * needed and keeping inf->cur at the same offset.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * wherever it is used.  Both arguments are evaluated more than once, so
 * they must not have side effects.
 */
#define RESIZEBUF(inf,addsize) \
	do { \
		while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
		{ \
			int32 len = (inf)->cur - (inf)->buf; \
			(inf)->buflen *= 2; \
			(inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
			(inf)->cur = (inf)->buf + len; \
		} \
	} while (0)
411
412 /*
413 * recursive walk on tree and print it in
414 * infix (human-readable) view
415 */
416 static void
infix(INFIX * in,bool first)417 infix(INFIX *in, bool first)
418 {
419 /* since this function recurses, it could be driven to stack overflow. */
420 check_stack_depth();
421
422 if (in->curpol->type == VAL)
423 {
424 char *op = in->op + in->curpol->distance;
425
426 RESIZEBUF(in, in->curpol->length * 2 + 5);
427 while (*op)
428 {
429 *(in->cur) = *op;
430 op++;
431 in->cur++;
432 }
433 if (in->curpol->flag & LVAR_SUBLEXEME)
434 {
435 *(in->cur) = '%';
436 in->cur++;
437 }
438 if (in->curpol->flag & LVAR_INCASE)
439 {
440 *(in->cur) = '@';
441 in->cur++;
442 }
443 if (in->curpol->flag & LVAR_ANYEND)
444 {
445 *(in->cur) = '*';
446 in->cur++;
447 }
448 *(in->cur) = '\0';
449 in->curpol++;
450 }
451 else if (in->curpol->val == (int32) '!')
452 {
453 bool isopr = false;
454
455 RESIZEBUF(in, 1);
456 *(in->cur) = '!';
457 in->cur++;
458 *(in->cur) = '\0';
459 in->curpol++;
460 if (in->curpol->type == OPR)
461 {
462 isopr = true;
463 RESIZEBUF(in, 2);
464 sprintf(in->cur, "( ");
465 in->cur = strchr(in->cur, '\0');
466 }
467 infix(in, isopr);
468 if (isopr)
469 {
470 RESIZEBUF(in, 2);
471 sprintf(in->cur, " )");
472 in->cur = strchr(in->cur, '\0');
473 }
474 }
475 else
476 {
477 int32 op = in->curpol->val;
478 INFIX nrm;
479
480 in->curpol++;
481 if (op == (int32) '|' && !first)
482 {
483 RESIZEBUF(in, 2);
484 sprintf(in->cur, "( ");
485 in->cur = strchr(in->cur, '\0');
486 }
487
488 nrm.curpol = in->curpol;
489 nrm.op = in->op;
490 nrm.buflen = 16;
491 nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
492
493 /* get right operand */
494 infix(&nrm, false);
495
496 /* get & print left operand */
497 in->curpol = nrm.curpol;
498 infix(in, false);
499
500 /* print operator & right operand */
501 RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
502 sprintf(in->cur, " %c %s", op, nrm.buf);
503 in->cur = strchr(in->cur, '\0');
504 pfree(nrm.buf);
505
506 if (op == (int32) '|' && !first)
507 {
508 RESIZEBUF(in, 2);
509 sprintf(in->cur, " )");
510 in->cur = strchr(in->cur, '\0');
511 }
512 }
513 }
514
515 Datum
ltxtq_out(PG_FUNCTION_ARGS)516 ltxtq_out(PG_FUNCTION_ARGS)
517 {
518 ltxtquery *query = PG_GETARG_LTXTQUERY_P(0);
519 INFIX nrm;
520
521 if (query->size == 0)
522 ereport(ERROR,
523 (errcode(ERRCODE_SYNTAX_ERROR),
524 errmsg("syntax error"),
525 errdetail("Empty query.")));
526
527 nrm.curpol = GETQUERY(query);
528 nrm.buflen = 32;
529 nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
530 *(nrm.cur) = '\0';
531 nrm.op = GETOPERAND(query);
532 infix(&nrm, true);
533
534 PG_FREE_IF_COPY(query, 0);
535 PG_RETURN_POINTER(nrm.buf);
536 }
537