1 /* 2 * txtquery io 3 * Teodor Sigaev <teodor@stack.net> 4 * contrib/ltree/ltxtquery_io.c 5 */ 6 #include "postgres.h" 7 8 #include <ctype.h> 9 10 #include "crc32.h" 11 #include "ltree.h" 12 #include "miscadmin.h" 13 14 PG_FUNCTION_INFO_V1(ltxtq_in); 15 PG_FUNCTION_INFO_V1(ltxtq_out); 16 17 18 /* parser's states */ 19 #define WAITOPERAND 1 20 #define INOPERAND 2 21 #define WAITOPERATOR 3 22 23 /* 24 * node of query tree, also used 25 * for storing polish notation in parser 26 */ 27 typedef struct NODE 28 { 29 int32 type; 30 int32 val; 31 int16 distance; 32 int16 length; 33 uint16 flag; 34 struct NODE *next; 35 } NODE; 36 37 typedef struct 38 { 39 char *buf; 40 int32 state; 41 int32 count; 42 /* reverse polish notation in list (for temporary usage) */ 43 NODE *str; 44 /* number in str */ 45 int32 num; 46 47 /* user-friendly operand */ 48 int32 lenop; 49 int32 sumlen; 50 char *op; 51 char *curop; 52 } QPRS_STATE; 53 54 /* 55 * get token from query string 56 */ 57 static int32 58 gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint16 *flag) 59 { 60 int charlen; 61 62 for (;;) 63 { 64 charlen = pg_mblen(state->buf); 65 66 switch (state->state) 67 { 68 case WAITOPERAND: 69 if (charlen == 1 && t_iseq(state->buf, '!')) 70 { 71 (state->buf)++; 72 *val = (int32) '!'; 73 return OPR; 74 } 75 else if (charlen == 1 && t_iseq(state->buf, '(')) 76 { 77 state->count++; 78 (state->buf)++; 79 return OPEN; 80 } 81 else if (ISALNUM(state->buf)) 82 { 83 state->state = INOPERAND; 84 *strval = state->buf; 85 *lenval = charlen; 86 *flag = 0; 87 } 88 else if (!t_isspace(state->buf)) 89 ereport(ERROR, 90 (errcode(ERRCODE_SYNTAX_ERROR), 91 errmsg("operand syntax error"))); 92 break; 93 case INOPERAND: 94 if (ISALNUM(state->buf)) 95 { 96 if (*flag) 97 ereport(ERROR, 98 (errcode(ERRCODE_SYNTAX_ERROR), 99 errmsg("modifiers syntax error"))); 100 *lenval += charlen; 101 } 102 else if (charlen == 1 && t_iseq(state->buf, '%')) 103 *flag |= LVAR_SUBLEXEME; 104 else if (charlen == 1 && t_iseq(state->buf, '@')) 105 *flag |= LVAR_INCASE; 106 else if (charlen == 1 && t_iseq(state->buf, '*')) 107 *flag |= LVAR_ANYEND; 108 else 109 { 110 state->state = WAITOPERATOR; 111 return VAL; 112 } 113 break; 114 case WAITOPERATOR: 115 if (charlen == 1 && (t_iseq(state->buf, '&') || t_iseq(state->buf, '|'))) 116 { 117 state->state = WAITOPERAND; 118 *val = (int32) *(state->buf); 119 (state->buf)++; 120 return OPR; 121 } 122 else if (charlen == 1 && t_iseq(state->buf, ')')) 123 { 124 (state->buf)++; 125 state->count--; 126 return (state->count < 0) ? ERR : CLOSE; 127 } 128 else if (*(state->buf) == '\0') 129 return (state->count) ? ERR : END; 130 else if (charlen == 1 && !t_iseq(state->buf, ' ')) 131 return ERR; 132 break; 133 default: 134 return ERR; 135 break; 136 } 137 138 state->buf += charlen; 139 } 140 } 141 142 /* 143 * push new one in polish notation reverse view 144 */ 145 static void 146 pushquery(QPRS_STATE *state, int32 type, int32 val, int32 distance, int32 lenval, uint16 flag) 147 { 148 NODE *tmp = (NODE *) palloc(sizeof(NODE)); 149 150 tmp->type = type; 151 tmp->val = val; 152 tmp->flag = flag; 153 if (distance > 0xffff) 154 ereport(ERROR, 155 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 156 errmsg("value is too big"))); 157 if (lenval > 0xff) 158 ereport(ERROR, 159 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 160 errmsg("operand is too long"))); 161 tmp->distance = distance; 162 tmp->length = lenval; 163 tmp->next = state->str; 164 state->str = tmp; 165 state->num++; 166 } 167 168 /* 169 * This function is used for query_txt parsing 170 */ 171 static void 172 pushval_asis(QPRS_STATE *state, int type, char *strval, int lenval, uint16 flag) 173 { 174 if (lenval > 0xffff) 175 ereport(ERROR, 176 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 177 errmsg("word is too long"))); 178 179 pushquery(state, type, ltree_crc32_sz(strval, lenval), 180 state->curop - state->op, lenval, flag); 181 182 while (state->curop - state->op + lenval + 1 >= state->lenop) 183 { 184 int32 tmp = state->curop - state->op; 185 186 state->lenop *= 2; 187 state->op = (char *) repalloc((void *) state->op, state->lenop); 188 state->curop = state->op + tmp; 189 } 190 memcpy((void *) state->curop, (void *) strval, lenval); 191 state->curop += lenval; 192 *(state->curop) = '\0'; 193 state->curop++; 194 state->sumlen += lenval + 1; 195 return; 196 } 197 198 #define STACKDEPTH 32 199 /* 200 * make polish notation of query 201 */ 202 static int32 203 makepol(QPRS_STATE *state) 204 { 205 int32 val = 0, 206 type; 207 int32 lenval = 0; 208 char *strval = NULL; 209 int32 stack[STACKDEPTH]; 210 int32 lenstack = 0; 211 uint16 flag = 0; 212 213 /* since this function recurses, it could be driven to stack overflow */ 214 check_stack_depth(); 215 216 while ((type = gettoken_query(state, &val, &lenval, &strval, &flag)) != END) 217 { 218 switch (type) 219 { 220 case VAL: 221 pushval_asis(state, VAL, strval, lenval, flag); 222 while (lenstack && (stack[lenstack - 1] == (int32) '&' || 223 stack[lenstack - 1] == (int32) '!')) 224 { 225 lenstack--; 226 pushquery(state, OPR, stack[lenstack], 0, 0, 0); 227 } 228 break; 229 case OPR: 230 if (lenstack && val == (int32) '|') 231 pushquery(state, OPR, val, 0, 0, 0); 232 else 233 { 234 if (lenstack == STACKDEPTH) 235 /* internal error */ 236 elog(ERROR, "stack too short"); 237 stack[lenstack] = val; 238 lenstack++; 239 } 240 break; 241 case OPEN: 242 if (makepol(state) == ERR) 243 return ERR; 244 while (lenstack && (stack[lenstack - 1] == (int32) '&' || 245 stack[lenstack - 1] == (int32) '!')) 246 { 247 lenstack--; 248 pushquery(state, OPR, stack[lenstack], 0, 0, 0); 249 } 250 break; 251 case CLOSE: 252 while (lenstack) 253 { 254 lenstack--; 255 pushquery(state, OPR, stack[lenstack], 0, 0, 0); 256 }; 257 return END; 258 break; 259 case ERR: 260 default: 261 ereport(ERROR, 262 (errcode(ERRCODE_SYNTAX_ERROR), 263 errmsg("syntax error"))); 264 265 return ERR; 266 267 } 268 } 269 while (lenstack) 270 { 271 lenstack--; 272 pushquery(state, OPR, stack[lenstack], 0, 0, 0); 273 }; 274 return END; 275 } 276 277 static void 278 findoprnd(ITEM *ptr, int32 *pos) 279 { 280 /* since this function recurses, it could be driven to stack overflow. */ 281 check_stack_depth(); 282 283 if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE) 284 { 285 ptr[*pos].left = 0; 286 (*pos)++; 287 } 288 else if (ptr[*pos].val == (int32) '!') 289 { 290 ptr[*pos].left = 1; 291 (*pos)++; 292 findoprnd(ptr, pos); 293 } 294 else 295 { 296 ITEM *curitem = &ptr[*pos]; 297 int32 tmp = *pos; 298 299 (*pos)++; 300 findoprnd(ptr, pos); 301 curitem->left = *pos - tmp; 302 findoprnd(ptr, pos); 303 } 304 } 305 306 307 /* 308 * input 309 */ 310 static ltxtquery * 311 queryin(char *buf) 312 { 313 QPRS_STATE state; 314 int32 i; 315 ltxtquery *query; 316 int32 commonlen; 317 ITEM *ptr; 318 NODE *tmp; 319 int32 pos = 0; 320 321 #ifdef BS_DEBUG 322 char pbuf[16384], 323 *cur; 324 #endif 325 326 /* init state */ 327 state.buf = buf; 328 state.state = WAITOPERAND; 329 state.count = 0; 330 state.num = 0; 331 state.str = NULL; 332 333 /* init list of operand */ 334 state.sumlen = 0; 335 state.lenop = 64; 336 state.curop = state.op = (char *) palloc(state.lenop); 337 *(state.curop) = '\0'; 338 339 /* parse query & make polish notation (postfix, but in reverse order) */ 340 makepol(&state); 341 if (!state.num) 342 ereport(ERROR, 343 (errcode(ERRCODE_SYNTAX_ERROR), 344 errmsg("syntax error"), 345 errdetail("Empty query."))); 346 347 if (LTXTQUERY_TOO_BIG(state.num, state.sumlen)) 348 ereport(ERROR, 349 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), 350 errmsg("ltxtquery is too large"))); 351 commonlen = COMPUTESIZE(state.num, state.sumlen); 352 353 query = (ltxtquery *) palloc0(commonlen); 354 SET_VARSIZE(query, commonlen); 355 query->size = state.num; 356 ptr = GETQUERY(query); 357 358 /* set item in polish notation */ 359 for (i = 0; i < state.num; i++) 360 { 361 ptr[i].type = state.str->type; 362 ptr[i].val = state.str->val; 363 ptr[i].distance = state.str->distance; 364 ptr[i].length = state.str->length; 365 ptr[i].flag = state.str->flag; 366 tmp = state.str->next; 367 pfree(state.str); 368 state.str = tmp; 369 } 370 371 /* set user friendly-operand view */ 372 memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen); 373 pfree(state.op); 374 375 /* set left operand's position for every operator */ 376 pos = 0; 377 findoprnd(ptr, &pos); 378 379 return query; 380 } 381 382 /* 383 * in without morphology 384 */ 385 Datum 386 ltxtq_in(PG_FUNCTION_ARGS) 387 { 388 PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0))); 389 } 390 391 /* 392 * out function 393 */ 394 typedef struct 395 { 396 ITEM *curpol; 397 char *buf; 398 char *cur; 399 char *op; 400 int32 buflen; 401 } INFIX; 402 403 #define RESIZEBUF(inf,addsize) \ 404 while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \ 405 { \ 406 int32 len = (inf)->cur - (inf)->buf; \ 407 (inf)->buflen *= 2; \ 408 (inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \ 409 (inf)->cur = (inf)->buf + len; \ 410 } 411 412 /* 413 * recursive walk on tree and print it in 414 * infix (human-readable) view 415 */ 416 static void 417 infix(INFIX *in, bool first) 418 { 419 /* since this function recurses, it could be driven to stack overflow. */ 420 check_stack_depth(); 421 422 if (in->curpol->type == VAL) 423 { 424 char *op = in->op + in->curpol->distance; 425 426 RESIZEBUF(in, in->curpol->length * 2 + 5); 427 while (*op) 428 { 429 *(in->cur) = *op; 430 op++; 431 in->cur++; 432 } 433 if (in->curpol->flag & LVAR_SUBLEXEME) 434 { 435 *(in->cur) = '%'; 436 in->cur++; 437 } 438 if (in->curpol->flag & LVAR_INCASE) 439 { 440 *(in->cur) = '@'; 441 in->cur++; 442 } 443 if (in->curpol->flag & LVAR_ANYEND) 444 { 445 *(in->cur) = '*'; 446 in->cur++; 447 } 448 *(in->cur) = '\0'; 449 in->curpol++; 450 } 451 else if (in->curpol->val == (int32) '!') 452 { 453 bool isopr = false; 454 455 RESIZEBUF(in, 1); 456 *(in->cur) = '!'; 457 in->cur++; 458 *(in->cur) = '\0'; 459 in->curpol++; 460 if (in->curpol->type == OPR) 461 { 462 isopr = true; 463 RESIZEBUF(in, 2); 464 sprintf(in->cur, "( "); 465 in->cur = strchr(in->cur, '\0'); 466 } 467 infix(in, isopr); 468 if (isopr) 469 { 470 RESIZEBUF(in, 2); 471 sprintf(in->cur, " )"); 472 in->cur = strchr(in->cur, '\0'); 473 } 474 } 475 else 476 { 477 int32 op = in->curpol->val; 478 INFIX nrm; 479 480 in->curpol++; 481 if (op == (int32) '|' && !first) 482 { 483 RESIZEBUF(in, 2); 484 sprintf(in->cur, "( "); 485 in->cur = strchr(in->cur, '\0'); 486 } 487 488 nrm.curpol = in->curpol; 489 nrm.op = in->op; 490 nrm.buflen = 16; 491 nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); 492 493 /* get right operand */ 494 infix(&nrm, false); 495 496 /* get & print left operand */ 497 in->curpol = nrm.curpol; 498 infix(in, false); 499 500 /* print operator & right operand */ 501 RESIZEBUF(in, 3 + (nrm.cur - nrm.buf)); 502 sprintf(in->cur, " %c %s", op, nrm.buf); 503 in->cur = strchr(in->cur, '\0'); 504 pfree(nrm.buf); 505 506 if (op == (int32) '|' && !first) 507 { 508 RESIZEBUF(in, 2); 509 sprintf(in->cur, " )"); 510 in->cur = strchr(in->cur, '\0'); 511 } 512 } 513 } 514 515 Datum 516 ltxtq_out(PG_FUNCTION_ARGS) 517 { 518 ltxtquery *query = PG_GETARG_LTXTQUERY_P(0); 519 INFIX nrm; 520 521 if (query->size == 0) 522 ereport(ERROR, 523 (errcode(ERRCODE_SYNTAX_ERROR), 524 errmsg("syntax error"), 525 errdetail("Empty query."))); 526 527 nrm.curpol = GETQUERY(query); 528 nrm.buflen = 32; 529 nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); 530 *(nrm.cur) = '\0'; 531 nrm.op = GETOPERAND(query); 532 infix(&nrm, true); 533 534 PG_FREE_IF_COPY(query, 0); 535 PG_RETURN_POINTER(nrm.buf); 536 } 537