1 /*------------------------------------------------------------------------- 2 * 3 * dict_thesaurus.c 4 * Thesaurus dictionary: phrase to phrase substitution 5 * 6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group 7 * 8 * 9 * IDENTIFICATION 10 * src/backend/tsearch/dict_thesaurus.c 11 * 12 *------------------------------------------------------------------------- 13 */ 14 #include "postgres.h" 15 16 #include "catalog/namespace.h" 17 #include "commands/defrem.h" 18 #include "tsearch/ts_cache.h" 19 #include "tsearch/ts_locale.h" 20 #include "tsearch/ts_utils.h" 21 #include "utils/builtins.h" 22 #include "utils/regproc.h" 23 24 25 /* 26 * Temporary we use TSLexeme.flags for inner use... 27 */ 28 #define DT_USEASIS 0x1000 29 30 typedef struct LexemeInfo 31 { 32 uint32 idsubst; /* entry's number in DictThesaurus->subst */ 33 uint16 posinsubst; /* pos info in entry */ 34 uint16 tnvariant; /* total num lexemes in one variant */ 35 struct LexemeInfo *nextentry; 36 struct LexemeInfo *nextvariant; 37 } LexemeInfo; 38 39 typedef struct 40 { 41 char *lexeme; 42 LexemeInfo *entries; 43 } TheLexeme; 44 45 typedef struct 46 { 47 uint16 lastlexeme; /* number lexemes to substitute */ 48 uint16 reslen; 49 TSLexeme *res; /* prepared substituted result */ 50 } TheSubstitute; 51 52 typedef struct 53 { 54 /* subdictionary to normalize lexemes */ 55 Oid subdictOid; 56 TSDictionaryCacheEntry *subdict; 57 58 /* Array to search lexeme by exact match */ 59 TheLexeme *wrds; 60 int nwrds; /* current number of words */ 61 int ntwrds; /* allocated array length */ 62 63 /* 64 * Storage of substituted result, n-th element is for n-th expression 65 */ 66 TheSubstitute *subst; 67 int nsubst; 68 } DictThesaurus; 69 70 71 static void 72 newLexeme(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 posinsubst) 73 { 74 TheLexeme *ptr; 75 76 if (d->nwrds >= d->ntwrds) 77 { 78 if (d->ntwrds == 0) 79 { 80 d->ntwrds = 16; 81 d->wrds = (TheLexeme *) palloc(sizeof(TheLexeme) * d->ntwrds); 82 } 83 else 84 { 85 d->ntwrds *= 2; 86 d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->ntwrds); 87 } 88 } 89 90 ptr = d->wrds + d->nwrds; 91 d->nwrds++; 92 93 ptr->lexeme = palloc(e - b + 1); 94 95 memcpy(ptr->lexeme, b, e - b); 96 ptr->lexeme[e - b] = '\0'; 97 98 ptr->entries = (LexemeInfo *) palloc(sizeof(LexemeInfo)); 99 100 ptr->entries->nextentry = NULL; 101 ptr->entries->idsubst = idsubst; 102 ptr->entries->posinsubst = posinsubst; 103 } 104 105 static void 106 addWrd(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis) 107 { 108 static int nres = 0; 109 static int ntres = 0; 110 TheSubstitute *ptr; 111 112 if (nwrd == 0) 113 { 114 nres = ntres = 0; 115 116 if (idsubst >= d->nsubst) 117 { 118 if (d->nsubst == 0) 119 { 120 d->nsubst = 16; 121 d->subst = (TheSubstitute *) palloc(sizeof(TheSubstitute) * d->nsubst); 122 } 123 else 124 { 125 d->nsubst *= 2; 126 d->subst = (TheSubstitute *) repalloc(d->subst, sizeof(TheSubstitute) * d->nsubst); 127 } 128 } 129 } 130 131 ptr = d->subst + idsubst; 132 133 ptr->lastlexeme = posinsubst - 1; 134 135 if (nres + 1 >= ntres) 136 { 137 if (ntres == 0) 138 { 139 ntres = 2; 140 ptr->res = (TSLexeme *) palloc(sizeof(TSLexeme) * ntres); 141 } 142 else 143 { 144 ntres *= 2; 145 ptr->res = (TSLexeme *) repalloc(ptr->res, sizeof(TSLexeme) * ntres); 146 } 147 } 148 149 ptr->res[nres].lexeme = palloc(e - b + 1); 150 memcpy(ptr->res[nres].lexeme, b, e - b); 151 ptr->res[nres].lexeme[e - b] = '\0'; 152 153 ptr->res[nres].nvariant = nwrd; 154 if (useasis) 155 ptr->res[nres].flags = DT_USEASIS; 156 else 157 ptr->res[nres].flags = 0; 158 159 ptr->res[++nres].lexeme = NULL; 160 } 161 162 #define TR_WAITLEX 1 163 #define TR_INLEX 2 164 #define TR_WAITSUBS 3 165 #define TR_INSUBS 4 166 167 static void 168 thesaurusRead(const char *filename, DictThesaurus *d) 169 { 170 tsearch_readline_state trst; 171 uint32 idsubst = 0; 172 bool useasis = false; 173 char *line; 174 175 filename = get_tsearch_config_filename(filename, "ths"); 176 if (!tsearch_readline_begin(&trst, filename)) 177 ereport(ERROR, 178 (errcode(ERRCODE_CONFIG_FILE_ERROR), 179 errmsg("could not open thesaurus file \"%s\": %m", 180 filename))); 181 182 while ((line = tsearch_readline(&trst)) != NULL) 183 { 184 char *ptr; 185 int state = TR_WAITLEX; 186 char *beginwrd = NULL; 187 uint32 posinsubst = 0; 188 uint32 nwrd = 0; 189 190 ptr = line; 191 192 /* is it a comment? */ 193 while (*ptr && t_isspace(ptr)) 194 ptr += pg_mblen(ptr); 195 196 if (t_iseq(ptr, '#') || *ptr == '\0' || 197 t_iseq(ptr, '\n') || t_iseq(ptr, '\r')) 198 { 199 pfree(line); 200 continue; 201 } 202 203 while (*ptr) 204 { 205 if (state == TR_WAITLEX) 206 { 207 if (t_iseq(ptr, ':')) 208 { 209 if (posinsubst == 0) 210 ereport(ERROR, 211 (errcode(ERRCODE_CONFIG_FILE_ERROR), 212 errmsg("unexpected delimiter"))); 213 state = TR_WAITSUBS; 214 } 215 else if (!t_isspace(ptr)) 216 { 217 beginwrd = ptr; 218 state = TR_INLEX; 219 } 220 } 221 else if (state == TR_INLEX) 222 { 223 if (t_iseq(ptr, ':')) 224 { 225 newLexeme(d, beginwrd, ptr, idsubst, posinsubst++); 226 state = TR_WAITSUBS; 227 } 228 else if (t_isspace(ptr)) 229 { 230 newLexeme(d, beginwrd, ptr, idsubst, posinsubst++); 231 state = TR_WAITLEX; 232 } 233 } 234 else if (state == TR_WAITSUBS) 235 { 236 if (t_iseq(ptr, '*')) 237 { 238 useasis = true; 239 state = TR_INSUBS; 240 beginwrd = ptr + pg_mblen(ptr); 241 } 242 else if (t_iseq(ptr, '\\')) 243 { 244 useasis = false; 245 state = TR_INSUBS; 246 beginwrd = ptr + pg_mblen(ptr); 247 } 248 else if (!t_isspace(ptr)) 249 { 250 useasis = false; 251 beginwrd = ptr; 252 state = TR_INSUBS; 253 } 254 } 255 else if (state == TR_INSUBS) 256 { 257 if (t_isspace(ptr)) 258 { 259 if (ptr == beginwrd) 260 ereport(ERROR, 261 (errcode(ERRCODE_CONFIG_FILE_ERROR), 262 errmsg("unexpected end of line or lexeme"))); 263 addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis); 264 state = TR_WAITSUBS; 265 } 266 } 267 else 268 elog(ERROR, "unrecognized thesaurus state: %d", state); 269 270 ptr += pg_mblen(ptr); 271 } 272 273 if (state == TR_INSUBS) 274 { 275 if (ptr == beginwrd) 276 ereport(ERROR, 277 (errcode(ERRCODE_CONFIG_FILE_ERROR), 278 errmsg("unexpected end of line or lexeme"))); 279 addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis); 280 } 281 282 idsubst++; 283 284 if (!(nwrd && posinsubst)) 285 ereport(ERROR, 286 (errcode(ERRCODE_CONFIG_FILE_ERROR), 287 errmsg("unexpected end of line"))); 288 289 /* 290 * Note: currently, tsearch_readline can't return lines exceeding 4KB, 291 * so overflow of the word counts is impossible. But that may not 292 * always be true, so let's check. 293 */ 294 if (nwrd != (uint16) nwrd || posinsubst != (uint16) posinsubst) 295 ereport(ERROR, 296 (errcode(ERRCODE_CONFIG_FILE_ERROR), 297 errmsg("too many lexemes in thesaurus entry"))); 298 299 pfree(line); 300 } 301 302 d->nsubst = idsubst; 303 304 tsearch_readline_end(&trst); 305 } 306 307 static TheLexeme * 308 addCompiledLexeme(TheLexeme *newwrds, int *nnw, int *tnm, TSLexeme *lexeme, LexemeInfo *src, uint16 tnvariant) 309 { 310 if (*nnw >= *tnm) 311 { 312 *tnm *= 2; 313 newwrds = (TheLexeme *) repalloc(newwrds, sizeof(TheLexeme) * *tnm); 314 } 315 316 newwrds[*nnw].entries = (LexemeInfo *) palloc(sizeof(LexemeInfo)); 317 318 if (lexeme && lexeme->lexeme) 319 { 320 newwrds[*nnw].lexeme = pstrdup(lexeme->lexeme); 321 newwrds[*nnw].entries->tnvariant = tnvariant; 322 } 323 else 324 { 325 newwrds[*nnw].lexeme = NULL; 326 newwrds[*nnw].entries->tnvariant = 1; 327 } 328 329 newwrds[*nnw].entries->idsubst = src->idsubst; 330 newwrds[*nnw].entries->posinsubst = src->posinsubst; 331 332 newwrds[*nnw].entries->nextentry = NULL; 333 334 (*nnw)++; 335 return newwrds; 336 } 337 338 static int 339 cmpLexemeInfo(LexemeInfo *a, LexemeInfo *b) 340 { 341 if (a == NULL || b == NULL) 342 return 0; 343 344 if (a->idsubst == b->idsubst) 345 { 346 if (a->posinsubst == b->posinsubst) 347 { 348 if (a->tnvariant == b->tnvariant) 349 return 0; 350 351 return (a->tnvariant > b->tnvariant) ? 1 : -1; 352 } 353 354 return (a->posinsubst > b->posinsubst) ? 1 : -1; 355 } 356 357 return (a->idsubst > b->idsubst) ? 1 : -1; 358 } 359 360 static int 361 cmpLexeme(const TheLexeme *a, const TheLexeme *b) 362 { 363 if (a->lexeme == NULL) 364 { 365 if (b->lexeme == NULL) 366 return 0; 367 else 368 return 1; 369 } 370 else if (b->lexeme == NULL) 371 return -1; 372 373 return strcmp(a->lexeme, b->lexeme); 374 } 375 376 static int 377 cmpLexemeQ(const void *a, const void *b) 378 { 379 return cmpLexeme((const TheLexeme *) a, (const TheLexeme *) b); 380 } 381 382 static int 383 cmpTheLexeme(const void *a, const void *b) 384 { 385 const TheLexeme *la = (const TheLexeme *) a; 386 const TheLexeme *lb = (const TheLexeme *) b; 387 int res; 388 389 if ((res = cmpLexeme(la, lb)) != 0) 390 return res; 391 392 return -cmpLexemeInfo(la->entries, lb->entries); 393 } 394 395 static void 396 compileTheLexeme(DictThesaurus *d) 397 { 398 int i, 399 nnw = 0, 400 tnm = 16; 401 TheLexeme *newwrds = (TheLexeme *) palloc(sizeof(TheLexeme) * tnm), 402 *ptrwrds; 403 404 for (i = 0; i < d->nwrds; i++) 405 { 406 TSLexeme *ptr; 407 408 if (strcmp(d->wrds[i].lexeme, "?") == 0) /* Is stop word marker? */ 409 newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0); 410 else 411 { 412 ptr = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize), 413 PointerGetDatum(d->subdict->dictData), 414 PointerGetDatum(d->wrds[i].lexeme), 415 Int32GetDatum(strlen(d->wrds[i].lexeme)), 416 PointerGetDatum(NULL))); 417 418 if (!ptr) 419 ereport(ERROR, 420 (errcode(ERRCODE_CONFIG_FILE_ERROR), 421 errmsg("thesaurus sample word \"%s\" isn't recognized by subdictionary (rule %d)", 422 d->wrds[i].lexeme, 423 d->wrds[i].entries->idsubst + 1))); 424 else if (!(ptr->lexeme)) 425 ereport(ERROR, 426 (errcode(ERRCODE_CONFIG_FILE_ERROR), 427 errmsg("thesaurus sample word \"%s\" is a stop word (rule %d)", 428 d->wrds[i].lexeme, 429 d->wrds[i].entries->idsubst + 1), 430 errhint("Use \"?\" to represent a stop word within a sample phrase."))); 431 else 432 { 433 while (ptr->lexeme) 434 { 435 TSLexeme *remptr = ptr + 1; 436 int tnvar = 1; 437 int curvar = ptr->nvariant; 438 439 /* compute n words in one variant */ 440 while (remptr->lexeme) 441 { 442 if (remptr->nvariant != (remptr - 1)->nvariant) 443 break; 444 tnvar++; 445 remptr++; 446 } 447 448 remptr = ptr; 449 while (remptr->lexeme && remptr->nvariant == curvar) 450 { 451 newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, remptr, d->wrds[i].entries, tnvar); 452 remptr++; 453 } 454 455 ptr = remptr; 456 } 457 } 458 } 459 460 pfree(d->wrds[i].lexeme); 461 pfree(d->wrds[i].entries); 462 } 463 464 if (d->wrds) 465 pfree(d->wrds); 466 d->wrds = newwrds; 467 d->nwrds = nnw; 468 d->ntwrds = tnm; 469 470 if (d->nwrds > 1) 471 { 472 qsort(d->wrds, d->nwrds, sizeof(TheLexeme), cmpTheLexeme); 473 474 /* uniq */ 475 newwrds = d->wrds; 476 ptrwrds = d->wrds + 1; 477 while (ptrwrds - d->wrds < d->nwrds) 478 { 479 if (cmpLexeme(ptrwrds, newwrds) == 0) 480 { 481 if (cmpLexemeInfo(ptrwrds->entries, newwrds->entries)) 482 { 483 ptrwrds->entries->nextentry = newwrds->entries; 484 newwrds->entries = ptrwrds->entries; 485 } 486 else 487 pfree(ptrwrds->entries); 488 489 if (ptrwrds->lexeme) 490 pfree(ptrwrds->lexeme); 491 } 492 else 493 { 494 newwrds++; 495 *newwrds = *ptrwrds; 496 } 497 498 ptrwrds++; 499 } 500 501 d->nwrds = newwrds - d->wrds + 1; 502 d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->nwrds); 503 } 504 } 505 506 static void 507 compileTheSubstitute(DictThesaurus *d) 508 { 509 int i; 510 511 for (i = 0; i < d->nsubst; i++) 512 { 513 TSLexeme *rem = d->subst[i].res, 514 *outptr, 515 *inptr; 516 int n = 2; 517 518 outptr = d->subst[i].res = (TSLexeme *) palloc(sizeof(TSLexeme) * n); 519 outptr->lexeme = NULL; 520 inptr = rem; 521 522 while (inptr && inptr->lexeme) 523 { 524 TSLexeme *lexized, 525 tmplex[2]; 526 527 if (inptr->flags & DT_USEASIS) 528 { /* do not lexize */ 529 tmplex[0] = *inptr; 530 tmplex[0].flags = 0; 531 tmplex[1].lexeme = NULL; 532 lexized = tmplex; 533 } 534 else 535 { 536 lexized = (TSLexeme *) DatumGetPointer( 537 FunctionCall4( 538 &(d->subdict->lexize), 539 PointerGetDatum(d->subdict->dictData), 540 PointerGetDatum(inptr->lexeme), 541 Int32GetDatum(strlen(inptr->lexeme)), 542 PointerGetDatum(NULL) 543 ) 544 ); 545 } 546 547 if (lexized && lexized->lexeme) 548 { 549 int toset = (lexized->lexeme && outptr != d->subst[i].res) ? (outptr - d->subst[i].res) : -1; 550 551 while (lexized->lexeme) 552 { 553 if (outptr - d->subst[i].res + 1 >= n) 554 { 555 int diff = outptr - d->subst[i].res; 556 557 n *= 2; 558 d->subst[i].res = (TSLexeme *) repalloc(d->subst[i].res, sizeof(TSLexeme) * n); 559 outptr = d->subst[i].res + diff; 560 } 561 562 *outptr = *lexized; 563 outptr->lexeme = pstrdup(lexized->lexeme); 564 565 outptr++; 566 lexized++; 567 } 568 569 if (toset > 0) 570 d->subst[i].res[toset].flags |= TSL_ADDPOS; 571 } 572 else if (lexized) 573 { 574 ereport(ERROR, 575 (errcode(ERRCODE_CONFIG_FILE_ERROR), 576 errmsg("thesaurus substitute word \"%s\" is a stop word (rule %d)", 577 inptr->lexeme, i + 1))); 578 } 579 else 580 { 581 ereport(ERROR, 582 (errcode(ERRCODE_CONFIG_FILE_ERROR), 583 errmsg("thesaurus substitute word \"%s\" isn't recognized by subdictionary (rule %d)", 584 inptr->lexeme, i + 1))); 585 } 586 587 if (inptr->lexeme) 588 pfree(inptr->lexeme); 589 inptr++; 590 } 591 592 if (outptr == d->subst[i].res) 593 ereport(ERROR, 594 (errcode(ERRCODE_CONFIG_FILE_ERROR), 595 errmsg("thesaurus substitute phrase is empty (rule %d)", 596 i + 1))); 597 598 d->subst[i].reslen = outptr - d->subst[i].res; 599 600 pfree(rem); 601 } 602 } 603 604 Datum 605 thesaurus_init(PG_FUNCTION_ARGS) 606 { 607 List *dictoptions = (List *) PG_GETARG_POINTER(0); 608 DictThesaurus *d; 609 char *subdictname = NULL; 610 bool fileloaded = false; 611 ListCell *l; 612 613 d = (DictThesaurus *) palloc0(sizeof(DictThesaurus)); 614 615 foreach(l, dictoptions) 616 { 617 DefElem *defel = (DefElem *) lfirst(l); 618 619 if (strcmp(defel->defname, "dictfile") == 0) 620 { 621 if (fileloaded) 622 ereport(ERROR, 623 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 624 errmsg("multiple DictFile parameters"))); 625 thesaurusRead(defGetString(defel), d); 626 fileloaded = true; 627 } 628 else if (strcmp(defel->defname, "dictionary") == 0) 629 { 630 if (subdictname) 631 ereport(ERROR, 632 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 633 errmsg("multiple Dictionary parameters"))); 634 subdictname = pstrdup(defGetString(defel)); 635 } 636 else 637 { 638 ereport(ERROR, 639 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 640 errmsg("unrecognized Thesaurus parameter: \"%s\"", 641 defel->defname))); 642 } 643 } 644 645 if (!fileloaded) 646 ereport(ERROR, 647 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 648 errmsg("missing DictFile parameter"))); 649 if (!subdictname) 650 ereport(ERROR, 651 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 652 errmsg("missing Dictionary parameter"))); 653 654 d->subdictOid = get_ts_dict_oid(stringToQualifiedNameList(subdictname), false); 655 d->subdict = lookup_ts_dictionary_cache(d->subdictOid); 656 657 compileTheLexeme(d); 658 compileTheSubstitute(d); 659 660 PG_RETURN_POINTER(d); 661 } 662 663 static LexemeInfo * 664 findTheLexeme(DictThesaurus *d, char *lexeme) 665 { 666 TheLexeme key, 667 *res; 668 669 if (d->nwrds == 0) 670 return NULL; 671 672 key.lexeme = lexeme; 673 key.entries = NULL; 674 675 res = bsearch(&key, d->wrds, d->nwrds, sizeof(TheLexeme), cmpLexemeQ); 676 677 if (res == NULL) 678 return NULL; 679 return res->entries; 680 } 681 682 static bool 683 matchIdSubst(LexemeInfo *stored, uint32 idsubst) 684 { 685 bool res = true; 686 687 if (stored) 688 { 689 res = false; 690 691 for (; stored; stored = stored->nextvariant) 692 if (stored->idsubst == idsubst) 693 { 694 res = true; 695 break; 696 } 697 } 698 699 return res; 700 } 701 702 static LexemeInfo * 703 findVariant(LexemeInfo *in, LexemeInfo *stored, uint16 curpos, LexemeInfo **newin, int newn) 704 { 705 for (;;) 706 { 707 int i; 708 LexemeInfo *ptr = newin[0]; 709 710 for (i = 0; i < newn; i++) 711 { 712 while (newin[i] && newin[i]->idsubst < ptr->idsubst) 713 newin[i] = newin[i]->nextentry; 714 715 if (newin[i] == NULL) 716 return in; 717 718 if (newin[i]->idsubst > ptr->idsubst) 719 { 720 ptr = newin[i]; 721 i = -1; 722 continue; 723 } 724 725 while (newin[i]->idsubst == ptr->idsubst) 726 { 727 if (newin[i]->posinsubst == curpos && newin[i]->tnvariant == newn) 728 { 729 ptr = newin[i]; 730 break; 731 } 732 733 newin[i] = newin[i]->nextentry; 734 if (newin[i] == NULL) 735 return in; 736 } 737 738 if (newin[i]->idsubst != ptr->idsubst) 739 { 740 ptr = newin[i]; 741 i = -1; 742 continue; 743 } 744 } 745 746 if (i == newn && matchIdSubst(stored, ptr->idsubst) && (in == NULL || !matchIdSubst(in, ptr->idsubst))) 747 { /* found */ 748 749 ptr->nextvariant = in; 750 in = ptr; 751 } 752 753 /* step forward */ 754 for (i = 0; i < newn; i++) 755 newin[i] = newin[i]->nextentry; 756 } 757 } 758 759 static TSLexeme * 760 copyTSLexeme(TheSubstitute *ts) 761 { 762 TSLexeme *res; 763 uint16 i; 764 765 res = (TSLexeme *) palloc(sizeof(TSLexeme) * (ts->reslen + 1)); 766 for (i = 0; i < ts->reslen; i++) 767 { 768 res[i] = ts->res[i]; 769 res[i].lexeme = pstrdup(ts->res[i].lexeme); 770 } 771 772 res[ts->reslen].lexeme = NULL; 773 774 return res; 775 } 776 777 static TSLexeme * 778 checkMatch(DictThesaurus *d, LexemeInfo *info, uint16 curpos, bool *moreres) 779 { 780 *moreres = false; 781 while (info) 782 { 783 Assert(info->idsubst < d->nsubst); 784 if (info->nextvariant) 785 *moreres = true; 786 if (d->subst[info->idsubst].lastlexeme == curpos) 787 return copyTSLexeme(d->subst + info->idsubst); 788 info = info->nextvariant; 789 } 790 791 return NULL; 792 } 793 794 Datum 795 thesaurus_lexize(PG_FUNCTION_ARGS) 796 { 797 DictThesaurus *d = (DictThesaurus *) PG_GETARG_POINTER(0); 798 DictSubState *dstate = (DictSubState *) PG_GETARG_POINTER(3); 799 TSLexeme *res = NULL; 800 LexemeInfo *stored, 801 *info = NULL; 802 uint16 curpos = 0; 803 bool moreres = false; 804 805 if (PG_NARGS() != 4 || dstate == NULL) 806 elog(ERROR, "forbidden call of thesaurus or nested call"); 807 808 if (dstate->isend) 809 PG_RETURN_POINTER(NULL); 810 stored = (LexemeInfo *) dstate->private_state; 811 812 if (stored) 813 curpos = stored->posinsubst + 1; 814 815 if (!d->subdict->isvalid) 816 d->subdict = lookup_ts_dictionary_cache(d->subdictOid); 817 818 res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize), 819 PointerGetDatum(d->subdict->dictData), 820 PG_GETARG_DATUM(1), 821 PG_GETARG_DATUM(2), 822 PointerGetDatum(NULL))); 823 824 if (res && res->lexeme) 825 { 826 TSLexeme *ptr = res, 827 *basevar; 828 829 while (ptr->lexeme) 830 { 831 uint16 nv = ptr->nvariant; 832 uint16 i, 833 nlex = 0; 834 LexemeInfo **infos; 835 836 basevar = ptr; 837 while (ptr->lexeme && nv == ptr->nvariant) 838 { 839 nlex++; 840 ptr++; 841 } 842 843 infos = (LexemeInfo **) palloc(sizeof(LexemeInfo *) * nlex); 844 for (i = 0; i < nlex; i++) 845 if ((infos[i] = findTheLexeme(d, basevar[i].lexeme)) == NULL) 846 break; 847 848 if (i < nlex) 849 { 850 /* no chance to find */ 851 pfree(infos); 852 continue; 853 } 854 855 info = findVariant(info, stored, curpos, infos, nlex); 856 } 857 } 858 else if (res) 859 { /* stop-word */ 860 LexemeInfo *infos = findTheLexeme(d, NULL); 861 862 info = findVariant(NULL, stored, curpos, &infos, 1); 863 } 864 else 865 { 866 info = NULL; /* word isn't recognized */ 867 } 868 869 dstate->private_state = (void *) info; 870 871 if (!info) 872 { 873 dstate->getnext = false; 874 PG_RETURN_POINTER(NULL); 875 } 876 877 if ((res = checkMatch(d, info, curpos, &moreres)) != NULL) 878 { 879 dstate->getnext = moreres; 880 PG_RETURN_POINTER(res); 881 } 882 883 dstate->getnext = true; 884 885 PG_RETURN_POINTER(NULL); 886 } 887