1 /* 2 tre-match-approx.c - TRE approximate regex matching engine 3 4 This software is released under a BSD-style license. 5 See the file LICENSE for details and copyright. 6 7 */ 8 9 #ifdef HAVE_CONFIG_H 10 #include <config.h> 11 #endif /* HAVE_CONFIG_H */ 12 13 /* AIX requires this to be the first thing in the file. */ 14 #ifdef TRE_USE_ALLOCA 15 #ifndef __GNUC__ 16 # if HAVE_ALLOCA_H 17 # include <alloca.h> 18 # else 19 # ifdef _AIX 20 #pragma alloca 21 # else 22 # ifndef alloca /* predefined by HP cc +Olibcalls */ 23 char *alloca (); 24 # endif 25 # endif 26 # endif 27 #endif 28 #endif /* TRE_USE_ALLOCA */ 29 30 #define __USE_STRING_INLINES 31 #undef __NO_INLINE__ 32 33 #include <assert.h> 34 #include <stdlib.h> 35 #include <string.h> 36 #include <limits.h> 37 #ifdef HAVE_WCHAR_H 38 #include <wchar.h> 39 #endif /* HAVE_WCHAR_H */ 40 #ifdef HAVE_WCTYPE_H 41 #include <wctype.h> 42 #endif /* HAVE_WCTYPE_H */ 43 #ifndef TRE_WCHAR 44 #include <ctype.h> 45 #endif /* !TRE_WCHAR */ 46 #ifdef HAVE_MALLOC_H 47 #include <malloc.h> 48 #endif /* HAVE_MALLOC_H */ 49 50 #include "tre-internal.h" 51 #include "tre-match-utils.h" 52 #include "tre.h" 53 #include "xmalloc.h" 54 55 #define TRE_M_COST 0 56 #define TRE_M_NUM_INS 1 57 #define TRE_M_NUM_DEL 2 58 #define TRE_M_NUM_SUBST 3 59 #define TRE_M_NUM_ERR 4 60 #define TRE_M_LAST 5 61 62 #define TRE_M_MAX_DEPTH 3 63 64 typedef struct { 65 /* State in the TNFA transition table. */ 66 tre_tnfa_transition_t *state; 67 /* Position in input string. */ 68 int pos; 69 /* Tag values. */ 70 int *tags; 71 /* Matching parameters. */ 72 regaparams_t params; 73 /* Nesting depth of parameters. This is used as an index in 74 the `costs' array. */ 75 int depth; 76 /* Costs and counter values for different parameter nesting depths. */ 77 int costs[TRE_M_MAX_DEPTH + 1][TRE_M_LAST]; 78 } tre_tnfa_approx_reach_t; 79 80 81 #ifdef TRE_DEBUG 82 /* Prints the `reach' array in a readable fashion with DPRINT. */ 83 static void 84 tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_approx_reach_t *reach, 85 int pos, int num_tags) 86 { 87 int id; 88 89 /* Print each state on one line. */ 90 DPRINT((" reach:\n")); 91 for (id = 0; id < tnfa->num_states; id++) 92 { 93 int i, j; 94 if (reach[id].pos < pos) 95 continue; /* Not reached. */ 96 DPRINT((" %03d, costs ", id)); 97 for (i = 0; i <= reach[id].depth; i++) 98 { 99 DPRINT(("[")); 100 for (j = 0; j < TRE_M_LAST; j++) 101 { 102 DPRINT(("%2d", reach[id].costs[i][j])); 103 if (j + 1 < TRE_M_LAST) 104 DPRINT((",")); 105 } 106 DPRINT(("]")); 107 if (i + 1 <= reach[id].depth) 108 DPRINT((", ")); 109 } 110 DPRINT(("\n tags ")); 111 for (i = 0; i < num_tags; i++) 112 { 113 DPRINT(("%02d", reach[id].tags[i])); 114 if (i + 1 < num_tags) 115 DPRINT((",")); 116 } 117 DPRINT(("\n")); 118 } 119 DPRINT(("\n")); 120 } 121 #endif /* TRE_DEBUG */ 122 123 124 /* Sets the matching parameters in `reach' to the ones defined in the `pa' 125 array. If `pa' specifies default values, they are taken from 126 `default_params'. */ 127 inline static void 128 tre_set_params(tre_tnfa_approx_reach_t *reach, 129 int *pa, regaparams_t default_params) 130 { 131 int value; 132 133 /* If depth is increased reset costs and counters to zero for the 134 new levels. */ 135 value = pa[TRE_PARAM_DEPTH]; 136 assert(value <= TRE_M_MAX_DEPTH); 137 if (value > reach->depth) 138 { 139 int i, j; 140 for (i = reach->depth + 1; i <= value; i++) 141 for (j = 0; j < TRE_M_LAST; j++) 142 reach->costs[i][j] = 0; 143 } 144 reach->depth = value; 145 146 /* Set insert cost. */ 147 value = pa[TRE_PARAM_COST_INS]; 148 if (value == TRE_PARAM_DEFAULT) 149 reach->params.cost_ins = default_params.cost_ins; 150 else if (value != TRE_PARAM_UNSET) 151 reach->params.cost_ins = value; 152 153 /* Set delete cost. */ 154 value = pa[TRE_PARAM_COST_DEL]; 155 if (value == TRE_PARAM_DEFAULT) 156 reach->params.cost_del = default_params.cost_del; 157 else if (value != TRE_PARAM_UNSET) 158 reach->params.cost_del = value; 159 160 /* Set substitute cost. */ 161 value = pa[TRE_PARAM_COST_SUBST]; 162 if (value == TRE_PARAM_DEFAULT) 163 reach->params.cost_subst = default_params.cost_subst; 164 else 165 reach->params.cost_subst = value; 166 167 /* Set maximum cost. */ 168 value = pa[TRE_PARAM_COST_MAX]; 169 if (value == TRE_PARAM_DEFAULT) 170 reach->params.max_cost = default_params.max_cost; 171 else if (value != TRE_PARAM_UNSET) 172 reach->params.max_cost = value; 173 174 /* Set maximum inserts. */ 175 value = pa[TRE_PARAM_MAX_INS]; 176 if (value == TRE_PARAM_DEFAULT) 177 reach->params.max_ins = default_params.max_ins; 178 else if (value != TRE_PARAM_UNSET) 179 reach->params.max_ins = value; 180 181 /* Set maximum deletes. */ 182 value = pa[TRE_PARAM_MAX_DEL]; 183 if (value == TRE_PARAM_DEFAULT) 184 reach->params.max_del = default_params.max_del; 185 else if (value != TRE_PARAM_UNSET) 186 reach->params.max_del = value; 187 188 /* Set maximum substitutes. */ 189 value = pa[TRE_PARAM_MAX_SUBST]; 190 if (value == TRE_PARAM_DEFAULT) 191 reach->params.max_subst = default_params.max_subst; 192 else if (value != TRE_PARAM_UNSET) 193 reach->params.max_subst = value; 194 195 /* Set maximum number of errors. */ 196 value = pa[TRE_PARAM_MAX_ERR]; 197 if (value == TRE_PARAM_DEFAULT) 198 reach->params.max_err = default_params.max_err; 199 else if (value != TRE_PARAM_UNSET) 200 reach->params.max_err = value; 201 } 202 203 reg_errcode_t 204 tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len, 205 tre_str_type_t type, int *match_tags, 206 regamatch_t *match, regaparams_t default_params, 207 int eflags, int *match_end_ofs) 208 { 209 /* State variables required by GET_NEXT_WCHAR. */ 210 tre_char_t prev_c = 0, next_c = 0; 211 const char *str_byte = string; 212 int pos = -1; 213 unsigned int pos_add_next = 1; 214 #ifdef TRE_WCHAR 215 const wchar_t *str_wide = string; 216 #ifdef TRE_MBSTATE 217 mbstate_t mbstate; 218 #endif /* !TRE_WCHAR */ 219 #endif /* TRE_WCHAR */ 220 int reg_notbol = eflags & REG_NOTBOL; 221 int reg_noteol = eflags & REG_NOTEOL; 222 int reg_newline = tnfa->cflags & REG_NEWLINE; 223 int str_user_end = 0; 224 225 int prev_pos; 226 227 /* Number of tags. */ 228 int num_tags; 229 /* The reach tables. */ 230 tre_tnfa_approx_reach_t *reach, *reach_next; 231 /* Tag array for temporary use. */ 232 int *tmp_tags; 233 234 /* End offset of best match so far, or -1 if no match found yet. */ 235 int match_eo = -1; 236 /* Costs of the match. */ 237 int match_costs[TRE_M_LAST]; 238 239 /* Space for temporary data required for matching. */ 240 unsigned char *buf; 241 242 int i, id; 243 244 if (!match_tags) 245 num_tags = 0; 246 else 247 num_tags = tnfa->num_tags; 248 249 #ifdef TRE_MBSTATE 250 memset(&mbstate, '\0', sizeof(mbstate)); 251 #endif /* TRE_MBSTATE */ 252 253 DPRINT(("tre_tnfa_run_approx, input type %d, len %d, eflags %d, " 254 "match_tags %p\n", 255 type, len, eflags, 256 match_tags)); 257 DPRINT(("max cost %d, ins %d, del %d, subst %d\n", 258 default_params.max_cost, 259 default_params.cost_ins, 260 default_params.cost_del, 261 default_params.cost_subst)); 262 263 /* Allocate memory for temporary data required for matching. This needs to 264 be done for every matching operation to be thread safe. This allocates 265 everything in a single large block from the stack frame using alloca() 266 or with malloc() if alloca is unavailable. */ 267 { 268 unsigned char *buf_cursor; 269 /* Space needed for one array of tags. */ 270 int tag_bytes = sizeof(*tmp_tags) * num_tags; 271 /* Space needed for one reach table. */ 272 int reach_bytes = sizeof(*reach_next) * tnfa->num_states; 273 /* Total space needed. */ 274 int total_bytes = reach_bytes * 2 + (tnfa->num_states * 2 + 1 ) * tag_bytes; 275 /* Add some extra to make sure we can align the pointers. The multiplier 276 used here must be equal to the number of ALIGN calls below. */ 277 total_bytes += (sizeof(long) - 1) * 3; 278 279 /* Allocate the memory. */ 280 #ifdef TRE_USE_ALLOCA 281 buf = alloca(total_bytes); 282 #else /* !TRE_USE_ALLOCA */ 283 buf = xmalloc((unsigned)total_bytes); 284 #endif /* !TRE_USE_ALLOCA */ 285 if (!buf) 286 return REG_ESPACE; 287 memset(buf, 0, (size_t)total_bytes); 288 289 /* Allocate `tmp_tags' from `buf'. */ 290 tmp_tags = (void *)buf; 291 buf_cursor = buf + tag_bytes; 292 buf_cursor += ALIGN(buf_cursor, long); 293 294 /* Allocate `reach' from `buf'. */ 295 reach = (void *)buf_cursor; 296 buf_cursor += reach_bytes; 297 buf_cursor += ALIGN(buf_cursor, long); 298 299 /* Allocate `reach_next' from `buf'. */ 300 reach_next = (void *)buf_cursor; 301 buf_cursor += reach_bytes; 302 buf_cursor += ALIGN(buf_cursor, long); 303 304 /* Allocate tag arrays for `reach' and `reach_next' from `buf'. */ 305 for (i = 0; i < tnfa->num_states; i++) 306 { 307 reach[i].tags = (void *)buf_cursor; 308 buf_cursor += tag_bytes; 309 reach_next[i].tags = (void *)buf_cursor; 310 buf_cursor += tag_bytes; 311 } 312 assert(buf_cursor <= buf + total_bytes); 313 } 314 315 for (i = 0; i < TRE_M_LAST; i++) 316 match_costs[i] = INT_MAX; 317 318 /* Mark the reach arrays empty. */ 319 for (i = 0; i < tnfa->num_states; i++) 320 reach[i].pos = reach_next[i].pos = -2; 321 322 prev_pos = pos; 323 GET_NEXT_WCHAR(); 324 pos = 0; 325 326 while (/*CONSTCOND*/1) 327 { 328 DPRINT(("%03d:%2lc/%05d\n", pos, (tre_cint_t)next_c, (int)next_c)); 329 330 /* Add initial states to `reach_next' if an exact match has not yet 331 been found. */ 332 if (match_costs[TRE_M_COST] > 0) 333 { 334 tre_tnfa_transition_t *trans; 335 DPRINT((" init")); 336 for (trans = tnfa->initial; trans->state; trans++) 337 { 338 int stateid = trans->state_id; 339 340 /* If this state is not currently in `reach_next', add it 341 there. */ 342 if (reach_next[stateid].pos < pos) 343 { 344 if (trans->assertions && CHECK_ASSERTIONS(trans->assertions)) 345 { 346 /* Assertions failed, don't add this state. */ 347 DPRINT((" !%d (assert)", stateid)); 348 continue; 349 } 350 DPRINT((" %d", stateid)); 351 reach_next[stateid].state = trans->state; 352 reach_next[stateid].pos = pos; 353 354 /* Compute tag values after this transition. */ 355 for (i = 0; i < num_tags; i++) 356 reach_next[stateid].tags[i] = -1; 357 358 if (trans->tags) 359 for (i = 0; trans->tags[i] >= 0; i++) 360 if (trans->tags[i] < num_tags) 361 reach_next[stateid].tags[trans->tags[i]] = pos; 362 363 /* Set the parameters, depth, and costs. */ 364 reach_next[stateid].params = default_params; 365 reach_next[stateid].depth = 0; 366 for (i = 0; i < TRE_M_LAST; i++) 367 reach_next[stateid].costs[0][i] = 0; 368 if (trans->params) 369 tre_set_params(&reach_next[stateid], trans->params, 370 default_params); 371 372 /* If this is the final state, mark the exact match. */ 373 if (trans->state == tnfa->final) 374 { 375 match_eo = pos; 376 for (i = 0; i < num_tags; i++) 377 match_tags[i] = reach_next[stateid].tags[i]; 378 for (i = 0; i < TRE_M_LAST; i++) 379 match_costs[i] = 0; 380 } 381 } 382 } 383 DPRINT(("\n")); 384 } 385 386 387 /* Handle inserts. This is done by pretending there's an epsilon 388 transition from each state in `reach' back to the same state. 389 We don't need to worry about the final state here; this will never 390 give a better match than what we already have. */ 391 for (id = 0; id < tnfa->num_states; id++) 392 { 393 int depth; 394 int cost, cost0; 395 396 if (reach[id].pos != prev_pos) 397 { 398 DPRINT((" insert: %d not reached\n", id)); 399 continue; /* Not reached. */ 400 } 401 402 depth = reach[id].depth; 403 404 /* Compute and check cost at current depth. */ 405 cost = reach[id].costs[depth][TRE_M_COST]; 406 if (reach[id].params.cost_ins != TRE_PARAM_UNSET) 407 cost += reach[id].params.cost_ins; 408 if (cost > reach[id].params.max_cost) 409 continue; /* Cost too large. */ 410 411 /* Check number of inserts at current depth. */ 412 if (reach[id].costs[depth][TRE_M_NUM_INS] + 1 413 > reach[id].params.max_ins) 414 continue; /* Too many inserts. */ 415 416 /* Check total number of errors at current depth. */ 417 if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1 418 > reach[id].params.max_err) 419 continue; /* Too many errors. */ 420 421 /* Compute overall cost. */ 422 cost0 = cost; 423 if (depth > 0) 424 { 425 cost0 = reach[id].costs[0][TRE_M_COST]; 426 if (reach[id].params.cost_ins != TRE_PARAM_UNSET) 427 cost0 += reach[id].params.cost_ins; 428 else 429 cost0 += default_params.cost_ins; 430 } 431 432 DPRINT((" insert: from %d to %d, cost %d: ", id, id, 433 reach[id].costs[depth][TRE_M_COST])); 434 if (reach_next[id].pos == pos 435 && (cost0 >= reach_next[id].costs[0][TRE_M_COST])) 436 { 437 DPRINT(("lose\n")); 438 continue; 439 } 440 DPRINT(("win\n")); 441 442 /* Copy state, position, tags, parameters, and depth. */ 443 reach_next[id].state = reach[id].state; 444 reach_next[id].pos = pos; 445 for (i = 0; i < num_tags; i++) 446 reach_next[id].tags[i] = reach[id].tags[i]; 447 reach_next[id].params = reach[id].params; 448 reach_next[id].depth = reach[id].depth; 449 450 /* Set the costs after this transition. */ 451 memcpy(reach_next[id].costs, reach[id].costs, 452 sizeof(reach_next[id].costs[0][0]) 453 * TRE_M_LAST * (depth + 1)); 454 reach_next[id].costs[depth][TRE_M_COST] = cost; 455 reach_next[id].costs[depth][TRE_M_NUM_INS]++; 456 reach_next[id].costs[depth][TRE_M_NUM_ERR]++; 457 if (depth > 0) 458 { 459 reach_next[id].costs[0][TRE_M_COST] = cost0; 460 reach_next[id].costs[0][TRE_M_NUM_INS]++; 461 reach_next[id].costs[0][TRE_M_NUM_ERR]++; 462 } 463 464 } 465 466 467 /* Handle deletes. This is done by traversing through the whole TNFA 468 pretending that all transitions are epsilon transitions, until 469 no more states can be reached with better costs. */ 470 { 471 /* XXX - dynamic ringbuffer size */ 472 tre_tnfa_approx_reach_t *ringbuffer[512]; 473 tre_tnfa_approx_reach_t **deque_start, **deque_end; 474 475 deque_start = deque_end = ringbuffer; 476 477 /* Add all states in `reach_next' to the deque. */ 478 for (id = 0; id < tnfa->num_states; id++) 479 { 480 if (reach_next[id].pos != pos) 481 continue; 482 *deque_end = &reach_next[id]; 483 deque_end++; 484 assert(deque_end != deque_start); 485 } 486 487 /* Repeat until the deque is empty. */ 488 while (deque_end != deque_start) 489 { 490 tre_tnfa_approx_reach_t *reach_p; 491 int depth; 492 int cost, cost0; 493 tre_tnfa_transition_t *trans; 494 495 /* Pop the first item off the deque. */ 496 reach_p = *deque_start; 497 id = reach_p - reach_next; 498 depth = reach_p->depth; 499 500 /* Compute cost at current depth. */ 501 cost = reach_p->costs[depth][TRE_M_COST]; 502 if (reach_p->params.cost_del != TRE_PARAM_UNSET) 503 cost += reach_p->params.cost_del; 504 505 /* Check cost, number of deletes, and total number of errors 506 at current depth. */ 507 if (cost > reach_p->params.max_cost 508 || (reach_p->costs[depth][TRE_M_NUM_DEL] + 1 509 > reach_p->params.max_del) 510 || (reach_p->costs[depth][TRE_M_NUM_ERR] + 1 511 > reach_p->params.max_err)) 512 { 513 /* Too many errors or cost too large. */ 514 DPRINT((" delete: from %03d: cost too large\n", id)); 515 deque_start++; 516 if (deque_start >= (ringbuffer + 512)) 517 deque_start = ringbuffer; 518 continue; 519 } 520 521 /* Compute overall cost. */ 522 cost0 = cost; 523 if (depth > 0) 524 { 525 cost0 = reach_p->costs[0][TRE_M_COST]; 526 if (reach_p->params.cost_del != TRE_PARAM_UNSET) 527 cost0 += reach_p->params.cost_del; 528 else 529 cost0 += default_params.cost_del; 530 } 531 532 for (trans = reach_p->state; trans->state; trans++) 533 { 534 int dest_id = trans->state_id; 535 DPRINT((" delete: from %03d to %03d, cost %d (%d): ", 536 id, dest_id, cost0, reach_p->params.max_cost)); 537 538 if (trans->assertions && CHECK_ASSERTIONS(trans->assertions)) 539 { 540 DPRINT(("assertion failed\n")); 541 continue; 542 } 543 544 /* Compute tag values after this transition. */ 545 for (i = 0; i < num_tags; i++) 546 tmp_tags[i] = reach_p->tags[i]; 547 if (trans->tags) 548 for (i = 0; trans->tags[i] >= 0; i++) 549 if (trans->tags[i] < num_tags) 550 tmp_tags[trans->tags[i]] = pos; 551 552 /* If another path has also reached this state, choose the one 553 with the smallest cost or best tags if costs are equal. */ 554 if (reach_next[dest_id].pos == pos 555 && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST] 556 || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST] 557 && (!match_tags 558 || !tre_tag_order(num_tags, 559 tnfa->tag_directions, 560 tmp_tags, 561 reach_next[dest_id].tags))))) 562 { 563 DPRINT(("lose, cost0 %d, have %d\n", 564 cost0, reach_next[dest_id].costs[0][TRE_M_COST])); 565 continue; 566 } 567 DPRINT(("win\n")); 568 569 /* Set state, position, tags, parameters, depth, and costs. */ 570 reach_next[dest_id].state = trans->state; 571 reach_next[dest_id].pos = pos; 572 for (i = 0; i < num_tags; i++) 573 reach_next[dest_id].tags[i] = tmp_tags[i]; 574 575 reach_next[dest_id].params = reach_p->params; 576 if (trans->params) 577 tre_set_params(&reach_next[dest_id], trans->params, 578 default_params); 579 580 reach_next[dest_id].depth = reach_p->depth; 581 memcpy(&reach_next[dest_id].costs, 582 reach_p->costs, 583 sizeof(reach_p->costs[0][0]) 584 * TRE_M_LAST * (depth + 1)); 585 reach_next[dest_id].costs[depth][TRE_M_COST] = cost; 586 reach_next[dest_id].costs[depth][TRE_M_NUM_DEL]++; 587 reach_next[dest_id].costs[depth][TRE_M_NUM_ERR]++; 588 if (depth > 0) 589 { 590 reach_next[dest_id].costs[0][TRE_M_COST] = cost0; 591 reach_next[dest_id].costs[0][TRE_M_NUM_DEL]++; 592 reach_next[dest_id].costs[0][TRE_M_NUM_ERR]++; 593 } 594 595 if (trans->state == tnfa->final 596 && (match_eo < 0 597 || match_costs[TRE_M_COST] > cost0 598 || (match_costs[TRE_M_COST] == cost0 599 && (num_tags > 0 600 && tmp_tags[0] <= match_tags[0])))) 601 { 602 DPRINT((" setting new match at %d, cost %d\n", 603 pos, cost0)); 604 match_eo = pos; 605 memcpy(match_costs, reach_next[dest_id].costs[0], 606 sizeof(match_costs[0]) * TRE_M_LAST); 607 for (i = 0; i < num_tags; i++) 608 match_tags[i] = tmp_tags[i]; 609 } 610 611 /* Add to the end of the deque. */ 612 *deque_end = &reach_next[dest_id]; 613 deque_end++; 614 if (deque_end >= (ringbuffer + 512)) 615 deque_end = ringbuffer; 616 assert(deque_end != deque_start); 617 } 618 deque_start++; 619 if (deque_start >= (ringbuffer + 512)) 620 deque_start = ringbuffer; 621 } 622 623 } 624 625 #ifdef TRE_DEBUG 626 tre_print_reach(tnfa, reach_next, pos, num_tags); 627 #endif /* TRE_DEBUG */ 628 629 /* Check for end of string. */ 630 if (len < 0) 631 { 632 if (type == STR_USER) 633 { 634 if (str_user_end) 635 break; 636 } 637 else if (next_c == L'\0') 638 break; 639 } 640 else 641 { 642 if (pos >= len) 643 break; 644 } 645 646 prev_pos = pos; 647 GET_NEXT_WCHAR(); 648 649 /* Swap `reach' and `reach_next'. */ 650 { 651 tre_tnfa_approx_reach_t *tmp; 652 tmp = reach; 653 reach = reach_next; 654 reach_next = tmp; 655 } 656 657 /* Handle exact matches and substitutions. */ 658 for (id = 0; id < tnfa->num_states; id++) 659 { 660 tre_tnfa_transition_t *trans; 661 662 if (reach[id].pos < prev_pos) 663 continue; /* Not reached. */ 664 for (trans = reach[id].state; trans->state; trans++) 665 { 666 int dest_id; 667 int depth; 668 int cost, cost0, err; 669 670 if (trans->assertions 671 && (CHECK_ASSERTIONS(trans->assertions) 672 || CHECK_CHAR_CLASSES(trans, tnfa, eflags))) 673 { 674 DPRINT((" exact, from %d: assert failed\n", id)); 675 continue; 676 } 677 678 depth = reach[id].depth; 679 dest_id = trans->state_id; 680 681 cost = reach[id].costs[depth][TRE_M_COST]; 682 cost0 = reach[id].costs[0][TRE_M_COST]; 683 err = 0; 684 685 if (trans->code_min > (tre_cint_t)prev_c 686 || trans->code_max < (tre_cint_t)prev_c) 687 { 688 /* Handle substitutes. The required character was not in 689 the string, so match it in place of whatever was supposed 690 to be there and increase costs accordingly. */ 691 err = 1; 692 693 /* Compute and check cost at current depth. */ 694 cost = reach[id].costs[depth][TRE_M_COST]; 695 if (reach[id].params.cost_subst != TRE_PARAM_UNSET) 696 cost += reach[id].params.cost_subst; 697 if (cost > reach[id].params.max_cost) 698 continue; /* Cost too large. */ 699 700 /* Check number of substitutes at current depth. */ 701 if (reach[id].costs[depth][TRE_M_NUM_SUBST] + 1 702 > reach[id].params.max_subst) 703 continue; /* Too many substitutes. */ 704 705 /* Check total number of errors at current depth. */ 706 if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1 707 > reach[id].params.max_err) 708 continue; /* Too many errors. */ 709 710 /* Compute overall cost. */ 711 cost0 = cost; 712 if (depth > 0) 713 { 714 cost0 = reach[id].costs[0][TRE_M_COST]; 715 if (reach[id].params.cost_subst != TRE_PARAM_UNSET) 716 cost0 += reach[id].params.cost_subst; 717 else 718 cost0 += default_params.cost_subst; 719 } 720 DPRINT((" subst, from %03d to %03d, cost %d: ", 721 id, dest_id, cost0)); 722 } 723 else 724 DPRINT((" exact, from %03d to %03d, cost %d: ", 725 id, dest_id, cost0)); 726 727 /* Compute tag values after this transition. */ 728 for (i = 0; i < num_tags; i++) 729 tmp_tags[i] = reach[id].tags[i]; 730 if (trans->tags) 731 for (i = 0; trans->tags[i] >= 0; i++) 732 if (trans->tags[i] < num_tags) 733 tmp_tags[trans->tags[i]] = pos; 734 735 /* If another path has also reached this state, choose the 736 one with the smallest cost or best tags if costs are equal. */ 737 if (reach_next[dest_id].pos == pos 738 && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST] 739 || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST] 740 && !tre_tag_order(num_tags, tnfa->tag_directions, 741 tmp_tags, 742 reach_next[dest_id].tags)))) 743 { 744 DPRINT(("lose\n")); 745 continue; 746 } 747 DPRINT(("win %d %d\n", 748 reach_next[dest_id].pos, 749 reach_next[dest_id].costs[0][TRE_M_COST])); 750 751 /* Set state, position, tags, and depth. */ 752 reach_next[dest_id].state = trans->state; 753 reach_next[dest_id].pos = pos; 754 for (i = 0; i < num_tags; i++) 755 reach_next[dest_id].tags[i] = tmp_tags[i]; 756 reach_next[dest_id].depth = reach[id].depth; 757 758 /* Set parameters. */ 759 reach_next[dest_id].params = reach[id].params; 760 if (trans->params) 761 tre_set_params(&reach_next[dest_id], trans->params, 762 default_params); 763 764 /* Set the costs after this transition. */ 765 memcpy(&reach_next[dest_id].costs, 766 reach[id].costs, 767 sizeof(reach[id].costs[0][0]) 768 * TRE_M_LAST * (depth + 1)); 769 reach_next[dest_id].costs[depth][TRE_M_COST] = cost; 770 reach_next[dest_id].costs[depth][TRE_M_NUM_SUBST] += err; 771 reach_next[dest_id].costs[depth][TRE_M_NUM_ERR] += err; 772 if (depth > 0) 773 { 774 reach_next[dest_id].costs[0][TRE_M_COST] = cost0; 775 reach_next[dest_id].costs[0][TRE_M_NUM_SUBST] += err; 776 reach_next[dest_id].costs[0][TRE_M_NUM_ERR] += err; 777 } 778 779 if (trans->state == tnfa->final 780 && (match_eo < 0 781 || cost0 < match_costs[TRE_M_COST] 782 || (cost0 == match_costs[TRE_M_COST] 783 && num_tags > 0 && tmp_tags[0] <= match_tags[0]))) 784 { 785 DPRINT((" setting new match at %d, cost %d\n", 786 pos, cost0)); 787 match_eo = pos; 788 for (i = 0; i < TRE_M_LAST; i++) 789 match_costs[i] = reach_next[dest_id].costs[0][i]; 790 for (i = 0; i < num_tags; i++) 791 match_tags[i] = tmp_tags[i]; 792 } 793 } 794 } 795 } 796 797 DPRINT(("match end offset = %d, match cost = %d\n", match_eo, 798 match_costs[TRE_M_COST])); 799 800 #ifndef TRE_USE_ALLOCA 801 if (buf) 802 xfree(buf); 803 #endif /* !TRE_USE_ALLOCA */ 804 805 match->cost = match_costs[TRE_M_COST]; 806 match->num_ins = match_costs[TRE_M_NUM_INS]; 807 match->num_del = match_costs[TRE_M_NUM_DEL]; 808 match->num_subst = match_costs[TRE_M_NUM_SUBST]; 809 *match_end_ofs = match_eo; 810 811 return match_eo >= 0 ? REG_OK : REG_NOMATCH; 812 } 813