1 /*
2 tre-match-approx.c - TRE approximate regex matching engine
3
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
6
7 */
8
9 #ifdef HAVE_CONFIG_H
10 #include <config.h>
11 #endif /* HAVE_CONFIG_H */
12
13 /* AIX requires this to be the first thing in the file. */
14 #ifdef TRE_USE_ALLOCA
15 #ifndef __GNUC__
16 # if HAVE_ALLOCA_H
17 # include <alloca.h>
18 # else
19 # ifdef _AIX
20 #pragma alloca
21 # else
22 # ifndef alloca /* predefined by HP cc +Olibcalls */
23 char *alloca ();
24 # endif
25 # endif
26 # endif
27 #endif
28 #endif /* TRE_USE_ALLOCA */
29
30 /* These seem compiler/OS-specific, but unexplained
31 On Linux the first is intended to be used only with GCC.
32 #define __USE_STRING_INLINES
33 #undef __NO_INLINE__
34 */
35
36 // #include <assert.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <limits.h>
40 #ifdef HAVE_WCHAR_H
41 #include <wchar.h>
42 #endif /* HAVE_WCHAR_H */
43 #ifdef HAVE_WCTYPE_H
44 #include <wctype.h>
45 #endif /* HAVE_WCTYPE_H */
46 #ifndef TRE_WCHAR
47 #include <ctype.h>
48 #endif /* !TRE_WCHAR */
49 #ifdef HAVE_MALLOC_H
50 #include <malloc.h>
51 #endif /* HAVE_MALLOC_H */
52
53 #include "tre-internal.h"
54 #include "tre-match-utils.h"
55 #include "tre.h"
56 #include "xmalloc.h"
57
58 #define assert(a) R_assert(a)
59
60 #define TRE_M_COST 0
61 #define TRE_M_NUM_INS 1
62 #define TRE_M_NUM_DEL 2
63 #define TRE_M_NUM_SUBST 3
64 #define TRE_M_NUM_ERR 4
65 #define TRE_M_LAST 5
66
67 #define TRE_M_MAX_DEPTH 3
68
69 typedef struct {
70 /* State in the TNFA transition table. */
71 tre_tnfa_transition_t *state;
72 /* Position in input string. */
73 int pos;
74 /* Tag values. */
75 int *tags;
76 /* Matching parameters. */
77 regaparams_t params;
78 /* Nesting depth of parameters. This is used as an index in
79 the `costs' array. */
80 int depth;
81 /* Costs and counter values for different parameter nesting depths. */
82 int costs[TRE_M_MAX_DEPTH + 1][TRE_M_LAST];
83 } tre_tnfa_approx_reach_t;
84
85
86 #ifdef TRE_DEBUG
87 /* Prints the `reach' array in a readable fashion with DPRINT. */
88 static void
tre_print_reach(const tre_tnfa_t * tnfa,tre_tnfa_approx_reach_t * reach,int pos,int num_tags)89 tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_approx_reach_t *reach,
90 int pos, int num_tags)
91 {
92 int id;
93
94 /* Print each state on one line. */
95 DPRINT((" reach:\n"));
96 for (id = 0; id < tnfa->num_states; id++)
97 {
98 int i, j;
99 if (reach[id].pos < pos)
100 continue; /* Not reached. */
101 DPRINT((" %03d, costs ", id));
102 for (i = 0; i <= reach[id].depth; i++)
103 {
104 DPRINT(("["));
105 for (j = 0; j < TRE_M_LAST; j++)
106 {
107 DPRINT(("%2d", reach[id].costs[i][j]));
108 if (j + 1 < TRE_M_LAST)
109 DPRINT((","));
110 }
111 DPRINT(("]"));
112 if (i + 1 <= reach[id].depth)
113 DPRINT((", "));
114 }
115 DPRINT(("\n tags "));
116 for (i = 0; i < num_tags; i++)
117 {
118 DPRINT(("%02d", reach[id].tags[i]));
119 if (i + 1 < num_tags)
120 DPRINT((","));
121 }
122 DPRINT(("\n"));
123 }
124 DPRINT(("\n"));
125 }
126 #endif /* TRE_DEBUG */
127
128
129 /* Sets the matching parameters in `reach' to the ones defined in the `pa'
130 array. If `pa' specifies default values, they are taken from
131 `default_params'. */
132 inline static void
tre_set_params(tre_tnfa_approx_reach_t * reach,int * pa,regaparams_t default_params)133 tre_set_params(tre_tnfa_approx_reach_t *reach,
134 int *pa, regaparams_t default_params)
135 {
136 int value;
137
138 /* If depth is increased reset costs and counters to zero for the
139 new levels. */
140 value = pa[TRE_PARAM_DEPTH];
141 assert(value <= TRE_M_MAX_DEPTH);
142 if (value > reach->depth)
143 {
144 int i, j;
145 for (i = reach->depth + 1; i <= value; i++)
146 for (j = 0; j < TRE_M_LAST; j++)
147 reach->costs[i][j] = 0;
148 }
149 reach->depth = value;
150
151 /* Set insert cost. */
152 value = pa[TRE_PARAM_COST_INS];
153 if (value == TRE_PARAM_DEFAULT)
154 reach->params.cost_ins = default_params.cost_ins;
155 else if (value != TRE_PARAM_UNSET)
156 reach->params.cost_ins = value;
157
158 /* Set delete cost. */
159 value = pa[TRE_PARAM_COST_DEL];
160 if (value == TRE_PARAM_DEFAULT)
161 reach->params.cost_del = default_params.cost_del;
162 else if (value != TRE_PARAM_UNSET)
163 reach->params.cost_del = value;
164
165 /* Set substitute cost. */
166 value = pa[TRE_PARAM_COST_SUBST];
167 if (value == TRE_PARAM_DEFAULT)
168 reach->params.cost_subst = default_params.cost_subst;
169 else
170 reach->params.cost_subst = value;
171
172 /* Set maximum cost. */
173 value = pa[TRE_PARAM_COST_MAX];
174 if (value == TRE_PARAM_DEFAULT)
175 reach->params.max_cost = default_params.max_cost;
176 else if (value != TRE_PARAM_UNSET)
177 reach->params.max_cost = value;
178
179 /* Set maximum inserts. */
180 value = pa[TRE_PARAM_MAX_INS];
181 if (value == TRE_PARAM_DEFAULT)
182 reach->params.max_ins = default_params.max_ins;
183 else if (value != TRE_PARAM_UNSET)
184 reach->params.max_ins = value;
185
186 /* Set maximum deletes. */
187 value = pa[TRE_PARAM_MAX_DEL];
188 if (value == TRE_PARAM_DEFAULT)
189 reach->params.max_del = default_params.max_del;
190 else if (value != TRE_PARAM_UNSET)
191 reach->params.max_del = value;
192
193 /* Set maximum substitutes. */
194 value = pa[TRE_PARAM_MAX_SUBST];
195 if (value == TRE_PARAM_DEFAULT)
196 reach->params.max_subst = default_params.max_subst;
197 else if (value != TRE_PARAM_UNSET)
198 reach->params.max_subst = value;
199
200 /* Set maximum number of errors. */
201 value = pa[TRE_PARAM_MAX_ERR];
202 if (value == TRE_PARAM_DEFAULT)
203 reach->params.max_err = default_params.max_err;
204 else if (value != TRE_PARAM_UNSET)
205 reach->params.max_err = value;
206 }
207
208 reg_errcode_t
tre_tnfa_run_approx(const tre_tnfa_t * tnfa,const void * string,int len,tre_str_type_t type,int * match_tags,regamatch_t * match,regaparams_t default_params,int eflags,int * match_end_ofs)209 tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len,
210 tre_str_type_t type, int *match_tags,
211 regamatch_t *match, regaparams_t default_params,
212 int eflags, int *match_end_ofs)
213 {
214 /* State variables required by GET_NEXT_WCHAR. */
215 tre_char_t prev_c = 0, next_c = 0;
216 const char *str_byte = string;
217 int pos = -1;
218 unsigned int pos_add_next = 1;
219 #ifdef TRE_WCHAR
220 const wchar_t *str_wide = string;
221 #ifdef TRE_MBSTATE
222 mbstate_t mbstate;
223 #endif /* !TRE_WCHAR */
224 #endif /* TRE_WCHAR */
225 int reg_notbol = eflags & REG_NOTBOL;
226 int reg_noteol = eflags & REG_NOTEOL;
227 int reg_newline = tnfa->cflags & REG_NEWLINE;
228 int str_user_end = 0;
229
230 int prev_pos;
231
232 /* Number of tags. */
233 int num_tags;
234 /* The reach tables. */
235 tre_tnfa_approx_reach_t *reach, *reach_next;
236 /* Tag array for temporary use. */
237 int *tmp_tags;
238
239 /* End offset of best match so far, or -1 if no match found yet. */
240 int match_eo = -1;
241 /* Costs of the match. */
242 int match_costs[TRE_M_LAST];
243
244 /* Space for temporary data required for matching. */
245 unsigned char *buf;
246
247 int i, id;
248
249 if (!match_tags)
250 num_tags = 0;
251 else
252 num_tags = tnfa->num_tags;
253
254 #ifdef TRE_MBSTATE
255 memset(&mbstate, '\0', sizeof(mbstate));
256 #endif /* TRE_MBSTATE */
257
258 DPRINT(("tre_tnfa_run_approx, input type %d, len %d, eflags %d, "
259 "match_tags %p\n",
260 type, len, eflags,
261 match_tags));
262 DPRINT(("max cost %d, ins %d, del %d, subst %d\n",
263 default_params.max_cost,
264 default_params.cost_ins,
265 default_params.cost_del,
266 default_params.cost_subst));
267
268 /* Allocate memory for temporary data required for matching. This needs to
269 be done for every matching operation to be thread safe. This allocates
270 everything in a single large block from the stack frame using alloca()
271 or with malloc() if alloca is unavailable. */
272 {
273 unsigned char *buf_cursor;
274 /* Space needed for one array of tags. */
275 int tag_bytes = sizeof(*tmp_tags) * num_tags;
276 /* Space needed for one reach table. */
277 int reach_bytes = sizeof(*reach_next) * tnfa->num_states;
278 /* Total space needed. */
279 int total_bytes = reach_bytes * 2 + (tnfa->num_states * 2 + 1 ) * tag_bytes;
280 /* Add some extra to make sure we can align the pointers. The multiplier
281 used here must be equal to the number of ALIGN calls below. */
282 total_bytes += (sizeof(long) - 1) * 3;
283
284 /* Allocate the memory. */
285 #ifdef TRE_USE_ALLOCA
286 buf = alloca(total_bytes);
287 #else /* !TRE_USE_ALLOCA */
288 buf = xmalloc((unsigned)total_bytes);
289 #endif /* !TRE_USE_ALLOCA */
290 if (!buf)
291 return REG_ESPACE;
292 memset(buf, 0, (size_t)total_bytes);
293
294 /* Allocate `tmp_tags' from `buf'. */
295 tmp_tags = (void *)buf;
296 buf_cursor = buf + tag_bytes;
297 buf_cursor += ALIGN(buf_cursor, long);
298
299 /* Allocate `reach' from `buf'. */
300 reach = (void *)buf_cursor;
301 buf_cursor += reach_bytes;
302 buf_cursor += ALIGN(buf_cursor, long);
303
304 /* Allocate `reach_next' from `buf'. */
305 reach_next = (void *)buf_cursor;
306 buf_cursor += reach_bytes;
307 buf_cursor += ALIGN(buf_cursor, long);
308
309 /* Allocate tag arrays for `reach' and `reach_next' from `buf'. */
310 for (i = 0; i < tnfa->num_states; i++)
311 {
312 reach[i].tags = (void *)buf_cursor;
313 buf_cursor += tag_bytes;
314 reach_next[i].tags = (void *)buf_cursor;
315 buf_cursor += tag_bytes;
316 }
317 assert(buf_cursor <= buf + total_bytes);
318 }
319
320 for (i = 0; i < TRE_M_LAST; i++)
321 match_costs[i] = INT_MAX;
322
323 /* Mark the reach arrays empty. */
324 for (i = 0; i < tnfa->num_states; i++)
325 reach[i].pos = reach_next[i].pos = -2;
326
327 prev_pos = pos;
328 GET_NEXT_WCHAR();
329 pos = 0;
330
331 while (/*CONSTCOND*/(void)1,1)
332 {
333 DPRINT(("%03d:%2lc/%05d\n", pos, (tre_cint_t)next_c, (int)next_c));
334
335 /* Add initial states to `reach_next' if an exact match has not yet
336 been found. */
337 if (match_costs[TRE_M_COST] > 0)
338 {
339 tre_tnfa_transition_t *trans;
340 DPRINT((" init"));
341 for (trans = tnfa->initial; trans->state; trans++)
342 {
343 int stateid = trans->state_id;
344
345 /* If this state is not currently in `reach_next', add it
346 there. */
347 if (reach_next[stateid].pos < pos)
348 {
349 if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
350 {
351 /* Assertions failed, don't add this state. */
352 DPRINT((" !%d (assert)", stateid));
353 continue;
354 }
355 DPRINT((" %d", stateid));
356 reach_next[stateid].state = trans->state;
357 reach_next[stateid].pos = pos;
358
359 /* Compute tag values after this transition. */
360 for (i = 0; i < num_tags; i++)
361 reach_next[stateid].tags[i] = -1;
362
363 if (trans->tags)
364 for (i = 0; trans->tags[i] >= 0; i++)
365 if (trans->tags[i] < num_tags)
366 reach_next[stateid].tags[trans->tags[i]] = pos;
367
368 /* Set the parameters, depth, and costs. */
369 reach_next[stateid].params = default_params;
370 reach_next[stateid].depth = 0;
371 for (i = 0; i < TRE_M_LAST; i++)
372 reach_next[stateid].costs[0][i] = 0;
373 if (trans->params)
374 tre_set_params(&reach_next[stateid], trans->params,
375 default_params);
376
377 /* If this is the final state, mark the exact match. */
378 if (trans->state == tnfa->final)
379 {
380 match_eo = pos;
381 for (i = 0; i < num_tags; i++)
382 match_tags[i] = reach_next[stateid].tags[i];
383 for (i = 0; i < TRE_M_LAST; i++)
384 match_costs[i] = 0;
385 }
386 }
387 }
388 DPRINT(("\n"));
389 }
390
391
392 /* Handle inserts. This is done by pretending there's an epsilon
393 transition from each state in `reach' back to the same state.
394 We don't need to worry about the final state here; this will never
395 give a better match than what we already have. */
396 for (id = 0; id < tnfa->num_states; id++)
397 {
398 int depth;
399 int cost, cost0;
400
401 if (reach[id].pos != prev_pos)
402 {
403 DPRINT((" insert: %d not reached\n", id));
404 continue; /* Not reached. */
405 }
406
407 depth = reach[id].depth;
408
409 /* Compute and check cost at current depth. */
410 cost = reach[id].costs[depth][TRE_M_COST];
411 if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
412 cost += reach[id].params.cost_ins;
413 if (cost > reach[id].params.max_cost)
414 continue; /* Cost too large. */
415
416 /* Check number of inserts at current depth. */
417 if (reach[id].costs[depth][TRE_M_NUM_INS] + 1
418 > reach[id].params.max_ins)
419 continue; /* Too many inserts. */
420
421 /* Check total number of errors at current depth. */
422 if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
423 > reach[id].params.max_err)
424 continue; /* Too many errors. */
425
426 /* Compute overall cost. */
427 cost0 = cost;
428 if (depth > 0)
429 {
430 cost0 = reach[id].costs[0][TRE_M_COST];
431 if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
432 cost0 += reach[id].params.cost_ins;
433 else
434 cost0 += default_params.cost_ins;
435 }
436
437 DPRINT((" insert: from %d to %d, cost %d: ", id, id,
438 reach[id].costs[depth][TRE_M_COST]));
439 if (reach_next[id].pos == pos
440 && (cost0 >= reach_next[id].costs[0][TRE_M_COST]))
441 {
442 DPRINT(("lose\n"));
443 continue;
444 }
445 DPRINT(("win\n"));
446
447 /* Copy state, position, tags, parameters, and depth. */
448 reach_next[id].state = reach[id].state;
449 reach_next[id].pos = pos;
450 for (i = 0; i < num_tags; i++)
451 reach_next[id].tags[i] = reach[id].tags[i];
452 reach_next[id].params = reach[id].params;
453 reach_next[id].depth = reach[id].depth;
454
455 /* Set the costs after this transition. */
456 memcpy(reach_next[id].costs, reach[id].costs,
457 sizeof(reach_next[id].costs[0][0])
458 * TRE_M_LAST * (depth + 1));
459 reach_next[id].costs[depth][TRE_M_COST] = cost;
460 reach_next[id].costs[depth][TRE_M_NUM_INS]++;
461 reach_next[id].costs[depth][TRE_M_NUM_ERR]++;
462 if (depth > 0)
463 {
464 reach_next[id].costs[0][TRE_M_COST] = cost0;
465 reach_next[id].costs[0][TRE_M_NUM_INS]++;
466 reach_next[id].costs[0][TRE_M_NUM_ERR]++;
467 }
468
469 }
470
471
472 /* Handle deletes. This is done by traversing through the whole TNFA
473 pretending that all transitions are epsilon transitions, until
474 no more states can be reached with better costs. */
475 {
476 int rb_size = 256;
477 tre_tnfa_approx_reach_t *static_ringbuffer[256];
478 tre_tnfa_approx_reach_t **ringbuffer = static_ringbuffer;
479 tre_tnfa_approx_reach_t **deque_start, **deque_end;
480
481 deque_start = deque_end = ringbuffer;
482
483 /* Add all states in `reach_next' to the deque. */
484 for (id = 0; id < tnfa->num_states; id++)
485 {
486 if (reach_next[id].pos != pos)
487 continue;
488 *deque_end = &reach_next[id];
489 deque_end++;
490 /* check if we need to resize the buffer */
491 if (deque_end >= (ringbuffer + rb_size)) {
492 tre_tnfa_approx_reach_t **larger_buf;
493 rb_size += 512;
494 larger_buf = (tre_tnfa_approx_reach_t **)
495 ((ringbuffer == static_ringbuffer) ?
496 xmalloc(sizeof(tre_tnfa_approx_reach_t *) * rb_size) :
497 xrealloc(ringbuffer, sizeof(tre_tnfa_approx_reach_t *) * rb_size));
498 if (!larger_buf) {
499 DPRINT(("tre_tnfa_run_approx: cannot resize ring buffer\n"));
500 if (ringbuffer != static_ringbuffer)
501 xfree(ringbuffer);
502 #ifndef TRE_USE_ALLOCA
503 if (buf)
504 xfree(buf);
505 #endif /* !TRE_USE_ALLOCA */
506 return REG_ESPACE;
507 }
508 deque_start = deque_start - ringbuffer + larger_buf;
509 deque_end = deque_end - ringbuffer + larger_buf;
510 if (ringbuffer == static_ringbuffer) /* when switching from stack to heap we need to copy */
511 memcpy(larger_buf, ringbuffer, sizeof(static_ringbuffer));
512 ringbuffer = larger_buf;
513 }
514 }
515
516 /* Repeat until the deque is empty. */
517 while (deque_end != deque_start)
518 {
519 tre_tnfa_approx_reach_t *reach_p;
520 int depth;
521 int cost, cost0;
522 tre_tnfa_transition_t *trans;
523
524 /* Pop the first item off the deque. */
525 reach_p = *deque_start;
526 id = (int)(reach_p - reach_next);
527 depth = reach_p->depth;
528
529 /* Compute cost at current depth. */
530 cost = reach_p->costs[depth][TRE_M_COST];
531 if (reach_p->params.cost_del != TRE_PARAM_UNSET)
532 cost += reach_p->params.cost_del;
533
534 /* Check cost, number of deletes, and total number of errors
535 at current depth. */
536 if (cost > reach_p->params.max_cost
537 || (reach_p->costs[depth][TRE_M_NUM_DEL] + 1
538 > reach_p->params.max_del)
539 || (reach_p->costs[depth][TRE_M_NUM_ERR] + 1
540 > reach_p->params.max_err))
541 {
542 /* Too many errors or cost too large. */
543 DPRINT((" delete: from %03d: cost too large\n", id));
544 deque_start++;
545 if (deque_start >= (ringbuffer + rb_size))
546 deque_start = ringbuffer;
547 continue;
548 }
549
550 /* Compute overall cost. */
551 cost0 = cost;
552 if (depth > 0)
553 {
554 cost0 = reach_p->costs[0][TRE_M_COST];
555 if (reach_p->params.cost_del != TRE_PARAM_UNSET)
556 cost0 += reach_p->params.cost_del;
557 else
558 cost0 += default_params.cost_del;
559 }
560
561 for (trans = reach_p->state; trans->state; trans++)
562 {
563 int dest_id = trans->state_id;
564 DPRINT((" delete: from %03d to %03d, cost %d (%d): ",
565 id, dest_id, cost0, reach_p->params.max_cost));
566
567 if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
568 {
569 DPRINT(("assertion failed\n"));
570 continue;
571 }
572
573 /* Compute tag values after this transition. */
574 for (i = 0; i < num_tags; i++)
575 tmp_tags[i] = reach_p->tags[i];
576 if (trans->tags)
577 for (i = 0; trans->tags[i] >= 0; i++)
578 if (trans->tags[i] < num_tags)
579 tmp_tags[trans->tags[i]] = pos;
580
581 /* If another path has also reached this state, choose the one
582 with the smallest cost or best tags if costs are equal. */
583 if (reach_next[dest_id].pos == pos
584 && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
585 || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
586 && (!match_tags
587 || !tre_tag_order(num_tags,
588 tnfa->tag_directions,
589 tmp_tags,
590 reach_next[dest_id].tags)))))
591 {
592 DPRINT(("lose, cost0 %d, have %d\n",
593 cost0, reach_next[dest_id].costs[0][TRE_M_COST]));
594 continue;
595 }
596 DPRINT(("win\n"));
597
598 /* Set state, position, tags, parameters, depth, and costs. */
599 reach_next[dest_id].state = trans->state;
600 reach_next[dest_id].pos = pos;
601 for (i = 0; i < num_tags; i++)
602 reach_next[dest_id].tags[i] = tmp_tags[i];
603
604 reach_next[dest_id].params = reach_p->params;
605 if (trans->params)
606 tre_set_params(&reach_next[dest_id], trans->params,
607 default_params);
608
609 reach_next[dest_id].depth = reach_p->depth;
610 memcpy(&reach_next[dest_id].costs,
611 reach_p->costs,
612 sizeof(reach_p->costs[0][0])
613 * TRE_M_LAST * (depth + 1));
614 reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
615 reach_next[dest_id].costs[depth][TRE_M_NUM_DEL]++;
616 reach_next[dest_id].costs[depth][TRE_M_NUM_ERR]++;
617 if (depth > 0)
618 {
619 reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
620 reach_next[dest_id].costs[0][TRE_M_NUM_DEL]++;
621 reach_next[dest_id].costs[0][TRE_M_NUM_ERR]++;
622 }
623
624 if (trans->state == tnfa->final
625 && (match_eo < 0
626 || match_costs[TRE_M_COST] > cost0
627 || (match_costs[TRE_M_COST] == cost0
628 && (num_tags > 0
629 && tmp_tags[0] <= match_tags[0]))))
630 {
631 DPRINT((" setting new match at %d, cost %d\n",
632 pos, cost0));
633 match_eo = pos;
634 memcpy(match_costs, reach_next[dest_id].costs[0],
635 sizeof(match_costs[0]) * TRE_M_LAST);
636 for (i = 0; i < num_tags; i++)
637 match_tags[i] = tmp_tags[i];
638 }
639
640 /* Add to the end of the deque. */
641 *deque_end = &reach_next[dest_id];
642 deque_end++;
643 if (deque_end >= (ringbuffer + rb_size))
644 deque_end = ringbuffer;
645 assert(deque_end != deque_start);
646 }
647 deque_start++;
648 if (deque_start >= (ringbuffer + rb_size))
649 deque_start = ringbuffer;
650 }
651
652 if (ringbuffer != static_ringbuffer)
653 xfree(ringbuffer);
654 }
655
656 #ifdef TRE_DEBUG
657 tre_print_reach(tnfa, reach_next, pos, num_tags);
658 #endif /* TRE_DEBUG */
659
660 /* Check for end of string. */
661 if (len < 0)
662 {
663 if (type == STR_USER)
664 {
665 if (str_user_end)
666 break;
667 }
668 else if (next_c == L'\0')
669 break;
670 }
671 else
672 {
673 if (pos >= len)
674 break;
675 }
676
677 prev_pos = pos;
678 GET_NEXT_WCHAR();
679
680 /* Swap `reach' and `reach_next'. */
681 {
682 tre_tnfa_approx_reach_t *tmp;
683 tmp = reach;
684 reach = reach_next;
685 reach_next = tmp;
686 }
687
688 /* Handle exact matches and substitutions. */
689 for (id = 0; id < tnfa->num_states; id++)
690 {
691 tre_tnfa_transition_t *trans;
692
693 if (reach[id].pos < prev_pos)
694 continue; /* Not reached. */
695 for (trans = reach[id].state; trans->state; trans++)
696 {
697 int dest_id;
698 int depth;
699 int cost, cost0, err;
700
701 if (trans->assertions
702 && (CHECK_ASSERTIONS(trans->assertions)
703 || CHECK_CHAR_CLASSES(trans, tnfa, eflags)))
704 {
705 DPRINT((" exact, from %d: assert failed\n", id));
706 continue;
707 }
708
709 depth = reach[id].depth;
710 dest_id = trans->state_id;
711
712 cost = reach[id].costs[depth][TRE_M_COST];
713 cost0 = reach[id].costs[0][TRE_M_COST];
714 err = 0;
715
716 if (trans->code_min > (tre_cint_t)prev_c
717 || trans->code_max < (tre_cint_t)prev_c)
718 {
719 /* Handle substitutes. The required character was not in
720 the string, so match it in place of whatever was supposed
721 to be there and increase costs accordingly. */
722 err = 1;
723
724 /* Compute and check cost at current depth. */
725 cost = reach[id].costs[depth][TRE_M_COST];
726 if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
727 cost += reach[id].params.cost_subst;
728 if (cost > reach[id].params.max_cost)
729 continue; /* Cost too large. */
730
731 /* Check number of substitutes at current depth. */
732 if (reach[id].costs[depth][TRE_M_NUM_SUBST] + 1
733 > reach[id].params.max_subst)
734 continue; /* Too many substitutes. */
735
736 /* Check total number of errors at current depth. */
737 if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
738 > reach[id].params.max_err)
739 continue; /* Too many errors. */
740
741 /* Compute overall cost. */
742 cost0 = cost;
743 if (depth > 0)
744 {
745 cost0 = reach[id].costs[0][TRE_M_COST];
746 if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
747 cost0 += reach[id].params.cost_subst;
748 else
749 cost0 += default_params.cost_subst;
750 }
751 DPRINT((" subst, from %03d to %03d, cost %d: ",
752 id, dest_id, cost0));
753 }
754 else
755 DPRINT((" exact, from %03d to %03d, cost %d: ",
756 id, dest_id, cost0));
757
758 /* Compute tag values after this transition. */
759 for (i = 0; i < num_tags; i++)
760 tmp_tags[i] = reach[id].tags[i];
761 if (trans->tags)
762 for (i = 0; trans->tags[i] >= 0; i++)
763 if (trans->tags[i] < num_tags)
764 tmp_tags[trans->tags[i]] = pos;
765
766 /* If another path has also reached this state, choose the
767 one with the smallest cost or best tags if costs are equal. */
768 if (reach_next[dest_id].pos == pos
769 && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
770 || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
771 && !tre_tag_order(num_tags, tnfa->tag_directions,
772 tmp_tags,
773 reach_next[dest_id].tags))))
774 {
775 DPRINT(("lose\n"));
776 continue;
777 }
778 DPRINT(("win %d %d\n",
779 reach_next[dest_id].pos,
780 reach_next[dest_id].costs[0][TRE_M_COST]));
781
782 /* Set state, position, tags, and depth. */
783 reach_next[dest_id].state = trans->state;
784 reach_next[dest_id].pos = pos;
785 for (i = 0; i < num_tags; i++)
786 reach_next[dest_id].tags[i] = tmp_tags[i];
787 reach_next[dest_id].depth = reach[id].depth;
788
789 /* Set parameters. */
790 reach_next[dest_id].params = reach[id].params;
791 if (trans->params)
792 tre_set_params(&reach_next[dest_id], trans->params,
793 default_params);
794
795 /* Set the costs after this transition. */
796 memcpy(&reach_next[dest_id].costs,
797 reach[id].costs,
798 sizeof(reach[id].costs[0][0])
799 * TRE_M_LAST * (depth + 1));
800 reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
801 reach_next[dest_id].costs[depth][TRE_M_NUM_SUBST] += err;
802 reach_next[dest_id].costs[depth][TRE_M_NUM_ERR] += err;
803 if (depth > 0)
804 {
805 reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
806 reach_next[dest_id].costs[0][TRE_M_NUM_SUBST] += err;
807 reach_next[dest_id].costs[0][TRE_M_NUM_ERR] += err;
808 }
809
810 if (trans->state == tnfa->final
811 && (match_eo < 0
812 || cost0 < match_costs[TRE_M_COST]
813 || (cost0 == match_costs[TRE_M_COST]
814 && num_tags > 0 && tmp_tags[0] <= match_tags[0])))
815 {
816 DPRINT((" setting new match at %d, cost %d\n",
817 pos, cost0));
818 match_eo = pos;
819 for (i = 0; i < TRE_M_LAST; i++)
820 match_costs[i] = reach_next[dest_id].costs[0][i];
821 for (i = 0; i < num_tags; i++)
822 match_tags[i] = tmp_tags[i];
823 }
824 }
825 }
826 }
827
828 DPRINT(("match end offset = %d, match cost = %d\n", match_eo,
829 match_costs[TRE_M_COST]));
830
831 #ifndef TRE_USE_ALLOCA
832 if (buf)
833 xfree(buf);
834 #endif /* !TRE_USE_ALLOCA */
835
836 match->cost = match_costs[TRE_M_COST];
837 match->num_ins = match_costs[TRE_M_NUM_INS];
838 match->num_del = match_costs[TRE_M_NUM_DEL];
839 match->num_subst = match_costs[TRE_M_NUM_SUBST];
840 *match_end_ofs = match_eo;
841
842 return match_eo >= 0 ? REG_OK : REG_NOMATCH;
843 }
844