1 /*
2   tre-match-approx.c - TRE approximate regex matching engine
3 
4   This software is released under a BSD-style license.
5   See the file LICENSE for details and copyright.
6 
7 */
8 
9 #ifdef HAVE_CONFIG_H
10 #include <config.h>
11 #endif /* HAVE_CONFIG_H */
12 
13 /* AIX requires this to be the first thing in the file.	 */
14 #ifdef TRE_USE_ALLOCA
15 #ifndef __GNUC__
16 # if HAVE_ALLOCA_H
17 #  include <alloca.h>
18 # else
19 #  ifdef _AIX
20  #pragma alloca
21 #  else
22 #   ifndef alloca /* predefined by HP cc +Olibcalls */
23 char *alloca ();
24 #   endif
25 #  endif
26 # endif
27 #endif
28 #endif /* TRE_USE_ALLOCA */
29 
30 /* These seem compiler/OS-specific, but unexplained
31 On Linux the first is intended to be used only with GCC.
32 #define __USE_STRING_INLINES
33 #undef __NO_INLINE__
34 */
35 
36 // #include <assert.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <limits.h>
40 #ifdef HAVE_WCHAR_H
41 #include <wchar.h>
42 #endif /* HAVE_WCHAR_H */
43 #ifdef HAVE_WCTYPE_H
44 #include <wctype.h>
45 #endif /* HAVE_WCTYPE_H */
46 #ifndef TRE_WCHAR
47 #include <ctype.h>
48 #endif /* !TRE_WCHAR */
49 #ifdef HAVE_MALLOC_H
50 #include <malloc.h>
51 #endif /* HAVE_MALLOC_H */
52 
53 #include "tre-internal.h"
54 #include "tre-match-utils.h"
55 #include "tre.h"
56 #include "xmalloc.h"
57 
58 #define assert(a) R_assert(a)
59 
60 #define TRE_M_COST	0
61 #define TRE_M_NUM_INS	1
62 #define TRE_M_NUM_DEL	2
63 #define TRE_M_NUM_SUBST 3
64 #define TRE_M_NUM_ERR	4
65 #define TRE_M_LAST	5
66 
67 #define TRE_M_MAX_DEPTH 3
68 
69 typedef struct {
70   /* State in the TNFA transition table. */
71   tre_tnfa_transition_t *state;
72   /* Position in input string. */
73   int pos;
74   /* Tag values. */
75   int *tags;
76   /* Matching parameters. */
77   regaparams_t params;
78   /* Nesting depth of parameters.  This is used as an index in
79      the `costs' array. */
80   int depth;
81   /* Costs and counter values for different parameter nesting depths. */
82   int costs[TRE_M_MAX_DEPTH + 1][TRE_M_LAST];
83 } tre_tnfa_approx_reach_t;
84 
85 
86 #ifdef TRE_DEBUG
87 /* Prints the `reach' array in a readable fashion with DPRINT. */
88 static void
tre_print_reach(const tre_tnfa_t * tnfa,tre_tnfa_approx_reach_t * reach,int pos,int num_tags)89 tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_approx_reach_t *reach,
90 		int pos, int num_tags)
91 {
92   int id;
93 
94   /* Print each state on one line. */
95   DPRINT(("  reach:\n"));
96   for (id = 0; id < tnfa->num_states; id++)
97     {
98       int i, j;
99       if (reach[id].pos < pos)
100 	continue;  /* Not reached. */
101       DPRINT(("	 %03d, costs ", id));
102       for (i = 0; i <= reach[id].depth; i++)
103 	{
104 	  DPRINT(("["));
105 	  for (j = 0; j < TRE_M_LAST; j++)
106 	    {
107 	      DPRINT(("%2d", reach[id].costs[i][j]));
108 	      if (j + 1 < TRE_M_LAST)
109 		DPRINT((","));
110 	    }
111 	  DPRINT(("]"));
112 	  if (i + 1 <= reach[id].depth)
113 	    DPRINT((", "));
114 	}
115       DPRINT(("\n	tags "));
116       for (i = 0; i < num_tags; i++)
117 	{
118 	  DPRINT(("%02d", reach[id].tags[i]));
119 	  if (i + 1 < num_tags)
120 	    DPRINT((","));
121 	}
122       DPRINT(("\n"));
123     }
124   DPRINT(("\n"));
125 }
126 #endif /* TRE_DEBUG */
127 
128 
129 /* Sets the matching parameters in `reach' to the ones defined in the `pa'
130    array.  If `pa' specifies default values, they are taken from
131    `default_params'. */
132 inline static void
tre_set_params(tre_tnfa_approx_reach_t * reach,int * pa,regaparams_t default_params)133 tre_set_params(tre_tnfa_approx_reach_t *reach,
134 	       int *pa, regaparams_t default_params)
135 {
136   int value;
137 
138   /* If depth is increased reset costs and counters to zero for the
139      new levels. */
140   value = pa[TRE_PARAM_DEPTH];
141   assert(value <= TRE_M_MAX_DEPTH);
142   if (value > reach->depth)
143     {
144       int i, j;
145       for (i = reach->depth + 1; i <= value; i++)
146 	for (j = 0; j < TRE_M_LAST; j++)
147 	  reach->costs[i][j] = 0;
148     }
149   reach->depth = value;
150 
151   /* Set insert cost. */
152   value = pa[TRE_PARAM_COST_INS];
153   if (value == TRE_PARAM_DEFAULT)
154     reach->params.cost_ins = default_params.cost_ins;
155   else if (value != TRE_PARAM_UNSET)
156     reach->params.cost_ins = value;
157 
158   /* Set delete cost. */
159   value = pa[TRE_PARAM_COST_DEL];
160   if (value == TRE_PARAM_DEFAULT)
161     reach->params.cost_del = default_params.cost_del;
162   else if (value != TRE_PARAM_UNSET)
163     reach->params.cost_del = value;
164 
165   /* Set substitute cost. */
166   value = pa[TRE_PARAM_COST_SUBST];
167   if (value == TRE_PARAM_DEFAULT)
168     reach->params.cost_subst = default_params.cost_subst;
169   else
170     reach->params.cost_subst = value;
171 
172   /* Set maximum cost. */
173   value = pa[TRE_PARAM_COST_MAX];
174   if (value == TRE_PARAM_DEFAULT)
175     reach->params.max_cost = default_params.max_cost;
176   else if (value != TRE_PARAM_UNSET)
177     reach->params.max_cost = value;
178 
179   /* Set maximum inserts. */
180   value = pa[TRE_PARAM_MAX_INS];
181   if (value == TRE_PARAM_DEFAULT)
182     reach->params.max_ins = default_params.max_ins;
183   else if (value != TRE_PARAM_UNSET)
184     reach->params.max_ins = value;
185 
186   /* Set maximum deletes. */
187   value = pa[TRE_PARAM_MAX_DEL];
188   if (value == TRE_PARAM_DEFAULT)
189     reach->params.max_del = default_params.max_del;
190   else if (value != TRE_PARAM_UNSET)
191     reach->params.max_del = value;
192 
193   /* Set maximum substitutes. */
194   value = pa[TRE_PARAM_MAX_SUBST];
195   if (value == TRE_PARAM_DEFAULT)
196     reach->params.max_subst = default_params.max_subst;
197   else if (value != TRE_PARAM_UNSET)
198     reach->params.max_subst = value;
199 
200   /* Set maximum number of errors. */
201   value = pa[TRE_PARAM_MAX_ERR];
202   if (value == TRE_PARAM_DEFAULT)
203     reach->params.max_err = default_params.max_err;
204   else if (value != TRE_PARAM_UNSET)
205     reach->params.max_err = value;
206 }
207 
208 reg_errcode_t
tre_tnfa_run_approx(const tre_tnfa_t * tnfa,const void * string,int len,tre_str_type_t type,int * match_tags,regamatch_t * match,regaparams_t default_params,int eflags,int * match_end_ofs)209 tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len,
210 		    tre_str_type_t type, int *match_tags,
211 		    regamatch_t *match, regaparams_t default_params,
212 		    int eflags, int *match_end_ofs)
213 {
214   /* State variables required by GET_NEXT_WCHAR. */
215   tre_char_t prev_c = 0, next_c = 0;
216   const char *str_byte = string;
217   int pos = -1;
218   unsigned int pos_add_next = 1;
219 #ifdef TRE_WCHAR
220   const wchar_t *str_wide = string;
221 #ifdef TRE_MBSTATE
222   mbstate_t mbstate;
223 #endif /* !TRE_WCHAR */
224 #endif /* TRE_WCHAR */
225   int reg_notbol = eflags & REG_NOTBOL;
226   int reg_noteol = eflags & REG_NOTEOL;
227   int reg_newline = tnfa->cflags & REG_NEWLINE;
228   int str_user_end = 0;
229 
230   int prev_pos;
231 
232   /* Number of tags. */
233   int num_tags;
234   /* The reach tables. */
235   tre_tnfa_approx_reach_t *reach, *reach_next;
236   /* Tag array for temporary use. */
237   int *tmp_tags;
238 
239   /* End offset of best match so far, or -1 if no match found yet. */
240   int match_eo = -1;
241   /* Costs of the match. */
242   int match_costs[TRE_M_LAST];
243 
244   /* Space for temporary data required for matching. */
245   unsigned char *buf;
246 
247   int i, id;
248 
249   if (!match_tags)
250     num_tags = 0;
251   else
252     num_tags = tnfa->num_tags;
253 
254 #ifdef TRE_MBSTATE
255   memset(&mbstate, '\0', sizeof(mbstate));
256 #endif /* TRE_MBSTATE */
257 
258   DPRINT(("tre_tnfa_run_approx, input type %d, len %d, eflags %d, "
259 	  "match_tags %p\n",
260 	  type, len, eflags,
261 	  match_tags));
262   DPRINT(("max cost %d, ins %d, del %d, subst %d\n",
263 	  default_params.max_cost,
264 	  default_params.cost_ins,
265 	  default_params.cost_del,
266 	  default_params.cost_subst));
267 
268   /* Allocate memory for temporary data required for matching.	This needs to
269      be done for every matching operation to be thread safe.  This allocates
270      everything in a single large block from the stack frame using alloca()
271      or with malloc() if alloca is unavailable. */
272   {
273     unsigned char *buf_cursor;
274     /* Space needed for one array of tags. */
275     int tag_bytes = sizeof(*tmp_tags) * num_tags;
276     /* Space needed for one reach table. */
277     int reach_bytes = sizeof(*reach_next) * tnfa->num_states;
278     /* Total space needed. */
279     int total_bytes = reach_bytes * 2 + (tnfa->num_states * 2 + 1 ) * tag_bytes;
280     /* Add some extra to make sure we can align the pointers.  The multiplier
281        used here must be equal to the number of ALIGN calls below. */
282     total_bytes += (sizeof(long) - 1) * 3;
283 
284     /* Allocate the memory. */
285 #ifdef TRE_USE_ALLOCA
286     buf = alloca(total_bytes);
287 #else /* !TRE_USE_ALLOCA */
288     buf = xmalloc((unsigned)total_bytes);
289 #endif /* !TRE_USE_ALLOCA */
290     if (!buf)
291       return REG_ESPACE;
292     memset(buf, 0, (size_t)total_bytes);
293 
294     /* Allocate `tmp_tags' from `buf'. */
295     tmp_tags = (void *)buf;
296     buf_cursor = buf + tag_bytes;
297     buf_cursor += ALIGN(buf_cursor, long);
298 
299     /* Allocate `reach' from `buf'. */
300     reach = (void *)buf_cursor;
301     buf_cursor += reach_bytes;
302     buf_cursor += ALIGN(buf_cursor, long);
303 
304     /* Allocate `reach_next' from `buf'. */
305     reach_next = (void *)buf_cursor;
306     buf_cursor += reach_bytes;
307     buf_cursor += ALIGN(buf_cursor, long);
308 
309     /* Allocate tag arrays for `reach' and `reach_next' from `buf'. */
310     for (i = 0; i < tnfa->num_states; i++)
311       {
312 	reach[i].tags = (void *)buf_cursor;
313 	buf_cursor += tag_bytes;
314 	reach_next[i].tags = (void *)buf_cursor;
315 	buf_cursor += tag_bytes;
316       }
317     assert(buf_cursor <= buf + total_bytes);
318   }
319 
320   for (i = 0; i < TRE_M_LAST; i++)
321     match_costs[i] = INT_MAX;
322 
323   /* Mark the reach arrays empty. */
324   for (i = 0; i < tnfa->num_states; i++)
325     reach[i].pos = reach_next[i].pos = -2;
326 
327   prev_pos = pos;
328   GET_NEXT_WCHAR();
329   pos = 0;
330 
331   while (/*CONSTCOND*/(void)1,1)
332     {
333       DPRINT(("%03d:%2lc/%05d\n", pos, (tre_cint_t)next_c, (int)next_c));
334 
335       /* Add initial states to `reach_next' if an exact match has not yet
336 	 been found. */
337       if (match_costs[TRE_M_COST] > 0)
338 	{
339 	  tre_tnfa_transition_t *trans;
340 	  DPRINT(("  init"));
341 	  for (trans = tnfa->initial; trans->state; trans++)
342 	    {
343 	      int stateid = trans->state_id;
344 
345 	      /* If this state is not currently in `reach_next', add it
346 		 there. */
347 	      if (reach_next[stateid].pos < pos)
348 		{
349 		  if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
350 		    {
351 		      /* Assertions failed, don't add this state. */
352 		      DPRINT((" !%d (assert)", stateid));
353 		      continue;
354 		    }
355 		  DPRINT((" %d", stateid));
356 		  reach_next[stateid].state = trans->state;
357 		  reach_next[stateid].pos = pos;
358 
359 		  /* Compute tag values after this transition. */
360 		  for (i = 0; i < num_tags; i++)
361 		    reach_next[stateid].tags[i] = -1;
362 
363 		  if (trans->tags)
364 		    for (i = 0; trans->tags[i] >= 0; i++)
365 		      if (trans->tags[i] < num_tags)
366 			reach_next[stateid].tags[trans->tags[i]] = pos;
367 
368 		  /* Set the parameters, depth, and costs. */
369 		  reach_next[stateid].params = default_params;
370 		  reach_next[stateid].depth = 0;
371 		  for (i = 0; i < TRE_M_LAST; i++)
372 		    reach_next[stateid].costs[0][i] = 0;
373 		  if (trans->params)
374 		    tre_set_params(&reach_next[stateid], trans->params,
375 				   default_params);
376 
377 		  /* If this is the final state, mark the exact match. */
378 		  if (trans->state == tnfa->final)
379 		    {
380 		      match_eo = pos;
381 		      for (i = 0; i < num_tags; i++)
382 			match_tags[i] = reach_next[stateid].tags[i];
383 		      for (i = 0; i < TRE_M_LAST; i++)
384 			match_costs[i] = 0;
385 		    }
386 		}
387 	    }
388 	    DPRINT(("\n"));
389 	}
390 
391 
392       /* Handle inserts.  This is done by pretending there's an epsilon
393 	 transition from each state in `reach' back to the same state.
394 	 We don't need to worry about the final state here; this will never
395 	 give a better match than what we already have. */
396       for (id = 0; id < tnfa->num_states; id++)
397 	{
398 	  int depth;
399 	  int cost, cost0;
400 
401 	  if (reach[id].pos != prev_pos)
402 	    {
403 	      DPRINT(("	 insert: %d not reached\n", id));
404 	      continue;	 /* Not reached. */
405 	    }
406 
407 	  depth = reach[id].depth;
408 
409 	  /* Compute and check cost at current depth. */
410 	  cost = reach[id].costs[depth][TRE_M_COST];
411 	  if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
412 	    cost += reach[id].params.cost_ins;
413 	  if (cost > reach[id].params.max_cost)
414 	    continue;  /* Cost too large. */
415 
416 	  /* Check number of inserts at current depth. */
417 	  if (reach[id].costs[depth][TRE_M_NUM_INS] + 1
418 	      > reach[id].params.max_ins)
419 	    continue;  /* Too many inserts. */
420 
421 	  /* Check total number of errors at current depth. */
422 	  if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
423 	      > reach[id].params.max_err)
424 	    continue;  /* Too many errors. */
425 
426 	  /* Compute overall cost. */
427 	  cost0 = cost;
428 	  if (depth > 0)
429 	    {
430 	      cost0 = reach[id].costs[0][TRE_M_COST];
431 	      if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
432 		cost0 += reach[id].params.cost_ins;
433 	      else
434 		cost0 += default_params.cost_ins;
435 	    }
436 
437 	  DPRINT(("  insert: from %d to %d, cost %d: ", id, id,
438 		  reach[id].costs[depth][TRE_M_COST]));
439 	  if (reach_next[id].pos == pos
440 	      && (cost0 >= reach_next[id].costs[0][TRE_M_COST]))
441 	    {
442 	      DPRINT(("lose\n"));
443 	      continue;
444 	    }
445 	  DPRINT(("win\n"));
446 
447 	  /* Copy state, position, tags, parameters, and depth. */
448 	  reach_next[id].state = reach[id].state;
449 	  reach_next[id].pos = pos;
450 	  for (i = 0; i < num_tags; i++)
451 	    reach_next[id].tags[i] = reach[id].tags[i];
452 	  reach_next[id].params = reach[id].params;
453 	  reach_next[id].depth = reach[id].depth;
454 
455 	  /* Set the costs after this transition. */
456 	  memcpy(reach_next[id].costs, reach[id].costs,
457 		 sizeof(reach_next[id].costs[0][0])
458 		 * TRE_M_LAST * (depth + 1));
459 	  reach_next[id].costs[depth][TRE_M_COST] = cost;
460 	  reach_next[id].costs[depth][TRE_M_NUM_INS]++;
461 	  reach_next[id].costs[depth][TRE_M_NUM_ERR]++;
462 	  if (depth > 0)
463 	    {
464 	      reach_next[id].costs[0][TRE_M_COST] = cost0;
465 	      reach_next[id].costs[0][TRE_M_NUM_INS]++;
466 	      reach_next[id].costs[0][TRE_M_NUM_ERR]++;
467 	    }
468 
469 	}
470 
471 
472       /* Handle deletes.  This is done by traversing through the whole TNFA
473 	 pretending that all transitions are epsilon transitions, until
474 	 no more states can be reached with better costs. */
475       {
476 	int rb_size = 256;
477 	tre_tnfa_approx_reach_t *static_ringbuffer[256];
478 	tre_tnfa_approx_reach_t **ringbuffer = static_ringbuffer;
479 	tre_tnfa_approx_reach_t **deque_start, **deque_end;
480 
481 	deque_start = deque_end = ringbuffer;
482 
483 	/* Add all states in `reach_next' to the deque. */
484 	for (id = 0; id < tnfa->num_states; id++)
485 	  {
486 	    if (reach_next[id].pos != pos)
487 	      continue;
488 	    *deque_end = &reach_next[id];
489 	    deque_end++;
490 	    /* check if we need to resize the buffer */
491 	    if (deque_end >= (ringbuffer + rb_size)) {
492 	      tre_tnfa_approx_reach_t **larger_buf;
493 	      rb_size += 512;
494 	      larger_buf = (tre_tnfa_approx_reach_t **)
495 		((ringbuffer == static_ringbuffer) ?
496 		 xmalloc(sizeof(tre_tnfa_approx_reach_t *) * rb_size) :
497 		 xrealloc(ringbuffer, sizeof(tre_tnfa_approx_reach_t *) * rb_size));
498 	      if (!larger_buf) {
499 		DPRINT(("tre_tnfa_run_approx: cannot resize ring buffer\n"));
500 		if (ringbuffer != static_ringbuffer)
501 		  xfree(ringbuffer);
502 #ifndef TRE_USE_ALLOCA
503 		if (buf)
504 		  xfree(buf);
505 #endif /* !TRE_USE_ALLOCA */
506 		return REG_ESPACE;
507 	      }
508 	      deque_start = deque_start - ringbuffer + larger_buf;
509 	      deque_end = deque_end - ringbuffer + larger_buf;
510 	      if (ringbuffer == static_ringbuffer) /* when switching from stack to heap we need to copy */
511 		memcpy(larger_buf, ringbuffer, sizeof(static_ringbuffer));
512 	      ringbuffer = larger_buf;
513 	    }
514 	  }
515 
516 	/* Repeat until the deque is empty. */
517 	while (deque_end != deque_start)
518 	  {
519 	    tre_tnfa_approx_reach_t *reach_p;
520 	    int depth;
521 	    int cost, cost0;
522 	    tre_tnfa_transition_t *trans;
523 
524 	    /* Pop the first item off the deque. */
525 	    reach_p = *deque_start;
526 	    id = (int)(reach_p - reach_next);
527 	    depth = reach_p->depth;
528 
529 	    /* Compute cost at current depth. */
530 	    cost = reach_p->costs[depth][TRE_M_COST];
531 	    if (reach_p->params.cost_del != TRE_PARAM_UNSET)
532 	      cost += reach_p->params.cost_del;
533 
534 	    /* Check cost, number of deletes, and total number of errors
535 	       at current depth. */
536 	    if (cost > reach_p->params.max_cost
537 		|| (reach_p->costs[depth][TRE_M_NUM_DEL] + 1
538 		    > reach_p->params.max_del)
539 		|| (reach_p->costs[depth][TRE_M_NUM_ERR] + 1
540 		    > reach_p->params.max_err))
541 	      {
542 		/* Too many errors or cost too large. */
543 		DPRINT(("  delete: from %03d: cost too large\n", id));
544 		deque_start++;
545 		if (deque_start >= (ringbuffer + rb_size))
546 		  deque_start = ringbuffer;
547 		continue;
548 	      }
549 
550 	    /* Compute overall cost. */
551 	    cost0 = cost;
552 	    if (depth > 0)
553 	      {
554 		cost0 = reach_p->costs[0][TRE_M_COST];
555 		if (reach_p->params.cost_del != TRE_PARAM_UNSET)
556 		  cost0 += reach_p->params.cost_del;
557 		else
558 		  cost0 += default_params.cost_del;
559 	      }
560 
561 	    for (trans = reach_p->state; trans->state; trans++)
562 	      {
563 		int dest_id = trans->state_id;
564 		DPRINT(("  delete: from %03d to %03d, cost %d (%d): ",
565 			id, dest_id, cost0, reach_p->params.max_cost));
566 
567 		if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
568 		  {
569 		    DPRINT(("assertion failed\n"));
570 		    continue;
571 		  }
572 
573 		/* Compute tag values after this transition. */
574 		for (i = 0; i < num_tags; i++)
575 		  tmp_tags[i] = reach_p->tags[i];
576 		if (trans->tags)
577 		  for (i = 0; trans->tags[i] >= 0; i++)
578 		    if (trans->tags[i] < num_tags)
579 		      tmp_tags[trans->tags[i]] = pos;
580 
581 		/* If another path has also reached this state, choose the one
582 		   with the smallest cost or best tags if costs are equal. */
583 		if (reach_next[dest_id].pos == pos
584 		    && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
585 			|| (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
586 			    && (!match_tags
587 				|| !tre_tag_order(num_tags,
588 						  tnfa->tag_directions,
589 						  tmp_tags,
590 						  reach_next[dest_id].tags)))))
591 		  {
592 		    DPRINT(("lose, cost0 %d, have %d\n",
593 			    cost0, reach_next[dest_id].costs[0][TRE_M_COST]));
594 		    continue;
595 		  }
596 		DPRINT(("win\n"));
597 
598 		/* Set state, position, tags, parameters, depth, and costs. */
599 		reach_next[dest_id].state = trans->state;
600 		reach_next[dest_id].pos = pos;
601 		for (i = 0; i < num_tags; i++)
602 		  reach_next[dest_id].tags[i] = tmp_tags[i];
603 
604 		reach_next[dest_id].params = reach_p->params;
605 		if (trans->params)
606 		  tre_set_params(&reach_next[dest_id], trans->params,
607 				 default_params);
608 
609 		reach_next[dest_id].depth = reach_p->depth;
610 		memcpy(&reach_next[dest_id].costs,
611 		       reach_p->costs,
612 		       sizeof(reach_p->costs[0][0])
613 		       * TRE_M_LAST * (depth + 1));
614 		reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
615 		reach_next[dest_id].costs[depth][TRE_M_NUM_DEL]++;
616 		reach_next[dest_id].costs[depth][TRE_M_NUM_ERR]++;
617 		if (depth > 0)
618 		  {
619 		    reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
620 		    reach_next[dest_id].costs[0][TRE_M_NUM_DEL]++;
621 		    reach_next[dest_id].costs[0][TRE_M_NUM_ERR]++;
622 		  }
623 
624 		if (trans->state == tnfa->final
625 		    && (match_eo < 0
626 			|| match_costs[TRE_M_COST] > cost0
627 			|| (match_costs[TRE_M_COST] == cost0
628 			    && (num_tags > 0
629 				&& tmp_tags[0] <= match_tags[0]))))
630 		  {
631 		    DPRINT(("	 setting new match at %d, cost %d\n",
632 			    pos, cost0));
633 		    match_eo = pos;
634 		    memcpy(match_costs, reach_next[dest_id].costs[0],
635 			   sizeof(match_costs[0]) * TRE_M_LAST);
636 		    for (i = 0; i < num_tags; i++)
637 		      match_tags[i] = tmp_tags[i];
638 		  }
639 
640 		/* Add to the end of the deque. */
641 		*deque_end = &reach_next[dest_id];
642 		deque_end++;
643 		if (deque_end >= (ringbuffer + rb_size))
644 		  deque_end = ringbuffer;
645 		assert(deque_end != deque_start);
646 	      }
647 	    deque_start++;
648 	    if (deque_start >= (ringbuffer + rb_size))
649 	      deque_start = ringbuffer;
650 	  }
651 
652 	if (ringbuffer != static_ringbuffer)
653 	  xfree(ringbuffer);
654       }
655 
656 #ifdef TRE_DEBUG
657       tre_print_reach(tnfa, reach_next, pos, num_tags);
658 #endif /* TRE_DEBUG */
659 
660       /* Check for end of string. */
661       if (len < 0)
662 	{
663 	  if (type == STR_USER)
664 	    {
665 	      if (str_user_end)
666 		break;
667 	    }
668 	  else if (next_c == L'\0')
669 	    break;
670 	}
671       else
672 	{
673 	  if (pos >= len)
674 	    break;
675 	}
676 
677       prev_pos = pos;
678       GET_NEXT_WCHAR();
679 
680       /* Swap `reach' and `reach_next'. */
681       {
682 	tre_tnfa_approx_reach_t *tmp;
683 	tmp = reach;
684 	reach = reach_next;
685 	reach_next = tmp;
686       }
687 
688       /* Handle exact matches and substitutions. */
689       for (id = 0; id < tnfa->num_states; id++)
690 	{
691 	  tre_tnfa_transition_t *trans;
692 
693 	  if (reach[id].pos < prev_pos)
694 	    continue;  /* Not reached. */
695 	  for (trans = reach[id].state; trans->state; trans++)
696 	    {
697 	      int dest_id;
698 	      int depth;
699 	      int cost, cost0, err;
700 
701 	      if (trans->assertions
702 		  && (CHECK_ASSERTIONS(trans->assertions)
703 		      || CHECK_CHAR_CLASSES(trans, tnfa, eflags)))
704 		{
705 		  DPRINT(("  exact,  from %d: assert failed\n", id));
706 		  continue;
707 		}
708 
709 	      depth = reach[id].depth;
710 	      dest_id = trans->state_id;
711 
712 	      cost = reach[id].costs[depth][TRE_M_COST];
713 	      cost0 = reach[id].costs[0][TRE_M_COST];
714 	      err = 0;
715 
716 	      if (trans->code_min > (tre_cint_t)prev_c
717 		  || trans->code_max < (tre_cint_t)prev_c)
718 		{
719 		  /* Handle substitutes.  The required character was not in
720 		     the string, so match it in place of whatever was supposed
721 		     to be there and increase costs accordingly. */
722 		  err = 1;
723 
724 		  /* Compute and check cost at current depth. */
725 		  cost = reach[id].costs[depth][TRE_M_COST];
726 		  if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
727 		    cost += reach[id].params.cost_subst;
728 		  if (cost > reach[id].params.max_cost)
729 		    continue; /* Cost too large. */
730 
731 		  /* Check number of substitutes at current depth. */
732 		  if (reach[id].costs[depth][TRE_M_NUM_SUBST] + 1
733 		      > reach[id].params.max_subst)
734 		    continue; /* Too many substitutes. */
735 
736 		  /* Check total number of errors at current depth. */
737 		  if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
738 		      > reach[id].params.max_err)
739 		    continue; /* Too many errors. */
740 
741 		  /* Compute overall cost. */
742 		  cost0 = cost;
743 		  if (depth > 0)
744 		    {
745 		      cost0 = reach[id].costs[0][TRE_M_COST];
746 		      if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
747 			cost0 += reach[id].params.cost_subst;
748 		      else
749 			cost0 += default_params.cost_subst;
750 		    }
751 		  DPRINT(("  subst,  from %03d to %03d, cost %d: ",
752 			  id, dest_id, cost0));
753 		}
754 	      else
755 		DPRINT(("  exact,  from %03d to %03d, cost %d: ",
756 			id, dest_id, cost0));
757 
758 	      /* Compute tag values after this transition. */
759 	      for (i = 0; i < num_tags; i++)
760 		tmp_tags[i] = reach[id].tags[i];
761 	      if (trans->tags)
762 		for (i = 0; trans->tags[i] >= 0; i++)
763 		  if (trans->tags[i] < num_tags)
764 		    tmp_tags[trans->tags[i]] = pos;
765 
766 	      /* If another path has also reached this state, choose the
767 		 one with the smallest cost or best tags if costs are equal. */
768 	      if (reach_next[dest_id].pos == pos
769 		  && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
770 		      || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
771 			  && !tre_tag_order(num_tags, tnfa->tag_directions,
772 					    tmp_tags,
773 					    reach_next[dest_id].tags))))
774 		{
775 		  DPRINT(("lose\n"));
776 		  continue;
777 		}
778 	      DPRINT(("win %d %d\n",
779 		      reach_next[dest_id].pos,
780 		      reach_next[dest_id].costs[0][TRE_M_COST]));
781 
782 	      /* Set state, position, tags, and depth. */
783 	      reach_next[dest_id].state = trans->state;
784 	      reach_next[dest_id].pos = pos;
785 	      for (i = 0; i < num_tags; i++)
786 		reach_next[dest_id].tags[i] = tmp_tags[i];
787 	      reach_next[dest_id].depth = reach[id].depth;
788 
789 	      /* Set parameters. */
790 	      reach_next[dest_id].params = reach[id].params;
791 	      if (trans->params)
792 		tre_set_params(&reach_next[dest_id], trans->params,
793 			       default_params);
794 
795 	      /* Set the costs after this transition. */
796 	      memcpy(&reach_next[dest_id].costs,
797 		     reach[id].costs,
798 		     sizeof(reach[id].costs[0][0])
799 		     * TRE_M_LAST * (depth + 1));
800 	      reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
801 	      reach_next[dest_id].costs[depth][TRE_M_NUM_SUBST] += err;
802 	      reach_next[dest_id].costs[depth][TRE_M_NUM_ERR] += err;
803 	      if (depth > 0)
804 		{
805 		  reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
806 		  reach_next[dest_id].costs[0][TRE_M_NUM_SUBST] += err;
807 		  reach_next[dest_id].costs[0][TRE_M_NUM_ERR] += err;
808 		}
809 
810 	      if (trans->state == tnfa->final
811 		  && (match_eo < 0
812 		      || cost0 < match_costs[TRE_M_COST]
813 		      || (cost0 == match_costs[TRE_M_COST]
814 			  && num_tags > 0 && tmp_tags[0] <= match_tags[0])))
815 		{
816 		  DPRINT(("    setting new match at %d, cost %d\n",
817 			  pos, cost0));
818 		  match_eo = pos;
819 		  for (i = 0; i < TRE_M_LAST; i++)
820 		    match_costs[i] = reach_next[dest_id].costs[0][i];
821 		  for (i = 0; i < num_tags; i++)
822 		    match_tags[i] = tmp_tags[i];
823 		}
824 	    }
825 	}
826     }
827 
828   DPRINT(("match end offset = %d, match cost = %d\n", match_eo,
829 	  match_costs[TRE_M_COST]));
830 
831 #ifndef TRE_USE_ALLOCA
832   if (buf)
833     xfree(buf);
834 #endif /* !TRE_USE_ALLOCA */
835 
836   match->cost = match_costs[TRE_M_COST];
837   match->num_ins = match_costs[TRE_M_NUM_INS];
838   match->num_del = match_costs[TRE_M_NUM_DEL];
839   match->num_subst = match_costs[TRE_M_NUM_SUBST];
840   *match_end_ofs = match_eo;
841 
842   return match_eo >= 0 ? REG_OK : REG_NOMATCH;
843 }
844