1 /*
2   tre-match-approx.c - TRE approximate regex matching engine
3 
4   This software is released under a BSD-style license.
5   See the file LICENSE for details and copyright.
6 
7 */
8 
9 #ifdef HAVE_CONFIG_H
10 #include <config.h>
11 #endif /* HAVE_CONFIG_H */
12 
13 /* AIX requires this to be the first thing in the file.	 */
14 #ifdef TRE_USE_ALLOCA
15 #ifndef __GNUC__
16 # if HAVE_ALLOCA_H
17 #  include <alloca.h>
18 # else
19 #  ifdef _AIX
20  #pragma alloca
21 #  else
22 #   ifndef alloca /* predefined by HP cc +Olibcalls */
23 char *alloca ();
24 #   endif
25 #  endif
26 # endif
27 #endif
28 #endif /* TRE_USE_ALLOCA */
29 
30 #define __USE_STRING_INLINES
31 #undef __NO_INLINE__
32 
33 #include <assert.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <limits.h>
37 #ifdef HAVE_WCHAR_H
38 #include <wchar.h>
39 #endif /* HAVE_WCHAR_H */
40 #ifdef HAVE_WCTYPE_H
41 #include <wctype.h>
42 #endif /* HAVE_WCTYPE_H */
43 #ifndef TRE_WCHAR
44 #include <ctype.h>
45 #endif /* !TRE_WCHAR */
46 #ifdef HAVE_MALLOC_H
47 #include <malloc.h>
48 #endif /* HAVE_MALLOC_H */
49 
50 #include "tre-internal.h"
51 #include "tre-match-utils.h"
52 #include "tre.h"
53 #include "xmalloc.h"
54 
55 #define TRE_M_COST	0
56 #define TRE_M_NUM_INS	1
57 #define TRE_M_NUM_DEL	2
58 #define TRE_M_NUM_SUBST 3
59 #define TRE_M_NUM_ERR	4
60 #define TRE_M_LAST	5
61 
62 #define TRE_M_MAX_DEPTH 3
63 
64 typedef struct {
65   /* State in the TNFA transition table. */
66   tre_tnfa_transition_t *state;
67   /* Position in input string. */
68   int pos;
69   /* Tag values. */
70   int *tags;
71   /* Matching parameters. */
72   regaparams_t params;
73   /* Nesting depth of parameters.  This is used as an index in
74      the `costs' array. */
75   int depth;
76   /* Costs and counter values for different parameter nesting depths. */
77   int costs[TRE_M_MAX_DEPTH + 1][TRE_M_LAST];
78 } tre_tnfa_approx_reach_t;
79 
80 
81 #ifdef TRE_DEBUG
82 /* Prints the `reach' array in a readable fashion with DPRINT. */
83 static void
84 tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_approx_reach_t *reach,
85 		int pos, int num_tags)
86 {
87   int id;
88 
89   /* Print each state on one line. */
90   DPRINT(("  reach:\n"));
91   for (id = 0; id < tnfa->num_states; id++)
92     {
93       int i, j;
94       if (reach[id].pos < pos)
95 	continue;  /* Not reached. */
96       DPRINT(("	 %03d, costs ", id));
97       for (i = 0; i <= reach[id].depth; i++)
98 	{
99 	  DPRINT(("["));
100 	  for (j = 0; j < TRE_M_LAST; j++)
101 	    {
102 	      DPRINT(("%2d", reach[id].costs[i][j]));
103 	      if (j + 1 < TRE_M_LAST)
104 		DPRINT((","));
105 	    }
106 	  DPRINT(("]"));
107 	  if (i + 1 <= reach[id].depth)
108 	    DPRINT((", "));
109 	}
110       DPRINT(("\n	tags "));
111       for (i = 0; i < num_tags; i++)
112 	{
113 	  DPRINT(("%02d", reach[id].tags[i]));
114 	  if (i + 1 < num_tags)
115 	    DPRINT((","));
116 	}
117       DPRINT(("\n"));
118     }
119   DPRINT(("\n"));
120 }
121 #endif /* TRE_DEBUG */
122 
123 
124 /* Sets the matching parameters in `reach' to the ones defined in the `pa'
125    array.  If `pa' specifies default values, they are taken from
126    `default_params'. */
127 inline static void
128 tre_set_params(tre_tnfa_approx_reach_t *reach,
129 	       int *pa, regaparams_t default_params)
130 {
131   int value;
132 
133   /* If depth is increased reset costs and counters to zero for the
134      new levels. */
135   value = pa[TRE_PARAM_DEPTH];
136   assert(value <= TRE_M_MAX_DEPTH);
137   if (value > reach->depth)
138     {
139       int i, j;
140       for (i = reach->depth + 1; i <= value; i++)
141 	for (j = 0; j < TRE_M_LAST; j++)
142 	  reach->costs[i][j] = 0;
143     }
144   reach->depth = value;
145 
146   /* Set insert cost. */
147   value = pa[TRE_PARAM_COST_INS];
148   if (value == TRE_PARAM_DEFAULT)
149     reach->params.cost_ins = default_params.cost_ins;
150   else if (value != TRE_PARAM_UNSET)
151     reach->params.cost_ins = value;
152 
153   /* Set delete cost. */
154   value = pa[TRE_PARAM_COST_DEL];
155   if (value == TRE_PARAM_DEFAULT)
156     reach->params.cost_del = default_params.cost_del;
157   else if (value != TRE_PARAM_UNSET)
158     reach->params.cost_del = value;
159 
160   /* Set substitute cost. */
161   value = pa[TRE_PARAM_COST_SUBST];
162   if (value == TRE_PARAM_DEFAULT)
163     reach->params.cost_subst = default_params.cost_subst;
164   else
165     reach->params.cost_subst = value;
166 
167   /* Set maximum cost. */
168   value = pa[TRE_PARAM_COST_MAX];
169   if (value == TRE_PARAM_DEFAULT)
170     reach->params.max_cost = default_params.max_cost;
171   else if (value != TRE_PARAM_UNSET)
172     reach->params.max_cost = value;
173 
174   /* Set maximum inserts. */
175   value = pa[TRE_PARAM_MAX_INS];
176   if (value == TRE_PARAM_DEFAULT)
177     reach->params.max_ins = default_params.max_ins;
178   else if (value != TRE_PARAM_UNSET)
179     reach->params.max_ins = value;
180 
181   /* Set maximum deletes. */
182   value = pa[TRE_PARAM_MAX_DEL];
183   if (value == TRE_PARAM_DEFAULT)
184     reach->params.max_del = default_params.max_del;
185   else if (value != TRE_PARAM_UNSET)
186     reach->params.max_del = value;
187 
188   /* Set maximum substitutes. */
189   value = pa[TRE_PARAM_MAX_SUBST];
190   if (value == TRE_PARAM_DEFAULT)
191     reach->params.max_subst = default_params.max_subst;
192   else if (value != TRE_PARAM_UNSET)
193     reach->params.max_subst = value;
194 
195   /* Set maximum number of errors. */
196   value = pa[TRE_PARAM_MAX_ERR];
197   if (value == TRE_PARAM_DEFAULT)
198     reach->params.max_err = default_params.max_err;
199   else if (value != TRE_PARAM_UNSET)
200     reach->params.max_err = value;
201 }
202 
203 reg_errcode_t
204 tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len,
205 		    tre_str_type_t type, int *match_tags,
206 		    regamatch_t *match, regaparams_t default_params,
207 		    int eflags, int *match_end_ofs)
208 {
209   /* State variables required by GET_NEXT_WCHAR. */
210   tre_char_t prev_c = 0, next_c = 0;
211   const char *str_byte = string;
212   int pos = -1;
213   unsigned int pos_add_next = 1;
214 #ifdef TRE_WCHAR
215   const wchar_t *str_wide = string;
216 #ifdef TRE_MBSTATE
217   mbstate_t mbstate;
218 #endif /* !TRE_WCHAR */
219 #endif /* TRE_WCHAR */
220   int reg_notbol = eflags & REG_NOTBOL;
221   int reg_noteol = eflags & REG_NOTEOL;
222   int reg_newline = tnfa->cflags & REG_NEWLINE;
223   int str_user_end = 0;
224 
225   int prev_pos;
226 
227   /* Number of tags. */
228   int num_tags;
229   /* The reach tables. */
230   tre_tnfa_approx_reach_t *reach, *reach_next;
231   /* Tag array for temporary use. */
232   int *tmp_tags;
233 
234   /* End offset of best match so far, or -1 if no match found yet. */
235   int match_eo = -1;
236   /* Costs of the match. */
237   int match_costs[TRE_M_LAST];
238 
239   /* Space for temporary data required for matching. */
240   unsigned char *buf;
241 
242   int i, id;
243 
244   if (!match_tags)
245     num_tags = 0;
246   else
247     num_tags = tnfa->num_tags;
248 
249 #ifdef TRE_MBSTATE
250   memset(&mbstate, '\0', sizeof(mbstate));
251 #endif /* TRE_MBSTATE */
252 
253   DPRINT(("tre_tnfa_run_approx, input type %d, len %d, eflags %d, "
254 	  "match_tags %p\n",
255 	  type, len, eflags,
256 	  match_tags));
257   DPRINT(("max cost %d, ins %d, del %d, subst %d\n",
258 	  default_params.max_cost,
259 	  default_params.cost_ins,
260 	  default_params.cost_del,
261 	  default_params.cost_subst));
262 
263   /* Allocate memory for temporary data required for matching.	This needs to
264      be done for every matching operation to be thread safe.  This allocates
265      everything in a single large block from the stack frame using alloca()
266      or with malloc() if alloca is unavailable. */
267   {
268     unsigned char *buf_cursor;
269     /* Space needed for one array of tags. */
270     int tag_bytes = sizeof(*tmp_tags) * num_tags;
271     /* Space needed for one reach table. */
272     int reach_bytes = sizeof(*reach_next) * tnfa->num_states;
273     /* Total space needed. */
274     int total_bytes = reach_bytes * 2 + (tnfa->num_states * 2 + 1 ) * tag_bytes;
275     /* Add some extra to make sure we can align the pointers.  The multiplier
276        used here must be equal to the number of ALIGN calls below. */
277     total_bytes += (sizeof(long) - 1) * 3;
278 
279     /* Allocate the memory. */
280 #ifdef TRE_USE_ALLOCA
281     buf = alloca(total_bytes);
282 #else /* !TRE_USE_ALLOCA */
283     buf = xmalloc((unsigned)total_bytes);
284 #endif /* !TRE_USE_ALLOCA */
285     if (!buf)
286       return REG_ESPACE;
287     memset(buf, 0, (size_t)total_bytes);
288 
289     /* Allocate `tmp_tags' from `buf'. */
290     tmp_tags = (void *)buf;
291     buf_cursor = buf + tag_bytes;
292     buf_cursor += ALIGN(buf_cursor, long);
293 
294     /* Allocate `reach' from `buf'. */
295     reach = (void *)buf_cursor;
296     buf_cursor += reach_bytes;
297     buf_cursor += ALIGN(buf_cursor, long);
298 
299     /* Allocate `reach_next' from `buf'. */
300     reach_next = (void *)buf_cursor;
301     buf_cursor += reach_bytes;
302     buf_cursor += ALIGN(buf_cursor, long);
303 
304     /* Allocate tag arrays for `reach' and `reach_next' from `buf'. */
305     for (i = 0; i < tnfa->num_states; i++)
306       {
307 	reach[i].tags = (void *)buf_cursor;
308 	buf_cursor += tag_bytes;
309 	reach_next[i].tags = (void *)buf_cursor;
310 	buf_cursor += tag_bytes;
311       }
312     assert(buf_cursor <= buf + total_bytes);
313   }
314 
315   for (i = 0; i < TRE_M_LAST; i++)
316     match_costs[i] = INT_MAX;
317 
318   /* Mark the reach arrays empty. */
319   for (i = 0; i < tnfa->num_states; i++)
320     reach[i].pos = reach_next[i].pos = -2;
321 
322   prev_pos = pos;
323   GET_NEXT_WCHAR();
324   pos = 0;
325 
326   while (/*CONSTCOND*/1)
327     {
328       DPRINT(("%03d:%2lc/%05d\n", pos, (tre_cint_t)next_c, (int)next_c));
329 
330       /* Add initial states to `reach_next' if an exact match has not yet
331 	 been found. */
332       if (match_costs[TRE_M_COST] > 0)
333 	{
334 	  tre_tnfa_transition_t *trans;
335 	  DPRINT(("  init"));
336 	  for (trans = tnfa->initial; trans->state; trans++)
337 	    {
338 	      int stateid = trans->state_id;
339 
340 	      /* If this state is not currently in `reach_next', add it
341 		 there. */
342 	      if (reach_next[stateid].pos < pos)
343 		{
344 		  if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
345 		    {
346 		      /* Assertions failed, don't add this state. */
347 		      DPRINT((" !%d (assert)", stateid));
348 		      continue;
349 		    }
350 		  DPRINT((" %d", stateid));
351 		  reach_next[stateid].state = trans->state;
352 		  reach_next[stateid].pos = pos;
353 
354 		  /* Compute tag values after this transition. */
355 		  for (i = 0; i < num_tags; i++)
356 		    reach_next[stateid].tags[i] = -1;
357 
358 		  if (trans->tags)
359 		    for (i = 0; trans->tags[i] >= 0; i++)
360 		      if (trans->tags[i] < num_tags)
361 			reach_next[stateid].tags[trans->tags[i]] = pos;
362 
363 		  /* Set the parameters, depth, and costs. */
364 		  reach_next[stateid].params = default_params;
365 		  reach_next[stateid].depth = 0;
366 		  for (i = 0; i < TRE_M_LAST; i++)
367 		    reach_next[stateid].costs[0][i] = 0;
368 		  if (trans->params)
369 		    tre_set_params(&reach_next[stateid], trans->params,
370 				   default_params);
371 
372 		  /* If this is the final state, mark the exact match. */
373 		  if (trans->state == tnfa->final)
374 		    {
375 		      match_eo = pos;
376 		      for (i = 0; i < num_tags; i++)
377 			match_tags[i] = reach_next[stateid].tags[i];
378 		      for (i = 0; i < TRE_M_LAST; i++)
379 			match_costs[i] = 0;
380 		    }
381 		}
382 	    }
383 	    DPRINT(("\n"));
384 	}
385 
386 
387       /* Handle inserts.  This is done by pretending there's an epsilon
388 	 transition from each state in `reach' back to the same state.
389 	 We don't need to worry about the final state here; this will never
390 	 give a better match than what we already have. */
391       for (id = 0; id < tnfa->num_states; id++)
392 	{
393 	  int depth;
394 	  int cost, cost0;
395 
396 	  if (reach[id].pos != prev_pos)
397 	    {
398 	      DPRINT(("	 insert: %d not reached\n", id));
399 	      continue;	 /* Not reached. */
400 	    }
401 
402 	  depth = reach[id].depth;
403 
404 	  /* Compute and check cost at current depth. */
405 	  cost = reach[id].costs[depth][TRE_M_COST];
406 	  if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
407 	    cost += reach[id].params.cost_ins;
408 	  if (cost > reach[id].params.max_cost)
409 	    continue;  /* Cost too large. */
410 
411 	  /* Check number of inserts at current depth. */
412 	  if (reach[id].costs[depth][TRE_M_NUM_INS] + 1
413 	      > reach[id].params.max_ins)
414 	    continue;  /* Too many inserts. */
415 
416 	  /* Check total number of errors at current depth. */
417 	  if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
418 	      > reach[id].params.max_err)
419 	    continue;  /* Too many errors. */
420 
421 	  /* Compute overall cost. */
422 	  cost0 = cost;
423 	  if (depth > 0)
424 	    {
425 	      cost0 = reach[id].costs[0][TRE_M_COST];
426 	      if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
427 		cost0 += reach[id].params.cost_ins;
428 	      else
429 		cost0 += default_params.cost_ins;
430 	    }
431 
432 	  DPRINT(("  insert: from %d to %d, cost %d: ", id, id,
433 		  reach[id].costs[depth][TRE_M_COST]));
434 	  if (reach_next[id].pos == pos
435 	      && (cost0 >= reach_next[id].costs[0][TRE_M_COST]))
436 	    {
437 	      DPRINT(("lose\n"));
438 	      continue;
439 	    }
440 	  DPRINT(("win\n"));
441 
442 	  /* Copy state, position, tags, parameters, and depth. */
443 	  reach_next[id].state = reach[id].state;
444 	  reach_next[id].pos = pos;
445 	  for (i = 0; i < num_tags; i++)
446 	    reach_next[id].tags[i] = reach[id].tags[i];
447 	  reach_next[id].params = reach[id].params;
448 	  reach_next[id].depth = reach[id].depth;
449 
450 	  /* Set the costs after this transition. */
451 	  memcpy(reach_next[id].costs, reach[id].costs,
452 		 sizeof(reach_next[id].costs[0][0])
453 		 * TRE_M_LAST * (depth + 1));
454 	  reach_next[id].costs[depth][TRE_M_COST] = cost;
455 	  reach_next[id].costs[depth][TRE_M_NUM_INS]++;
456 	  reach_next[id].costs[depth][TRE_M_NUM_ERR]++;
457 	  if (depth > 0)
458 	    {
459 	      reach_next[id].costs[0][TRE_M_COST] = cost0;
460 	      reach_next[id].costs[0][TRE_M_NUM_INS]++;
461 	      reach_next[id].costs[0][TRE_M_NUM_ERR]++;
462 	    }
463 
464 	}
465 
466 
467       /* Handle deletes.  This is done by traversing through the whole TNFA
468 	 pretending that all transitions are epsilon transitions, until
469 	 no more states can be reached with better costs. */
470       {
471 	/* XXX - dynamic ringbuffer size */
472 	tre_tnfa_approx_reach_t *ringbuffer[512];
473 	tre_tnfa_approx_reach_t **deque_start, **deque_end;
474 
475 	deque_start = deque_end = ringbuffer;
476 
477 	/* Add all states in `reach_next' to the deque. */
478 	for (id = 0; id < tnfa->num_states; id++)
479 	  {
480 	    if (reach_next[id].pos != pos)
481 	      continue;
482 	    *deque_end = &reach_next[id];
483 	    deque_end++;
484 	    assert(deque_end != deque_start);
485 	  }
486 
487 	/* Repeat until the deque is empty. */
488 	while (deque_end != deque_start)
489 	  {
490 	    tre_tnfa_approx_reach_t *reach_p;
491 	    int depth;
492 	    int cost, cost0;
493 	    tre_tnfa_transition_t *trans;
494 
495 	    /* Pop the first item off the deque. */
496 	    reach_p = *deque_start;
497 	    id = reach_p - reach_next;
498 	    depth = reach_p->depth;
499 
500 	    /* Compute cost at current depth. */
501 	    cost = reach_p->costs[depth][TRE_M_COST];
502 	    if (reach_p->params.cost_del != TRE_PARAM_UNSET)
503 	      cost += reach_p->params.cost_del;
504 
505 	    /* Check cost, number of deletes, and total number of errors
506 	       at current depth. */
507 	    if (cost > reach_p->params.max_cost
508 		|| (reach_p->costs[depth][TRE_M_NUM_DEL] + 1
509 		    > reach_p->params.max_del)
510 		|| (reach_p->costs[depth][TRE_M_NUM_ERR] + 1
511 		    > reach_p->params.max_err))
512 	      {
513 		/* Too many errors or cost too large. */
514 		DPRINT(("  delete: from %03d: cost too large\n", id));
515 		deque_start++;
516 		if (deque_start >= (ringbuffer + 512))
517 		  deque_start = ringbuffer;
518 		continue;
519 	      }
520 
521 	    /* Compute overall cost. */
522 	    cost0 = cost;
523 	    if (depth > 0)
524 	      {
525 		cost0 = reach_p->costs[0][TRE_M_COST];
526 		if (reach_p->params.cost_del != TRE_PARAM_UNSET)
527 		  cost0 += reach_p->params.cost_del;
528 		else
529 		  cost0 += default_params.cost_del;
530 	      }
531 
532 	    for (trans = reach_p->state; trans->state; trans++)
533 	      {
534 		int dest_id = trans->state_id;
535 		DPRINT(("  delete: from %03d to %03d, cost %d (%d): ",
536 			id, dest_id, cost0, reach_p->params.max_cost));
537 
538 		if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
539 		  {
540 		    DPRINT(("assertion failed\n"));
541 		    continue;
542 		  }
543 
544 		/* Compute tag values after this transition. */
545 		for (i = 0; i < num_tags; i++)
546 		  tmp_tags[i] = reach_p->tags[i];
547 		if (trans->tags)
548 		  for (i = 0; trans->tags[i] >= 0; i++)
549 		    if (trans->tags[i] < num_tags)
550 		      tmp_tags[trans->tags[i]] = pos;
551 
552 		/* If another path has also reached this state, choose the one
553 		   with the smallest cost or best tags if costs are equal. */
554 		if (reach_next[dest_id].pos == pos
555 		    && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
556 			|| (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
557 			    && (!match_tags
558 				|| !tre_tag_order(num_tags,
559 						  tnfa->tag_directions,
560 						  tmp_tags,
561 						  reach_next[dest_id].tags)))))
562 		  {
563 		    DPRINT(("lose, cost0 %d, have %d\n",
564 			    cost0, reach_next[dest_id].costs[0][TRE_M_COST]));
565 		    continue;
566 		  }
567 		DPRINT(("win\n"));
568 
569 		/* Set state, position, tags, parameters, depth, and costs. */
570 		reach_next[dest_id].state = trans->state;
571 		reach_next[dest_id].pos = pos;
572 		for (i = 0; i < num_tags; i++)
573 		  reach_next[dest_id].tags[i] = tmp_tags[i];
574 
575 		reach_next[dest_id].params = reach_p->params;
576 		if (trans->params)
577 		  tre_set_params(&reach_next[dest_id], trans->params,
578 				 default_params);
579 
580 		reach_next[dest_id].depth = reach_p->depth;
581 		memcpy(&reach_next[dest_id].costs,
582 		       reach_p->costs,
583 		       sizeof(reach_p->costs[0][0])
584 		       * TRE_M_LAST * (depth + 1));
585 		reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
586 		reach_next[dest_id].costs[depth][TRE_M_NUM_DEL]++;
587 		reach_next[dest_id].costs[depth][TRE_M_NUM_ERR]++;
588 		if (depth > 0)
589 		  {
590 		    reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
591 		    reach_next[dest_id].costs[0][TRE_M_NUM_DEL]++;
592 		    reach_next[dest_id].costs[0][TRE_M_NUM_ERR]++;
593 		  }
594 
595 		if (trans->state == tnfa->final
596 		    && (match_eo < 0
597 			|| match_costs[TRE_M_COST] > cost0
598 			|| (match_costs[TRE_M_COST] == cost0
599 			    && (num_tags > 0
600 				&& tmp_tags[0] <= match_tags[0]))))
601 		  {
602 		    DPRINT(("	 setting new match at %d, cost %d\n",
603 			    pos, cost0));
604 		    match_eo = pos;
605 		    memcpy(match_costs, reach_next[dest_id].costs[0],
606 			   sizeof(match_costs[0]) * TRE_M_LAST);
607 		    for (i = 0; i < num_tags; i++)
608 		      match_tags[i] = tmp_tags[i];
609 		  }
610 
611 		/* Add to the end of the deque. */
612 		*deque_end = &reach_next[dest_id];
613 		deque_end++;
614 		if (deque_end >= (ringbuffer + 512))
615 		  deque_end = ringbuffer;
616 		assert(deque_end != deque_start);
617 	      }
618 	    deque_start++;
619 	    if (deque_start >= (ringbuffer + 512))
620 	      deque_start = ringbuffer;
621 	  }
622 
623       }
624 
625 #ifdef TRE_DEBUG
626       tre_print_reach(tnfa, reach_next, pos, num_tags);
627 #endif /* TRE_DEBUG */
628 
629       /* Check for end of string. */
630       if (len < 0)
631 	{
632 	  if (type == STR_USER)
633 	    {
634 	      if (str_user_end)
635 		break;
636 	    }
637 	  else if (next_c == L'\0')
638 	    break;
639 	}
640       else
641 	{
642 	  if (pos >= len)
643 	    break;
644 	}
645 
646       prev_pos = pos;
647       GET_NEXT_WCHAR();
648 
649       /* Swap `reach' and `reach_next'. */
650       {
651 	tre_tnfa_approx_reach_t *tmp;
652 	tmp = reach;
653 	reach = reach_next;
654 	reach_next = tmp;
655       }
656 
657       /* Handle exact matches and substitutions. */
658       for (id = 0; id < tnfa->num_states; id++)
659 	{
660 	  tre_tnfa_transition_t *trans;
661 
662 	  if (reach[id].pos < prev_pos)
663 	    continue;  /* Not reached. */
664 	  for (trans = reach[id].state; trans->state; trans++)
665 	    {
666 	      int dest_id;
667 	      int depth;
668 	      int cost, cost0, err;
669 
670 	      if (trans->assertions
671 		  && (CHECK_ASSERTIONS(trans->assertions)
672 		      || CHECK_CHAR_CLASSES(trans, tnfa, eflags)))
673 		{
674 		  DPRINT(("  exact,  from %d: assert failed\n", id));
675 		  continue;
676 		}
677 
678 	      depth = reach[id].depth;
679 	      dest_id = trans->state_id;
680 
681 	      cost = reach[id].costs[depth][TRE_M_COST];
682 	      cost0 = reach[id].costs[0][TRE_M_COST];
683 	      err = 0;
684 
685 	      if (trans->code_min > (tre_cint_t)prev_c
686 		  || trans->code_max < (tre_cint_t)prev_c)
687 		{
688 		  /* Handle substitutes.  The required character was not in
689 		     the string, so match it in place of whatever was supposed
690 		     to be there and increase costs accordingly. */
691 		  err = 1;
692 
693 		  /* Compute and check cost at current depth. */
694 		  cost = reach[id].costs[depth][TRE_M_COST];
695 		  if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
696 		    cost += reach[id].params.cost_subst;
697 		  if (cost > reach[id].params.max_cost)
698 		    continue; /* Cost too large. */
699 
700 		  /* Check number of substitutes at current depth. */
701 		  if (reach[id].costs[depth][TRE_M_NUM_SUBST] + 1
702 		      > reach[id].params.max_subst)
703 		    continue; /* Too many substitutes. */
704 
705 		  /* Check total number of errors at current depth. */
706 		  if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
707 		      > reach[id].params.max_err)
708 		    continue; /* Too many errors. */
709 
710 		  /* Compute overall cost. */
711 		  cost0 = cost;
712 		  if (depth > 0)
713 		    {
714 		      cost0 = reach[id].costs[0][TRE_M_COST];
715 		      if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
716 			cost0 += reach[id].params.cost_subst;
717 		      else
718 			cost0 += default_params.cost_subst;
719 		    }
720 		  DPRINT(("  subst,  from %03d to %03d, cost %d: ",
721 			  id, dest_id, cost0));
722 		}
723 	      else
724 		DPRINT(("  exact,  from %03d to %03d, cost %d: ",
725 			id, dest_id, cost0));
726 
727 	      /* Compute tag values after this transition. */
728 	      for (i = 0; i < num_tags; i++)
729 		tmp_tags[i] = reach[id].tags[i];
730 	      if (trans->tags)
731 		for (i = 0; trans->tags[i] >= 0; i++)
732 		  if (trans->tags[i] < num_tags)
733 		    tmp_tags[trans->tags[i]] = pos;
734 
735 	      /* If another path has also reached this state, choose the
736 		 one with the smallest cost or best tags if costs are equal. */
737 	      if (reach_next[dest_id].pos == pos
738 		  && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
739 		      || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
740 			  && !tre_tag_order(num_tags, tnfa->tag_directions,
741 					    tmp_tags,
742 					    reach_next[dest_id].tags))))
743 		{
744 		  DPRINT(("lose\n"));
745 		  continue;
746 		}
747 	      DPRINT(("win %d %d\n",
748 		      reach_next[dest_id].pos,
749 		      reach_next[dest_id].costs[0][TRE_M_COST]));
750 
751 	      /* Set state, position, tags, and depth. */
752 	      reach_next[dest_id].state = trans->state;
753 	      reach_next[dest_id].pos = pos;
754 	      for (i = 0; i < num_tags; i++)
755 		reach_next[dest_id].tags[i] = tmp_tags[i];
756 	      reach_next[dest_id].depth = reach[id].depth;
757 
758 	      /* Set parameters. */
759 	      reach_next[dest_id].params = reach[id].params;
760 	      if (trans->params)
761 		tre_set_params(&reach_next[dest_id], trans->params,
762 			       default_params);
763 
764 	      /* Set the costs after this transition. */
765 		memcpy(&reach_next[dest_id].costs,
766 		       reach[id].costs,
767 		       sizeof(reach[id].costs[0][0])
768 		       * TRE_M_LAST * (depth + 1));
769 	      reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
770 	      reach_next[dest_id].costs[depth][TRE_M_NUM_SUBST] += err;
771 	      reach_next[dest_id].costs[depth][TRE_M_NUM_ERR] += err;
772 	      if (depth > 0)
773 		{
774 		  reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
775 		  reach_next[dest_id].costs[0][TRE_M_NUM_SUBST] += err;
776 		  reach_next[dest_id].costs[0][TRE_M_NUM_ERR] += err;
777 		}
778 
779 	      if (trans->state == tnfa->final
780 		  && (match_eo < 0
781 		      || cost0 < match_costs[TRE_M_COST]
782 		      || (cost0 == match_costs[TRE_M_COST]
783 			  && num_tags > 0 && tmp_tags[0] <= match_tags[0])))
784 		{
785 		  DPRINT(("    setting new match at %d, cost %d\n",
786 			  pos, cost0));
787 		  match_eo = pos;
788 		  for (i = 0; i < TRE_M_LAST; i++)
789 		    match_costs[i] = reach_next[dest_id].costs[0][i];
790 		  for (i = 0; i < num_tags; i++)
791 		    match_tags[i] = tmp_tags[i];
792 		}
793 	    }
794 	}
795     }
796 
797   DPRINT(("match end offset = %d, match cost = %d\n", match_eo,
798 	  match_costs[TRE_M_COST]));
799 
800 #ifndef TRE_USE_ALLOCA
801   if (buf)
802     xfree(buf);
803 #endif /* !TRE_USE_ALLOCA */
804 
805   match->cost = match_costs[TRE_M_COST];
806   match->num_ins = match_costs[TRE_M_NUM_INS];
807   match->num_del = match_costs[TRE_M_NUM_DEL];
808   match->num_subst = match_costs[TRE_M_NUM_SUBST];
809   *match_end_ofs = match_eo;
810 
811   return match_eo >= 0 ? REG_OK : REG_NOMATCH;
812 }
813