xref: /dragonfly/contrib/tre/lib/tre-compile.c (revision 0ca59c34)
1 /*
2   tre-compile.c - TRE regex compiler
3 
4   This software is released under a BSD-style license.
5   See the file LICENSE for details and copyright.
6 
7 */
8 
9 /*
10   TODO:
11    - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
12      function calls.
13 */
14 
15 
16 #ifdef HAVE_CONFIG_H
17 #include <config.h>
18 #endif /* HAVE_CONFIG_H */
19 #include <stdio.h>
20 #include <assert.h>
21 #include <string.h>
22 #include <limits.h>
23 
24 #include "tre-internal.h"
25 #include "tre-mem.h"
26 #include "tre-stack.h"
27 #include "tre-ast.h"
28 #include "tre-parse.h"
29 #include "tre-compile.h"
30 #include "tre.h"
31 #include "xmalloc.h"
32 
33 /*
34   The bit_ffs() macro in bitstring.h is flawed.  Replace it with a working one.
35 */
36 #undef bit_ffs
37 #define	bit_ffs(name, nbits, value) { \
38 	register bitstr_t *_name = name; \
39 	register int _byte, _nbits = nbits; \
40 	register int _stopbyte = _bit_byte(_nbits), _value = -1; \
41 	for (_byte = 0; _byte <= _stopbyte; ++_byte) \
42 		if (_name[_byte]) { \
43 			_value = _byte << 3; \
44 			for (_stopbyte = _name[_byte]; !(_stopbyte&0x1); \
45 			    ++_value, _stopbyte >>= 1); \
46 			break; \
47 		} \
48 	*(value) = _value; \
49 }
50 
51 /*
52   Algorithms to setup tags so that submatch addressing can be done.
53 */
54 
55 
56 #ifdef TRE_DEBUG
57 static const char *tag_dir_str[] = {
58   "minimize",
59   "maximize",
60   "left-maximize"
61 };
62 
63 static const char _indent[] = "  ";
64 
65 static void
66 print_indent(int indent)
67 {
68   while (indent-- > 0)
69     DPRINT((_indent));
70 }
71 
72 static void print_last_matched_pre(tre_last_matched_pre_t *lm, int indent,
73 				   int num_tags);
74 static void
75 print_last_match_branch_pre(tre_last_matched_branch_pre_t *branch, int indent,
76 			    int num_tags)
77 {
78   tre_last_matched_pre_t *u = branch->last_matched;
79   int n_last_matched = 0;
80 
81   while (u)
82     {
83       n_last_matched++;
84       u = u->next;
85     }
86 
87   print_indent(indent);
88   DPRINT(("BRANCH: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
89 	  branch->tot_branches, branch->tot_last_matched, branch->tot_tags));
90   print_indent(indent);
91   DPRINT(("..n_last_matched=%d last_matched=%d\n", branch->n_last_matched,
92 	  n_last_matched));
93   if (branch->n_last_matched != n_last_matched)
94     DPRINT(("*** mismatch between n_last_matched and unions ***\n"));
95   if (branch->cmp_tag > 0)
96     {
97       int i;
98       const char *sep = " tags=";
99       print_indent(indent);
100       DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
101       for (i = 0; i < num_tags; i++)
102 	if (bit_test(branch->tags, i))
103 	  {
104 	    DPRINT(("%s%d", sep, i));
105 	    sep = ",";
106 	  }
107       DPRINT(("\n"));
108     }
109 
110   u = branch->last_matched;
111   indent++;
112   while (u)
113     {
114       print_last_matched_pre(u, indent, num_tags);
115       u = u->next;
116     }
117 }
118 
119 static void
120 print_last_matched_pre(tre_last_matched_pre_t *lm, int indent, int num_tags)
121 {
122   tre_last_matched_branch_pre_t *b = lm->branches;
123   int n_branches = 0;
124 
125   while (b)
126     {
127       n_branches++;
128       b = b->next;
129     }
130 
131   print_indent(indent);
132   DPRINT(("LAST_MATCHED: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
133 	  lm->tot_branches, lm->tot_last_matched, lm->tot_tags));
134   print_indent(indent);
135   DPRINT(("..start_tag=%d n_branches=%d branches=%d\n", lm->start_tag,
136 	  lm->n_branches, n_branches));
137   if (lm->n_branches != n_branches)
138     DPRINT(("*** mismatch between n and branches ***\n"));
139 
140   b = lm->branches;
141   indent++;
142   while (b)
143     {
144       print_last_match_branch_pre(b, indent, num_tags);
145       b = b->next;
146     }
147 }
148 
149 static void print_last_matched(tre_last_matched_t *lm, int indent);
150 static void
151 print_last_match_branch(tre_last_matched_branch_t *branch, int indent)
152 {
153   tre_last_matched_t *u;
154   int i;
155 
156   print_indent(indent);
157   DPRINT(("BRANCH: n_last_matched=%d\n", branch->n_last_matched));
158   if (branch->cmp_tag > 0)
159     {
160       print_indent(indent);
161       DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
162       if (branch->n_tags > 0)
163 	{
164 	  const char *sep = " tags=";
165 	  for (i = 0; i < branch->n_tags; i++)
166 	    {
167 	      DPRINT(("%s%d", sep, branch->tags[i]));
168 	      sep = ",";
169 	    }
170 	}
171       DPRINT(("\n"));
172     }
173 
174   u = branch->last_matched;
175   indent++;
176   for (i = branch->n_last_matched; i > 0; i--, u++)
177     print_last_matched(u, indent);
178 }
179 
180 static void
181 print_last_matched(tre_last_matched_t *lm, int indent)
182 {
183   int i;
184   tre_last_matched_branch_t *b;
185 
186   print_indent(indent);
187   DPRINT(("LAST_MATCHED: n_branches=%d start_tag=%d\n", lm->n_branches,
188 	  lm->start_tag));
189 
190   b = lm->branches;
191   indent++;
192   for (i = lm->n_branches; i > 0; i--, b++)
193     print_last_match_branch(b, indent);
194 }
195 #endif /* TRE_DEBUG */
196 
197 
198 /* Merge the tre_last_matched_branch_pre_t of src into dst, creating a new
199    one if needed.  If tag_id > 0, add that tag as well (a negative tag_id will
200    create an unset tre_last_matched_branch_pre_t. */
201 static reg_errcode_t
202 tre_merge_branches(tre_mem_t mem, tre_ast_node_t *dst, tre_ast_node_t *src,
203 		   int tag_id, int num_tags)
204 {
205   tre_last_matched_branch_pre_t *db = dst->last_matched_branch;
206   tre_last_matched_branch_pre_t *sb = (src ? src->last_matched_branch : NULL);
207 
208   if (db)
209     {
210       if (sb)
211 	{
212 	  bitstr_t *l = db->tags;
213 	  bitstr_t *r = sb->tags;
214 	  int i = bitstr_size(num_tags);
215 
216 	  while(i-- > 0)
217 	    *l++ |= *r++;
218 	  /* db and sb are the info from two parallel sub-trees, so the tags
219 	     must be mutually exclusive, and we can just add their numbers */
220 	  db->n_tags += sb->n_tags;
221 	  db->tot_tags += sb->tot_tags;
222 	  if (db->last_matched)
223 	    {
224 	      if (sb->last_matched)
225 		{
226 		  tre_last_matched_pre_t *u = db->last_matched;
227 
228 		  while(u->next)
229 		    u = u->next;
230 		  u->next = sb->last_matched;
231 		  db->n_last_matched += sb->n_last_matched;
232 		  db->tot_branches += sb->tot_branches;
233 		  db->tot_last_matched += sb->tot_last_matched;
234 		}
235 	    }
236 	    else if (sb->last_matched)
237 	      {
238 		db->last_matched = sb->last_matched;
239 		db->n_last_matched = sb->n_last_matched;
240 		db->tot_branches = sb->tot_branches;
241 		db->tot_last_matched = sb->tot_last_matched;
242 	      }
243 	}
244     }
245   else
246     db = sb;
247 
248   if (tag_id != 0)
249     {
250       if (!db)
251 	{
252 	  db = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
253 			      + bitstr_size(num_tags));
254 	  if (db == NULL)
255 	    return REG_ESPACE;
256 	  db->tot_branches = 1;
257 	}
258       if (tag_id > 0)
259 	{
260 	  /* tag_id is a new tag, and shouldn't exist in db's tags,
261 	     so we can always increment n_tags */
262 	  bit_set(db->tags, tag_id);
263 	  db->n_tags++;
264 	  db->tot_tags++;
265 	}
266     }
267   dst->last_matched_branch = db;
268   return REG_OK;
269 }
270 
271 
272 /* Inserts a catenation node to the root of the tree given in `node'.
273    As the left child a new tag with number `tag_id' to `node' is added,
274    and the right child is the old root. */
275 static reg_errcode_t
276 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
277 {
278   tre_catenation_t *c;
279 
280   DPRINT(("add_tag_left: tag %d\n", tag_id));
281 
282   c = tre_mem_alloc(mem, sizeof(*c));
283   if (c == NULL)
284     return REG_ESPACE;
285   c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
286   if (c->left == NULL)
287     return REG_ESPACE;
288   c->right = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
289   if (c->right == NULL)
290     return REG_ESPACE;
291 
292   c->right->obj = node->obj;
293   c->right->type = node->type;
294   c->right->last_matched_branch = node->last_matched_branch;
295   c->right->nullable = -1;
296   c->right->submatch_id = -1;
297   node->obj = c;
298   node->type = CATENATION;
299   node->original = c->right;
300   return REG_OK;
301 }
302 
303 /* Inserts a catenation node to the root of the tree given in `node'.
304    As the right child a new tag with number `tag_id' to `node' is added,
305    and the left child is the old root. */
306 static reg_errcode_t
307 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
308 {
309   tre_catenation_t *c;
310 
311   DPRINT(("tre_add_tag_right: tag %d\n", tag_id));
312 
313   c = tre_mem_alloc(mem, sizeof(*c));
314   if (c == NULL)
315     return REG_ESPACE;
316   c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
317   if (c->right == NULL)
318     return REG_ESPACE;
319   c->left = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
320   if (c->left == NULL)
321     return REG_ESPACE;
322 
323   c->left->obj = node->obj;
324   c->left->type = node->type;
325   c->left->last_matched_branch = node->last_matched_branch;
326   c->left->nullable = -1;
327   c->left->submatch_id = -1;
328   node->obj = c;
329   node->type = CATENATION;
330   node->original = c->left;
331   return REG_OK;
332 }
333 
334 typedef enum {
335   ADDTAGS_RECURSE,
336   ADDTAGS_RECURSE_NOT_TOP_UNION,
337   ADDTAGS_AFTER_ITERATION,
338   ADDTAGS_AFTER_UNION_LEFT,
339   ADDTAGS_AFTER_UNION_RIGHT,
340   ADDTAGS_AFTER_CAT_LEFT,
341   ADDTAGS_AFTER_CAT_RIGHT,
342   ADDTAGS_SET_SUBMATCH_END,
343   ADDTAGS_UNION_RECURSE,
344   ADDTAGS_UNION_RIGHT_RECURSE,
345   ADDTAGS_AFTER_UNION_TOP,
346 } tre_addtags_symbol_t;
347 
348 enum {
349   COPY_LAST_MATCHED_BRANCH,
350   COPY_LAST_MATCHED_BRANCH_NEXT,
351   COPY_LAST_MATCHED,
352   COPY_LAST_MATCHED_NEXT,
353 };
354 
355 
356 #define REGSET_UNSET		((unsigned)-1)
357 
358 /* Go through `regset' and set submatch data for submatches that are
359    using this tag. */
360 static void
361 tre_purge_regset(unsigned *regset, tre_tnfa_t *tnfa, int tag)
362 {
363   int i;
364 
365   for (i = 0; regset[i] != REGSET_UNSET; i++)
366     {
367       int id = regset[i] / 2;
368       int start = !(regset[i] % 2);
369       if (id >= SUBMATCH_ID_INVISIBLE_START)
370 	continue;
371       DPRINT(("  Using tag %d for %s offset of "
372 	      "submatch %d\n", tag,
373 	      start ? "start" : "end", id));
374       if (start)
375 	tnfa->submatch_data[id].so_tag = tag;
376       else
377 	tnfa->submatch_data[id].eo_tag = tag;
378     }
379   regset[0] = -1;
380 }
381 
382 
383 #define REGSET_HAS_STARTS	0x1
384 #define REGSET_HAS_ENDS		0x2
385 
386 
387 /* Adds tags to appropriate locations in the parse tree in `tree', so that
388    subexpressions marked for submatch addressing can be traced. */
389 static reg_errcode_t
390 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
391 	     tre_tnfa_t *tnfa)
392 {
393   reg_errcode_t status = REG_OK;
394   tre_addtags_symbol_t symbol;
395   tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
396   int bottom = tre_stack_num_objects(stack);
397   /* True for first pass (counting number of needed tags) */
398   int first_pass = (mem == NULL || tnfa == NULL);
399   unsigned *regset, *orig_regset;
400   int regset_contains = 0;
401   int num_tags = 0; /* Total number of tags. */
402   int num_minimals = 0;	 /* Number of special minimal tags. */
403   int tag = 0;	    /* The tag that is to be added next. */
404   int next_tag = 1; /* Next tag to use after this one. */
405   int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
406   int *reorder_tags = NULL; /* Tag reorder array: a pair for each reorder,
407 		             * the first is the tag to reorder, the second
408 		             * is the tag after which the first is reordered */
409   int *rtp;		    /* Pointer used to fill in reorder_tags and
410 			     * tag_order */
411   int *to_reorder;          /* Transform array converting sequential order to
412 		             * that specified by reorder_tags */
413   int id;
414 
415   tre_tag_direction_t direction = TRE_TAG_LEFT_MAXIMIZE;
416   if (!first_pass)
417     {
418       DPRINT(("Initializing direction to %s\n", tag_dir_str[direction]));
419       tnfa->end_tag = 0;
420       tnfa->minimal_tags[0] = -1;
421     }
422 
423   regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches
424 		   + tnfa->num_submatches_invisible + 1) * 2));
425   if (regset == NULL)
426     {
427       status = REG_ESPACE;
428       goto error_regset;
429     }
430   regset[0] = REGSET_UNSET;
431   orig_regset = regset;
432 
433   if (!first_pass)
434     {
435       /* Allocate all memory for reorder_tags, tag_order, to_seq_order and
436        * to_reorder in one batch (assuming all are the same type) */
437       rtp = reorder_tags = xmalloc(sizeof(*reorder_tags) *
438 				   ((2 * tnfa->num_reorder_tags + 1) +
439 				   tnfa->num_tags));
440       if (reorder_tags == NULL)
441 	{
442 	  status = REG_ESPACE;
443 	  goto error_reorder_tags;
444 	}
445       to_reorder = reorder_tags + (2 * tnfa->num_reorder_tags + 1);
446     }
447 
448   STACK_PUSH(stack, voidptr, node);
449   STACK_PUSH(stack, int, ADDTAGS_RECURSE);
450 
451   while (tre_stack_num_objects(stack) > bottom)
452     {
453       if (status != REG_OK)
454 	break;
455 
456       symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
457       switch (symbol)
458 	{
459 	  int top_union;
460 
461 	case ADDTAGS_SET_SUBMATCH_END:
462 	  {
463 	    int i;
464 
465 	    id = tre_stack_pop_int(stack);
466 	    node = tre_stack_pop_voidptr(stack);
467 	    /* Add end of this submatch to regset. */
468 	    for (i = 0; regset[i] != REGSET_UNSET; i++);
469 	    regset[i] = id * 2 + 1;
470 	    regset[i + 1] = -1;
471 	    regset_contains |= REGSET_HAS_ENDS;
472 
473 	    /* Always put a tag after a minimal iterator. */
474 	    if (minimal_tag >= 0)
475 	      {
476 		if (first_pass)
477 		  {
478 		    node->num_tags++;
479 		    DPRINT(("  ADDTAGS_SET_SUBMATCH_END: node->num_tags = %d\n",
480 			    node->num_tags));
481 		  }
482 		else
483 		  {
484 		    int i;
485 		    status = tre_merge_branches(mem, node, NULL, tag,
486 						tnfa->num_tags);
487 		    if (status != REG_OK)
488 		      break;
489 		    status = tre_add_tag_right(mem, node, tag);
490 		    if (status != REG_OK)
491 		      break;
492 		    tnfa->tag_directions[tag] = TRE_TAG_MINIMIZE;
493 		    DPRINT(("Setting t%d direction to %s\n", tag,
494 			    tag_dir_str[tnfa->tag_directions[tag]]));
495 		    DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
496 		    for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
497 		    tnfa->minimal_tags[i] = tag;
498 		    tnfa->minimal_tags[i + 1] = minimal_tag;
499 		    tnfa->minimal_tags[i + 2] = -1;
500 
501 		    DPRINT(("  Minimal end: t%d reordered to "
502 			    "after t%d\n", tag, minimal_tag));
503 		    /* Append to tag_order, move "tag" after
504 		     * "minimal_tag" */
505 		    *rtp++ = tag;
506 		    *rtp++ = minimal_tag;
507 
508 		    num_minimals++;
509 		    tre_purge_regset(regset, tnfa, tag);
510 		  }
511 
512 		minimal_tag = -1;
513 		DPRINT(("  ADDTAGS_SET_SUBMATCH_END num_tags++ tag=%d\n", tag));
514 		regset[0] = REGSET_UNSET;
515 		regset_contains = 0;
516 		tag = next_tag;
517 		num_tags++;
518 		next_tag++;
519 	      }
520 	    break;
521 	  }
522 
523 	case ADDTAGS_RECURSE_NOT_TOP_UNION:
524 	  /* Like ADDTAGS_RECURSE, except that top_union is set to zero,
525 	   * indicating that if a union is being processed, it is not the
526 	   * top-most of a series */
527 	  top_union = 0;
528 	  goto do_addtags_recurse;
529 
530 	case ADDTAGS_RECURSE:
531 	  /* Setting top_union to 1 means that if a union is begin processed,
532 	   * it is the top-most of a series, and should recurse through the
533 	   * series to set the left_tag and right_tag values */
534 	  top_union = 1;
535 
536 do_addtags_recurse:
537 	  node = tre_stack_pop_voidptr(stack);
538 
539 	  id = node->submatch_id;
540 	  if (id >= 0)
541 	    {
542 	      int i;
543 
544 
545 	      /* Add start of this submatch to regset. */
546 	      for (i = 0; regset[i] != REGSET_UNSET; i++);
547 	      regset[i] = id * 2;
548 	      regset[i + 1] = -1;
549 	      regset_contains |= REGSET_HAS_STARTS;
550 
551 	      /* Add end of this submatch to regset after processing this
552 		 node. */
553 	      STACK_PUSH(stack, voidptr, node);
554 	      STACK_PUSHX(stack, int, id);
555 	      STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
556 	    }
557 
558 	  switch (node->type)
559 	    {
560 	    case LITERAL:
561 	      {
562 		tre_literal_t *lit = node->obj;
563 
564 		if (!IS_SPECIAL(lit) || IS_BACKREF(lit) || IS_EMPTY(lit) || IS_ASSERTION(lit))
565 		  {
566 		    DPRINT(("Literal %d-%d\n",
567 			    (int)lit->code_min, (int)lit->code_max));
568 		    if (regset_contains)
569 		      {
570 			/* Regset is not empty, so add a tag before the
571 			   literal or backref. */
572 			if (first_pass)
573 			  {
574 			    DPRINT(("  ADDTAGS_RECURSE:LITERAL node->num_tags = 1\n"));
575 			    node->num_tags = 1;
576 			  }
577 			else
578 			  {
579 			    status = tre_merge_branches(mem, node, NULL, tag,
580 							tnfa->num_tags);
581 			    if (status != REG_OK)
582 			      break;
583 			    status = tre_add_tag_left(mem, node, tag);
584 			    if (status != REG_OK)
585 			      break;
586 			    if (regset_contains == REGSET_HAS_STARTS)
587 			      tnfa->tag_directions[tag] = TRE_TAG_LEFT_MAXIMIZE;
588 			    else
589 			      tnfa->tag_directions[tag] = direction;
590 			    DPRINT(("Setting t%d direction to %s\n", tag,
591 				    tag_dir_str[tnfa->tag_directions[tag]]));
592 			    tre_purge_regset(regset, tnfa, tag);
593 
594 			    if (IS_BACKREF(lit))
595 			      {
596 				int b = lit->code_max;
597 				int t = tnfa->submatch_data[b].so_tag;
598 				/* Fail if the referenced submatch hasn't been
599 				 * completed yet */
600 				if (tnfa->submatch_data[b].eo_tag < 0)
601 				  {
602 				    status = REG_ESUBREG;
603 				    break;
604 				  }
605 				if (t < tag)
606 				  {
607 				    DPRINT(("  Backref %d start: "
608 					    "t%d reordered to before t%d\n",
609 					    b, tag, t));
610 				    if(t > 0)
611 				      t--;
612 				    /* Append to tag_order, move "tag" after
613 				     * "t" */
614 				    *rtp++ = tag;
615 				    *rtp++ = t;
616 				  }
617 #if TRE_DEBUG
618 				else
619 				  DPRINT(("  Backref %d start: "
620 					  "(t%d already before t%d)\n",
621 					    b, tag, t));
622 #endif /* TRE_DEBUG */
623 			      }
624 			  }
625 
626 			DPRINT(("  ADDTAGS_RECURSE:LITERAL num_tags++ tag=%d\n",
627 				tag));
628 			regset[0] = REGSET_UNSET;
629 			regset_contains = 0;
630 			tag = next_tag;
631 			num_tags++;
632 			next_tag++;
633 		      }
634 		  }
635 		else
636 		  {
637 		    assert(!IS_TAG(lit));
638 		  }
639 		break;
640 	      }
641 	    case CATENATION:
642 	      {
643 		tre_catenation_t *cat = node->obj;
644 		tre_ast_node_t *left = cat->left;
645 		tre_ast_node_t *right = cat->right;
646 		int reserved_tag = -1;
647 		DPRINT(("Catenation, next_tag = %d\n", next_tag));
648 
649 
650 		/* After processing right child. */
651 		STACK_PUSHX(stack, voidptr, node);
652 		STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
653 
654 		/* Process right child. */
655 		STACK_PUSHX(stack, voidptr, right);
656 		STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
657 
658 		/* After processing left child. */
659 		STACK_PUSHX(stack, int, next_tag + left->num_tags);
660 		DPRINT(("  Pushing %d for after left\n",
661 			next_tag + left->num_tags));
662 		if (left->num_tags > 0 && right->num_tags > 0)
663 		  {
664 		    /* Reserve the next tag to the right child. */
665 		    DPRINT(("  ADDTAGS_RECURSE:CATENATION num_tags++ "
666 			    "Reserving next_tag %d to right child\n",
667 			    next_tag));
668 		    reserved_tag = next_tag;
669 		    next_tag++;
670 		  }
671 		STACK_PUSHX(stack, int, reserved_tag);
672 		STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
673 
674 		/* Process left child. */
675 		STACK_PUSHX(stack, voidptr, left);
676 		STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
677 
678 		}
679 	      break;
680 	    case ITERATION:
681 	      {
682 		tre_iteration_t *iter = node->obj;
683 		DPRINT(("Iteration\n"));
684 
685 		if (first_pass)
686 		  STACK_PUSHX(stack, int, regset_contains != 0);
687 		STACK_PUSHX(stack, int, tag);
688 		STACK_PUSHX(stack, voidptr, node);
689 		STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
690 
691 		STACK_PUSHX(stack, voidptr, iter->arg);
692 		STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
693 
694 		/* Regset is not empty, so add a tag here (this always happens
695 		   because iterators always get submatch id, even if in the
696 		   invisible range) */
697 		if (regset_contains)
698 		  {
699 		    if (!first_pass)
700 		      {
701 			status = tre_merge_branches(mem, node, NULL, tag,
702 						    tnfa->num_tags);
703 			if (status != REG_OK)
704 			  break;
705 			status = tre_add_tag_left(mem, node, tag);
706 			if (status != REG_OK)
707 			  break;
708 			if (regset_contains == REGSET_HAS_STARTS && tag != 0)
709 			  tnfa->tag_directions[tag] = iter->minimal ?
710 						      TRE_TAG_MINIMIZE :
711 						      TRE_TAG_LEFT_MAXIMIZE;
712 			else
713 			  tnfa->tag_directions[tag] = direction;
714 			DPRINT(("Setting t%d direction to %s\n", tag,
715 				tag_dir_str[tnfa->tag_directions[tag]]));
716 			tre_purge_regset(regset, tnfa, tag);
717 		      }
718 
719 		    DPRINT(("  ADDTAGS_RECURSE:ITERATION num_tags++ tag=%d\n",
720 			    tag));
721 		    regset[0] = REGSET_UNSET;
722 		    regset_contains = 0;
723 		    tag = next_tag;
724 		    num_tags++;
725 		    next_tag++;
726 		  }
727 		direction = TRE_TAG_LEFT_MAXIMIZE;
728 		DPRINT(("  Setting direction to %s\n", tag_dir_str[direction]));
729 	      }
730 	      break;
731 	    case UNION:
732 	      {
733 		tre_union_t *uni;
734 		tre_ast_node_t *left;
735 		tre_ast_node_t *right;
736 		int front_tag = -1;
737 
738 		DPRINT(("Union\n"));
739 
740 		if (regset_contains)
741 		  {
742 		    DPRINT(("  UNION num_tags++ tag=%d\n", tag));
743 		    front_tag = tag;
744 		    tag = next_tag;
745 		    num_tags++;
746 		    next_tag++;
747 		  }
748 
749 		/* For the top union, walk the tree of consecutive unions,
750 		 * setting the left_tag and right_tag values in increasing
751 		 * order (left to right priority) */
752 		if (top_union &&
753 		    (node->num_submatches -
754 		    (node->submatch_id >= 0 &&
755 		    node->submatch_id < SUBMATCH_ID_INVISIBLE_START)) > 0)
756 		  {
757 		    tre_ast_node_t *n;
758 		    int last = tre_stack_num_objects(stack);
759 
760 		    STACK_PUSH(stack, voidptr, node);
761 		    STACK_PUSH(stack, int, ADDTAGS_UNION_RECURSE);
762 
763 		    while (tre_stack_num_objects(stack) > last)
764 		      {
765 			symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
766 			switch (symbol)
767 			  {
768 			  case ADDTAGS_UNION_RECURSE:
769 			    n = tre_stack_pop_voidptr(stack);
770 			    uni = n->obj;
771 			    left = uni->left;
772 
773 			    /* Since the top union has num_submatches > 0,
774 			     * we set all the consecutive union's
775 			     * make_branches to 1 to force the generation
776 			     * of end tags for each union branch. */
777 			    n->make_branches = 1;
778 
779 			    STACK_PUSH(stack, voidptr, n);
780 			    STACK_PUSH(stack, int,
781 				       ADDTAGS_UNION_RIGHT_RECURSE);
782 
783 			    if (left->type == UNION)
784 			      {
785 				STACK_PUSH(stack, voidptr, left);
786 				STACK_PUSH(stack, int,
787 					   ADDTAGS_UNION_RECURSE);
788 			      }
789 			    else
790 			      {
791 				DPRINT(("  ADDTAGS_UNION_RECURSE "
792 					"num_tags++ tag=%d\n", tag));
793 				uni->left_tag = tag;
794 				tag = next_tag;
795 				num_tags++;
796 				next_tag++;
797 			      }
798 			    break;
799 
800 			  case ADDTAGS_UNION_RIGHT_RECURSE:
801 			    n = tre_stack_pop_voidptr(stack);
802 			    uni = n->obj;
803 			    right = uni->right;
804 
805 			    if (right->type == UNION)
806 			      {
807 				STACK_PUSH(stack, voidptr, right);
808 				STACK_PUSH(stack, int,
809 					   ADDTAGS_UNION_RECURSE);
810 			      }
811 			    else
812 			      {
813 				DPRINT(("  ADDTAGS_UNION_RIGHT_RECURSE "
814 					"num_tags++ tag=%d\n", tag));
815 				uni->right_tag = tag;
816 				tag = next_tag;
817 				num_tags++;
818 				next_tag++;
819 			      }
820 
821 			    break;
822 
823 			  default:
824 			    assert(0);
825 			    break;
826 
827 			  } /* end switch(symbol) */
828 		      } /* end while(tre_stack_num_objects(stack) > last */
829 		    if (!first_pass)
830 		      {
831 			STACK_PUSHX(stack, int, front_tag);
832 			STACK_PUSHX(stack, voidptr, node);
833 			STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_TOP);
834 		      }
835 		  } /* end if (top_union && ...) */
836 
837 		uni = node->obj;
838 		left = uni->left;
839 		right = uni->right;
840 
841 		/* After processing right child. */
842 		STACK_PUSHX(stack, voidptr, regset);
843 		STACK_PUSHX(stack, int, regset_contains != 0);
844 		STACK_PUSHX(stack, voidptr, node);
845 		STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
846 
847 		/* Process right child. */
848 		STACK_PUSHX(stack, voidptr, right);
849 		STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
850 
851 		/* After processing left child. */
852 		STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
853 
854 		/* Process left child. */
855 		STACK_PUSHX(stack, voidptr, left);
856 		STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
857 
858 		/* Regset is not empty, so add a tag here. */
859 		if (regset_contains)
860 		  {
861 		    if (!first_pass)
862 		      {
863 			status = tre_merge_branches(mem, node, NULL, front_tag,
864 						    tnfa->num_tags);
865 			if (status != REG_OK)
866 			  break;
867 			status = tre_add_tag_left(mem, node, front_tag);
868 			if (status != REG_OK)
869 			  break;
870 			if (regset_contains == REGSET_HAS_STARTS)
871 			  tnfa->tag_directions[front_tag] = TRE_TAG_LEFT_MAXIMIZE;
872 			else
873 			  tnfa->tag_directions[front_tag] = direction;
874 			DPRINT(("Setting t%d direction to %s\n", front_tag,
875 				tag_dir_str[tnfa->tag_directions[front_tag]]));
876 			tre_purge_regset(regset, tnfa, front_tag);
877 		      }
878 
879 		    regset[0] = REGSET_UNSET;
880 		    regset_contains = 0;
881 		  }
882 
883 		break;
884 	      }
885 	    } /* end switch (node->type) */
886 
887 	  break; /* end case: ADDTAGS_RECURSE */
888 
889 	case ADDTAGS_AFTER_ITERATION:
890 	  {
891 	    tre_iteration_t *iter;
892 	    tre_ast_node_t *orig;
893 	    int enter_tag;
894 
895 	    node = tre_stack_pop_voidptr(stack);
896 	    orig = node->original ? node->original : node;
897 	    iter = (tre_iteration_t *)orig->obj;
898 	    enter_tag = tre_stack_pop_int(stack);
899 	    if (iter->minimal)
900 	      minimal_tag = enter_tag;
901 
902 	    DPRINT(("After iteration\n"));
903 	    if (first_pass)
904 	      {
905 		node->num_tags = iter->arg->num_tags + tre_stack_pop_int(stack);
906 		DPRINT(("  ADDTAGS_AFTER_ITERATION: node->num_tags = %d\n",
907 			node->num_tags));
908 	      }
909 	    else
910 	      {
911 		/* node->last_matched_branch will have the start tag (the tag
912 		   just *before* the iteration).  iter->arg->last_matched_branch
913 		   will have the tag(s) inside the iteration, the ones that
914 		   may need to be reset if the iteration doesn't match.  So
915 		   before we merge iter->arg into node, we need to set up
916 		   a new tre_last_matched_t and tre_last_matched_branch_t,
917 		   using any of the inside tags as cmp_tag (we choose the first
918 		   tag found by bit_ffs).  If there are no inside tags, we
919 		   don't bother creating the extra structures. */
920 		tre_last_matched_branch_pre_t *b =
921 						iter->arg->last_matched_branch;
922 
923 		if (b && b->n_tags > 0)
924 		  {
925 		    tre_last_matched_pre_t *u;
926 
927 		    bit_ffs(b->tags, num_tags, &b->cmp_tag);
928 		    DPRINT(("  ADDTAGS_AFTER_ITERATION: n_tags=%d "
929 			    "cmp_tag = %d\n", b->n_tags, b->cmp_tag));
930 
931 		    u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t) +
932 				       sizeof(tre_last_matched_branch_pre_t)
933 				       + bitstr_size(tnfa->num_tags));
934 		    if (!u)
935 		      {
936 			status = REG_ESPACE;
937 			break;
938 		      }
939 		    u->branches = b;
940 		    u->n_branches = 1;
941 		    u->start_tag = b->cmp_tag;
942 		    u->tot_branches = b->tot_branches;
943 		    u->tot_last_matched = 1 + b->tot_last_matched;
944 		    u->tot_tags = b->tot_tags;
945 
946 		    b = (tre_last_matched_branch_pre_t *)(u + 1);
947 		    b->last_matched = u;
948 		    b->n_last_matched = 1;
949 		    b->tot_branches = 1 + u->tot_branches;
950 		    b->tot_last_matched = u->tot_last_matched;
951 		    b->tot_tags = u->tot_tags;
952 
953 		    iter->arg->last_matched_branch = b;
954 		  }
955 		status = tre_merge_branches(mem, node, iter->arg, 0,
956 					    tnfa->num_tags);
957 		if (status != REG_OK)
958 		  break;
959 
960 		if (iter->minimal)
961 		  {
962 		    /* Add a union with a left EMPTY literal and the right
963 		       being iter->arg.  This should force the tags inside
964 		       the minimal iteration to prefer being unset */
965 		    if (iter->min == 0 && iter->max <= 1)
966 		      {
967 			tre_ast_node_t *u, *e;
968 
969 			e = tre_ast_new_literal(mem, EMPTY, -1, -1);
970 			if (e == NULL)
971 			  {
972 			    status = REG_ESPACE;
973 			    break;
974 			  }
975 			u = tre_ast_new_union(mem, e, iter->arg);
976 			if (u == NULL)
977 			  {
978 			    status = REG_ESPACE;
979 			    break;
980 			  }
981 			iter->arg = u;
982 		      }
983 
984 		    direction = TRE_TAG_MINIMIZE;
985 		  }
986 		else
987 		  direction = TRE_TAG_MAXIMIZE;
988 		DPRINT(("  Setting direction to %s\n", tag_dir_str[direction]));
989 	      }
990 	    break;
991 	  }
992 
993 	case ADDTAGS_AFTER_CAT_LEFT:
994 	  {
995 	    int new_tag = tre_stack_pop_int(stack);
996 	    next_tag = tre_stack_pop_int(stack);
997 	    DPRINT(("After cat left, tag = %d, next_tag = %d\n",
998 		    tag, next_tag));
999 	    if (new_tag >= 0)
1000 	      {
1001 		DPRINT(("  Setting tag to %d\n", new_tag));
1002 		tag = new_tag;
1003 	      }
1004 	    break;
1005 	  }
1006 
1007 	case ADDTAGS_AFTER_CAT_RIGHT:
1008 	  {
1009 	    tre_catenation_t *cat;
1010 
1011 	    DPRINT(("After cat right\n"));
1012 	    node = tre_stack_pop_voidptr(stack);
1013 	    cat = node->obj;
1014 	    if (first_pass)
1015 	      {
1016 		node->num_tags = cat->left->num_tags + cat->right->num_tags;
1017 		DPRINT(("  ADDTAGS_AFTER_CAT_RIGHT: node->num_tags = %d\n",
1018 			node->num_tags));
1019 	      }
1020 	    else
1021 	      {
1022 		status = tre_merge_branches(mem, cat->left, cat->right, 0,
1023 					    tnfa->num_tags);
1024 		if (status != REG_OK)
1025 		  break;
1026 		status = tre_merge_branches(mem, node, cat->left, 0,
1027 					    tnfa->num_tags);
1028 	      }
1029 	    break;
1030 	  }
1031 
1032 	case ADDTAGS_AFTER_UNION_LEFT:
1033 	  DPRINT(("After union left\n"));
1034 	  /* Lift the bottom of the `regset' array so that when processing
1035 	     the right operand the items currently in the array are
1036 	     invisible.	 The original bottom was saved at ADDTAGS_UNION and
1037 	     will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1038 	  while (*regset != REGSET_UNSET)
1039 	    regset++;
1040 	  regset_contains = 0;
1041 	  break;
1042 
1043 	case ADDTAGS_AFTER_UNION_RIGHT:
1044 	  {
1045 	    int added_tags;
1046 	    tre_ast_node_t *orig;
1047 	    tre_union_t *uni;
1048 	    /* Note: node may not be a UNION, but a CATENATION with a left
1049 	     * tag.  So that is why we pass the original node->obj on the
1050 	     * stack, to get the union's true values. */
1051 
1052 	    DPRINT(("After union right\n"));
1053 	    node = tre_stack_pop_voidptr(stack);
1054 	    orig = node->original ? node->original : node;
1055 	    uni = (tre_union_t *)orig->obj;
1056 	    added_tags = tre_stack_pop_int(stack);
1057 	    if (first_pass)
1058 	      {
1059 		node->num_tags = uni->left->num_tags + uni->right->num_tags
1060 				 + added_tags;
1061 		if (uni->left_tag > 0)
1062 		  node->num_tags++;
1063 		if (uni->right_tag > 0)
1064 		  node->num_tags++;
1065 		DPRINT(("  ADDTAGS_AFTER_UNION_RIGHT: node->num_tags = %d\n",
1066 			node->num_tags));
1067 	      }
1068 	    regset = tre_stack_pop_voidptr(stack);
1069 
1070 	    /* Add tags after both children, the left child gets a smaller
1071 	       tag than the right child.  This guarantees that we prefer
1072 	       the left child over the right child. */
1073 	    /* XXX - This is not always necessary (if the children have
1074 	       tags which must be seen for every match of that child). */
1075 	    if (!first_pass && node->make_branches)
1076 	      {
1077 		tre_last_matched_branch_pre_t *lb =
1078 		  uni->left->last_matched_branch;
1079 		tre_last_matched_branch_pre_t *rb =
1080 		  uni->right->last_matched_branch;
1081 		tre_last_matched_pre_t *lu =
1082 		  uni->left->last_matched_in_progress;
1083 		tre_last_matched_pre_t *ru =
1084 		  uni->right->last_matched_in_progress;
1085 		tre_last_matched_pre_t *u;
1086 		/* We don't need to call tre_merge_branches because these
1087 		 * tags don't participate in submatch ranges, so don't need
1088 		 * to be recorded.  But we do set the cmp_tag entry of the
1089 		 * tre_last_matched_branch_pre_t, so we might call
1090 		 * tre_merge_branches if we need to create an empty
1091 		 * tre_last_matched_branch_pre_t. */
1092 		if (uni->left_tag > 0)
1093 		  {
1094 		    DPRINT(("Setting t%d direction to maximize\n",
1095 			    uni->left_tag));
1096 		    status = tre_add_tag_right(mem, uni->left, uni->left_tag);
1097 		    if (status != REG_OK)
1098 		      break;
1099 		    tnfa->tag_directions[uni->left_tag] = TRE_TAG_MAXIMIZE;
1100 		    if (!lb)
1101 		      {
1102 			status = tre_merge_branches(mem, uni->left, NULL, -1,
1103 						    tnfa->num_tags);
1104 			if (status != REG_OK)
1105 			  break;
1106 			lb = uni->left->last_matched_branch;
1107 		      }
1108 		    lb->cmp_tag = uni->left_tag;
1109 		  }
1110 		if (uni->right_tag > 0)
1111 		  {
1112 		    DPRINT(("Setting t%d direction to maximize\n",
1113 			    uni->right_tag));
1114 		    status = tre_add_tag_right(mem, uni->right, uni->right_tag);
1115 		    if (status != REG_OK)
1116 		      break;
1117 		    tnfa->tag_directions[uni->right_tag] = TRE_TAG_MAXIMIZE;
1118 		    if (!rb)
1119 		      {
1120 			status = tre_merge_branches(mem, uni->right, NULL, -1,
1121 						    tnfa->num_tags);
1122 			if (status != REG_OK)
1123 			  break;
1124 			rb = uni->right->last_matched_branch;
1125 		      }
1126 		    rb->cmp_tag = uni->right_tag;
1127 		  }
1128 		/* Now merge the tre_last_matched_branch_pre_t into a
1129 		   tre_last_matched_pre_t */
1130 		if (lu == NULL)
1131 		  {
1132 		    if (ru == NULL)
1133 		      {
1134 			/* Create a new tre_last_matched_pre_t */
1135 			u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t));
1136 			if (!u)
1137 			  {
1138 			    status = REG_ESPACE;
1139 			    break;
1140 			  }
1141 			u->tot_last_matched = 1;
1142 
1143 			if (lb)
1144 			  {
1145 			    u->branches = lb;
1146 			    u->n_branches = 1;
1147 			    u->tot_branches += lb->tot_branches;
1148 			    u->tot_last_matched += lb->tot_last_matched;
1149 			    u->tot_tags += lb->tot_tags;
1150 			    if (rb)
1151 			      {
1152 				lb->next = rb;
1153 				u->n_branches++;
1154 				u->tot_branches += rb->tot_branches;
1155 				u->tot_last_matched += rb->tot_last_matched;
1156 				u->tot_tags += rb->tot_tags;
1157 			      }
1158 			  }
1159 			else if (rb)
1160 			  {
1161 			    u->branches = rb;
1162 			    u->n_branches = 1;
1163 			    u->tot_branches += rb->tot_branches;
1164 			    u->tot_last_matched += rb->tot_last_matched;
1165 			    u->tot_tags += rb->tot_tags;
1166 			  }
1167 		      }
1168 		    else
1169 		      {
1170 			/* Use ru, and add lb */
1171 			u = ru;
1172 			if (lb)
1173 			  {
1174 			    lb->next = u->branches;
1175 			    u->branches = lb;
1176 			    u->n_branches++;
1177 			    u->tot_branches += lb->tot_branches;
1178 			    u->tot_last_matched += lb->tot_last_matched;
1179 			    u->tot_tags += lb->tot_tags;
1180 			  }
1181 		      }
1182 		  }
1183 		else if (ru == NULL)
1184 		  {
1185 		    /* Use lu, and add rb */
1186 		    u = lu;
1187 		    if (rb)
1188 		      {
1189 			rb->next = u->branches;
1190 			u->branches = rb;
1191 			u->n_branches++;
1192 			u->tot_branches += rb->tot_branches;
1193 			u->tot_last_matched += rb->tot_last_matched;
1194 			u->tot_tags += rb->tot_tags;
1195 		      }
1196 		  }
1197 		else
1198 		  {
1199 		    /* Merge lu and ru into lu */
1200 		    if (lu->branches)
1201 		      {
1202 			if (ru->branches)
1203 			  {
1204 			    tre_last_matched_branch_pre_t *b = lu->branches;
1205 			    while (b->next) b = b->next;
1206 			    b->next = ru->branches;
1207 			    lu->n_branches += ru->n_branches;
1208 			  }
1209 		      }
1210 		    else if (ru->branches)
1211 		      {
1212 			lu->branches = ru->branches;
1213 			lu->n_branches = ru->n_branches;
1214 		      }
1215 		    lu->tot_branches += ru->tot_branches;
1216 		    lu->tot_last_matched += ru->tot_last_matched - 1;
1217 		    lu->tot_tags += ru->tot_tags;
1218 		    u = lu;
1219 		  }
1220 		node->last_matched_in_progress = u;
1221 	      }
1222 	    direction = TRE_TAG_MAXIMIZE;
1223 	    break;
1224 	  }
1225 
1226 	case ADDTAGS_AFTER_UNION_TOP: /* only called when not first_pass */
1227 	  {
1228 	    tre_last_matched_branch_pre_t *b;
1229 	    tre_last_matched_pre_t *u;
1230 	    int start_tag;
1231 
1232 	    DPRINT(("After union top\n"));
1233 	    node = tre_stack_pop_voidptr(stack);
1234 	    start_tag = tre_stack_pop_int(stack);
1235 	    b = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
1236 			       + bitstr_size(tnfa->num_tags));
1237 	    if (!b)
1238 	      {
1239 		status = REG_ESPACE;
1240 		break;
1241 	      }
1242 
1243 	    u = node->last_matched_in_progress;
1244 	    u->start_tag = start_tag;
1245 	    b->tot_branches = 1 + u->tot_branches;
1246 	    b->tot_last_matched = u->tot_last_matched;
1247 	    b->tot_tags = u->tot_tags;
1248 	    b->last_matched = u;
1249 	    b->n_last_matched = 1;
1250 	    node->last_matched_branch = b;
1251 	    node->last_matched_in_progress = NULL;
1252 	    break;
1253 	  }
1254 
1255 	default:
1256 	  assert(0);
1257 	  break;
1258 
1259 	} /* end switch(symbol) */
1260     } /* end while(tre_stack_num_objects(stack) > bottom) */
1261 
1262   if (status != REG_OK)
1263     {
1264       DPRINT(("Error during %s pass\n", first_pass ? "first" : "second"));
1265       goto error_post_compile;
1266     }
1267 
1268   if (!first_pass)
1269     {
1270       int i;
1271       if (num_tags != tnfa->num_tags)
1272 	{
1273 	  DPRINT(("num_tags(%d) != tnfa->num_tags(%d)\n", num_tags,
1274 		  tnfa->num_tags));
1275 	  status = REG_BADPAT;
1276 	  goto error_post_compile;
1277 	}
1278 
1279       tre_purge_regset(regset, tnfa, tag);
1280       DPRINT(("Setting t%d to %s\n", num_tags,
1281 	      tag_dir_str[direction]));
1282       tnfa->tag_directions[num_tags] = direction;
1283 
1284       if (rtp > reorder_tags + 2 * tnfa->num_reorder_tags)
1285 	{
1286 	  DPRINT(("Processed %d reorder tags instead of %d\n",
1287 		  (int)(rtp - reorder_tags) / 2, tnfa->num_reorder_tags));
1288 	  status = REG_BADPAT;
1289 	  goto error_post_compile;
1290 	}
1291     *rtp = -1;
1292 #if TRE_DEBUG
1293       if (reorder_tags[0] >= 0)
1294 	{
1295 	  DPRINT(("reorder_tags:\n"));
1296 	  for (rtp = reorder_tags; *rtp >= 0;)
1297 	    {
1298 	      DPRINT(("%d after ", *rtp++));
1299 	      DPRINT(("%d\n", *rtp++));
1300 	    }
1301 	}
1302 	else
1303 	  DPRINT(("No reorder_tags\n"));
1304 #endif /* TRE_DEBUG */
1305 
1306       /* Initialize to_reorder */
1307       for (i = 0; i < num_tags; i++)
1308 	to_reorder[i] = i;
1309       /* Use to_seq_order to convert reorder_tags values, and use those to
1310        * reorder to_reorder */
1311       for (rtp = reorder_tags; *rtp >= 0;)
1312 	{
1313 	  int j, high, low;
1314 	  int ti = *rtp++;
1315 
1316 	  /* Skip reordering the final tag */
1317 	  if (ti >= num_tags)
1318 	    {
1319 	      DPRINT(("Skipping reorder of %d\n", ti));
1320 	      rtp++;
1321 	      continue;
1322 	    }
1323 	  /* The number of the tag to reorder */
1324 	  high = to_reorder[ti];
1325 	  /* Reorder after this tag */
1326 	  low = to_reorder[*rtp++];
1327 
1328 	  DPRINT(("ti=%d high=%d low=%d\n", ti, high, low));
1329 	  if (low > high)
1330 	    {
1331 	      DPRINT(("Tag %d already before %d\n", high, low));
1332 	      continue;
1333 	    }
1334 	  for (j = 0; j < num_tags; j++)
1335 	    if (to_reorder[j] > low && to_reorder[j] < high)
1336 	      to_reorder[j]++;
1337 	  to_reorder[ti] = low + 1;
1338 #ifdef TRE_DEBUG
1339 	  DPRINT(("to_reorder=("));
1340 	  for (j = 0; j < num_tags; j++)
1341 	    {
1342 	      DPRINT(("%d", to_reorder[j]));
1343 	      if (j < num_tags - 1)
1344 		DPRINT((","));
1345 	    }
1346 	  DPRINT((")\n"));
1347 #endif /* TRE_DEBUG */
1348 	}
1349       /* Determine if reordering in really necessary */
1350       {
1351 	int need_reorder = 0;
1352 	for (i = 0; i < num_tags; i++)
1353 	  if(to_reorder[i] != i)
1354 	    {
1355 	      need_reorder = 1;
1356 	      break;
1357 	    }
1358 	/* If need_reorder is not set, free reorder_tags, and set to NULL,
1359 	 * indicating no reordering is needed */
1360 	if (!need_reorder)
1361 	  {
1362 	    DPRINT(("Don't need to reorder\n"));
1363 	    xfree(reorder_tags);
1364 	    reorder_tags = NULL;
1365 	  }
1366       }
1367     }
1368 
1369   if (reorder_tags)
1370     {
1371       int i;
1372       tre_tag_direction_t *new_tag_directions;
1373 #if TRE_DEBUG
1374       DPRINT(("to_reorder:"));
1375       for (i = 0; i < num_tags; i++)
1376 	DPRINT((" %d->%d", i, to_reorder[i]));
1377       DPRINT(("\n"));
1378 #endif /* TRE_DEBUG */
1379 
1380       DPRINT(("Reordering submatch_data\n"));
1381       for (i = 0; i < tnfa->num_submatches; i++)
1382 	{
1383 #if TRE_DEBUG
1384 	  int so = tnfa->submatch_data[i].so_tag;
1385 	  int eo = tnfa->submatch_data[i].eo_tag;
1386 #endif /* TRE_DEBUG */
1387 	  tnfa->submatch_data[i].so_tag =
1388 	    to_reorder[tnfa->submatch_data[i].so_tag];
1389 	  tnfa->submatch_data[i].eo_tag =
1390 	    tnfa->submatch_data[i].eo_tag < num_tags ?
1391 	    to_reorder[tnfa->submatch_data[i].eo_tag] :
1392 	    tnfa->submatch_data[i].eo_tag;
1393 	  DPRINT(("pmatch[%d]: {%d, %d}->{%d, %d}\n", i, so, eo,
1394 		  tnfa->submatch_data[i].so_tag,
1395 		  tnfa->submatch_data[i].eo_tag));
1396 	}
1397 
1398       DPRINT(("Reordering tag_directions\n"));
1399       /* We only allocate num_tags directions and reorder them.  The
1400        * num_tags-th direction (end tag) is left unchanged. */
1401       new_tag_directions = xmalloc(sizeof(*new_tag_directions) * num_tags);
1402       if (new_tag_directions == NULL)
1403 	{
1404 	  status = REG_ESPACE;
1405 	  goto error_post_compile;
1406 	}
1407       for (i = 0; i < num_tags; i++)
1408 	{
1409 	  new_tag_directions[to_reorder[i]] = tnfa->tag_directions[i];
1410 	}
1411 #if TRE_DEBUG
1412       for (i = 0; i < num_tags; i++)
1413 	{
1414 	  DPRINT(("t%d %s->%s\n", i,
1415 		  tag_dir_str[tnfa->tag_directions[i]],
1416 		  tag_dir_str[new_tag_directions[i]]));
1417 	}
1418 	DPRINT(("t%d %s->%s\n", num_tags,
1419 		tag_dir_str[tnfa->tag_directions[num_tags]],
1420 		tag_dir_str[tnfa->tag_directions[num_tags]]));
1421 #endif /* TRE_DEBUG */
1422       memcpy(tnfa->tag_directions, new_tag_directions, sizeof(*new_tag_directions) * num_tags);
1423       xfree(new_tag_directions);
1424 
1425       DPRINT(("Reordering minimal_tags\n"));
1426       for (i = 0; tnfa->minimal_tags[i] >= 0; i++)
1427 	tnfa->minimal_tags[i] = tnfa->minimal_tags[i] < num_tags ?
1428 				to_reorder[tnfa->minimal_tags[i]] :
1429 				tnfa->minimal_tags[i];
1430 
1431       DPRINT(("Reordering AST tags\n"));
1432       STACK_PUSH(stack, voidptr, tree);
1433       while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1434 	{
1435 	  node = tre_stack_pop_voidptr(stack);
1436 
1437 	  switch (node->type)
1438 	    {
1439 	    case LITERAL:
1440 	      {
1441 		tre_literal_t *lit = (tre_literal_t *)node->obj;
1442 		if (IS_TAG(lit))
1443 		  lit->code_max = to_reorder[lit->code_max];
1444 		break;
1445 	      }
1446 
1447 	    case UNION:
1448 	      {
1449 		tre_union_t *uni = (tre_union_t *)node->obj;
1450 		STACK_PUSHX(stack, voidptr, uni->right);
1451 		STACK_PUSHX(stack, voidptr, uni->left);
1452 		break;
1453 	      }
1454 
1455 	    case CATENATION:
1456 	      {
1457 		tre_catenation_t *cat = (tre_catenation_t *)node->obj;
1458 		STACK_PUSHX(stack, voidptr, cat->right);
1459 		STACK_PUSHX(stack, voidptr, cat->left);
1460 		break;
1461 	      }
1462 
1463 	    case ITERATION:
1464 	      {
1465 		tre_iteration_t *iter = (tre_iteration_t *)node->obj;
1466 		STACK_PUSHX(stack, voidptr, iter->arg);
1467 		break;
1468 	      }
1469 
1470 	    default:
1471 	      assert(0);
1472 	      break;
1473 	    }
1474 	}
1475 	if (status != REG_OK)
1476 	  {
1477 	    DPRINT(("Error while reordering tags\n"));
1478 	    goto error_post_compile;
1479 	  }
1480     }
1481 
1482 
1483   if (!first_pass)
1484     {
1485       if (tree->last_matched_branch)
1486 	{
1487 	  tre_last_matched_branch_t *buf, *b, *bb;
1488 	  tre_last_matched_branch_pre_t *bp;
1489 	  tre_last_matched_t *u, *uu;
1490 	  tre_last_matched_pre_t *up;
1491 	  int *t;
1492 	  int i;
1493 #ifdef TRE_DEBUG
1494 	  tre_last_matched_branch_t *_b;
1495 	  tre_last_matched_t *_u;
1496 	  int *_t;
1497 
1498 	  DPRINT(("last_match_branch_pre:\n"));
1499 	  print_last_match_branch_pre(tree->last_matched_branch, 0, num_tags);
1500 #endif /* TRE_DEBUG */
1501 	  buf = (tre_last_matched_branch_t *)xcalloc(1,
1502 				     tree->last_matched_branch->tot_branches
1503 				     * sizeof(tre_last_matched_branch_t) +
1504 				     tree->last_matched_branch->tot_last_matched
1505 				     * sizeof(tre_last_matched_t) +
1506 				     tree->last_matched_branch->tot_tags *
1507 				     sizeof(int));
1508 	  if (!buf)
1509 	    {
1510 	      status = REG_ESPACE;
1511 	      goto error_post_compile;
1512 	    }
1513 
1514 	  b = buf;
1515 	  u = (tre_last_matched_t *)(b +
1516 	      tree->last_matched_branch->tot_branches);
1517 	  t = (int *)(u + tree->last_matched_branch->tot_last_matched);
1518 #ifdef TRE_DEBUG
1519 	  _b = b;
1520 	  _u = u;
1521 	  _t = t;
1522 #endif /* TRE_DEBUG */
1523 	  DPRINT(("Copying info_pre to info\n"));
1524 	  STACK_PUSH(stack, voidptr, tree->last_matched_branch);
1525 	  STACK_PUSH(stack, int, 1);
1526 	  STACK_PUSH(stack, int, COPY_LAST_MATCHED_BRANCH);
1527 
1528 	  while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1529 	    {
1530 	      switch (tre_stack_pop_int(stack))
1531 		{
1532 		case COPY_LAST_MATCHED_BRANCH:
1533 		  i = tre_stack_pop_int(stack);
1534 		  /* The tre_last_matched_branch_pre_t * is still on the
1535 		     stack */
1536 		  STACK_PUSHX(stack, voidptr, b);
1537 		  STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
1538 		  b += i;
1539 		  break;
1540 
1541 		case COPY_LAST_MATCHED_BRANCH_NEXT:
1542 		  bb = tre_stack_pop_voidptr(stack);
1543 		  bp = tre_stack_pop_voidptr(stack);
1544 		  bb->n_last_matched = bp->n_last_matched;
1545 		  bb->cmp_tag = bp->cmp_tag;
1546 		  if (bp->n_tags > 0)
1547 		    {
1548 		      int n;
1549 		      n = bb->n_tags = bp->n_tags;
1550 		      bb->tags = t;
1551 		      for (i = 0; i < num_tags; i++)
1552 			if (bit_test(bp->tags, i))
1553 			  {
1554 			    *t++ = i;
1555 			    if (--n <= 0)
1556 			      break;
1557 			  }
1558 		    }
1559 		  if (bp->next)
1560 		    {
1561 		      STACK_PUSHX(stack, voidptr, bp->next);
1562 		      STACK_PUSHX(stack, voidptr, bb + 1);
1563 		      STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
1564 		    }
1565 		  if (bp->n_last_matched > 0)
1566 		    {
1567 		      bb->last_matched = u;
1568 		      STACK_PUSHX(stack, voidptr, bp->last_matched);
1569 		      STACK_PUSHX(stack, int, bp->n_last_matched);
1570 		      STACK_PUSHX(stack, int, COPY_LAST_MATCHED);
1571 		    }
1572 		  break;
1573 
1574 		case COPY_LAST_MATCHED:
1575 		  i = tre_stack_pop_int(stack);
1576 		  /* The tre_last_matched_pre_t * is still on the stack */
1577 		  STACK_PUSHX(stack, voidptr, u);
1578 		  STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
1579 		  u += i;
1580 		  break;
1581 
1582 		case COPY_LAST_MATCHED_NEXT:
1583 		  uu = tre_stack_pop_voidptr(stack);
1584 		  up = tre_stack_pop_voidptr(stack);
1585 		  uu->n_branches = up->n_branches;
1586 		  uu->branches = b;
1587 		  uu->start_tag = up->start_tag;
1588 		  if (up->next)
1589 		    {
1590 		      STACK_PUSHX(stack, voidptr, up->next);
1591 		      STACK_PUSHX(stack, voidptr, uu + 1);
1592 		      STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
1593 		    }
1594 		  STACK_PUSHX(stack, voidptr, up->branches);
1595 		  STACK_PUSHX(stack, int, up->n_branches);
1596 		  STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH);
1597 		  break;
1598 		}
1599 	    }
1600 	  if (status != REG_OK)
1601 	    goto error_post_compile;
1602 #ifdef TRE_DEBUG
1603 	  DPRINT(("last_matched_branch:\n"));
1604 	  print_last_match_branch(buf, 0);
1605 	  if (b != _b + tree->last_matched_branch->tot_branches)
1606 	    DPRINT(("b/%p != _b + tree->last_matched_branch->tot_branches/%p\n",
1607 		    b, _b + tree->last_matched_branch->tot_branches));
1608 	  if (u != _u + tree->last_matched_branch->tot_last_matched)
1609 	    DPRINT(("u/%p != _u + "
1610 		    "tree->last_matched_branch->tot_last_matched/%p\n",
1611 		    u, _u + tree->last_matched_branch->tot_last_matched));
1612 	  if (t != _t + tree->last_matched_branch->tot_tags)
1613 	    DPRINT(("t/%p != _t + tree->last_matched_branch->tot_tags/%p\n",
1614 		    t, _t + tree->last_matched_branch->tot_tags));
1615 #endif /* TRE_DEBUG */
1616 	  tnfa->last_matched_branch = buf;
1617 	}
1618 #ifdef TRE_DEBUG
1619       else
1620 	DPRINT(("No last_match_branch_pre\n"));
1621 #endif /* TRE_DEBUG */
1622     }
1623 
1624   DPRINT(("tre_add_tags: %s complete.  Number of tags %d.\n",
1625 	  first_pass? "First pass" : "Second pass", num_tags));
1626 #ifdef TRE_DEBUG
1627   tre_ast_print(tree);
1628 #endif /* TRE_DEBUG */
1629   DPRINT(("tre_add_tags: tree->num_tags=%d num_tags=%d\n", tree->num_tags,
1630 	  num_tags));
1631   assert(tree->num_tags == num_tags);
1632   tnfa->end_tag = num_tags;
1633   tnfa->num_tags = num_tags;
1634   tnfa->num_minimals = num_minimals;
1635 error_post_compile:
1636   xfree(reorder_tags);
1637 error_reorder_tags:
1638   xfree(orig_regset);
1639 error_regset:
1640   return status;
1641 }
1642 
1643 
1644 
1645 /*
1646   AST to TNFA compilation routines.
1647 */
1648 
1649 typedef enum {
1650   COPY_RECURSE,
1651   COPY_SET_RESULT_PTR
1652 } tre_copyast_symbol_t;
1653 
1654 /* Flags for tre_copy_ast(). */
1655 #define COPY_REMOVE_TAGS	 1
1656 #define COPY_MAXIMIZE_FIRST_TAG	 2
1657 
1658 static reg_errcode_t
1659 tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1660 	     int flags, int *pos_add, tre_tag_direction_t *tag_directions,
1661 	     tre_ast_node_t **copy, int *max_pos)
1662 {
1663   reg_errcode_t status = REG_OK;
1664   int bottom = tre_stack_num_objects(stack);
1665   int num_copied = 0;
1666   int first_tag = 1;
1667   tre_ast_node_t **result = copy;
1668   tre_copyast_symbol_t symbol;
1669 
1670   STACK_PUSH(stack, voidptr, ast);
1671   STACK_PUSH(stack, int, COPY_RECURSE);
1672 
1673   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1674     {
1675       tre_ast_node_t *node;
1676       if (status != REG_OK)
1677 	break;
1678 
1679       symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1680       switch (symbol)
1681 	{
1682 	case COPY_SET_RESULT_PTR:
1683 	  result = tre_stack_pop_voidptr(stack);
1684 	  break;
1685 	case COPY_RECURSE:
1686 	  node = tre_stack_pop_voidptr(stack);
1687 	  switch (node->type)
1688 	    {
1689 	    case LITERAL:
1690 	      {
1691 		tre_literal_t *lit = node->obj;
1692 		int pos = lit->position;
1693 		int min = lit->code_min;
1694 		int max = lit->code_max;
1695 		tre_bracket_match_list_t *list = !IS_SPECIAL(lit) ?
1696 						  lit->u.bracket_match_list :
1697 						  NULL;
1698 		if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1699 		  {
1700 		    /* XXX - e.g. [ab] has only one position but two
1701 		       nodes, so we are creating holes in the state space
1702 		       here.  Not fatal, just wastes memory. */
1703 		    pos += *pos_add;
1704 		    num_copied++;
1705 		  }
1706 		else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
1707 		  {
1708 		    /* Change this tag to empty. */
1709 		    min = EMPTY;
1710 		    max = pos = -1;
1711 		  }
1712 		else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
1713 			 && first_tag)
1714 		  {
1715 		    /* Maximize the first tag. */
1716 		    if (tag_directions[max] == TRE_TAG_LEFT_MAXIMIZE)
1717 		      tag_directions[max] = TRE_TAG_MAXIMIZE;
1718 		    first_tag = 0;
1719 		  }
1720 		*result = tre_ast_new_literal(mem, min, max, pos);
1721 		if (*result == NULL)
1722 		  status = REG_ESPACE;
1723 
1724 		if (pos > *max_pos)
1725 		  *max_pos = pos;
1726 
1727 		if (!IS_SPECIAL(lit))
1728 		  ((tre_literal_t *)(*result)->obj)->u.bracket_match_list
1729 		      = list;
1730 		break;
1731 	      }
1732 	    case UNION:
1733 	      {
1734 		tre_union_t *uni = node->obj;
1735 		tre_union_t *tmp;
1736 		*result = tre_ast_new_union(mem, uni->left, uni->right);
1737 		if (*result == NULL)
1738 		  {
1739 		    status = REG_ESPACE;
1740 		    break;
1741 		  }
1742 		tmp = (*result)->obj;
1743 		result = &tmp->left;
1744 		STACK_PUSHX(stack, voidptr, uni->right);
1745 		STACK_PUSHX(stack, int, COPY_RECURSE);
1746 		STACK_PUSHX(stack, voidptr, &tmp->right);
1747 		STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1748 		STACK_PUSHX(stack, voidptr, uni->left);
1749 		STACK_PUSHX(stack, int, COPY_RECURSE);
1750 		break;
1751 	      }
1752 	    case CATENATION:
1753 	      {
1754 		tre_catenation_t *cat = node->obj;
1755 		tre_catenation_t *tmp;
1756 		*result = tre_ast_new_catenation(mem, cat->left, cat->right);
1757 		if (*result == NULL)
1758 		  {
1759 		    status = REG_ESPACE;
1760 		    break;
1761 		  }
1762 		tmp = (*result)->obj;
1763 		tmp->left = NULL;
1764 		tmp->right = NULL;
1765 		result = &tmp->left;
1766 
1767 		STACK_PUSHX(stack, voidptr, cat->right);
1768 		STACK_PUSHX(stack, int, COPY_RECURSE);
1769 		STACK_PUSHX(stack, voidptr, &tmp->right);
1770 		STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1771 		STACK_PUSHX(stack, voidptr, cat->left);
1772 		STACK_PUSHX(stack, int, COPY_RECURSE);
1773 		break;
1774 	      }
1775 	    case ITERATION:
1776 	      {
1777 		tre_iteration_t *iter = node->obj;
1778 		STACK_PUSHX(stack, voidptr, iter->arg);
1779 		STACK_PUSHX(stack, int, COPY_RECURSE);
1780 		*result = tre_ast_new_iter(mem, iter->arg, iter->min,
1781 					   iter->max, iter->minimal);
1782 		if (*result == NULL)
1783 		  {
1784 		    status = REG_ESPACE;
1785 		    break;
1786 		  }
1787 		iter = (*result)->obj;
1788 		result = &iter->arg;
1789 		break;
1790 	      }
1791 	    default:
1792 	      assert(0);
1793 	      break;
1794 	    }
1795 	  break;
1796 	}
1797     }
1798   *pos_add += num_copied;
1799   return status;
1800 }
1801 
1802 typedef enum {
1803   EXPAND_RECURSE,
1804   EXPAND_AFTER_ITER
1805 } tre_expand_ast_symbol_t;
1806 
1807 /* Expands each iteration node that has a finite nonzero minimum or maximum
1808    iteration count to a catenated sequence of copies of the node. */
1809 static reg_errcode_t
1810 tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1811 	       int *position, tre_tag_direction_t *tag_directions,
1812 	       int *max_depth)
1813 {
1814   reg_errcode_t status = REG_OK;
1815   int bottom = tre_stack_num_objects(stack);
1816   int pos_add = 0;
1817   int pos_add_total = 0;
1818   int max_pos = 0;
1819 #ifdef TRE_APPROX
1820   /* Current approximate matching parameters. */
1821   int params[TRE_PARAM_LAST];
1822   /* Approximate parameter nesting level. */
1823   int params_depth = 0;
1824 #endif /* TRE_APPROX */
1825   int iter_depth = 0;
1826 #ifdef TRE_APPROX
1827   int i;
1828 #endif /* TRE_APPROX */
1829 
1830 #ifdef TRE_APPROX
1831   for (i = 0; i < TRE_PARAM_LAST; i++)
1832     params[i] = TRE_PARAM_DEFAULT;
1833 #endif /* TRE_APPROX */
1834 
1835   STACK_PUSHR(stack, voidptr, ast);
1836   STACK_PUSHR(stack, int, EXPAND_RECURSE);
1837   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1838     {
1839       tre_ast_node_t *node;
1840       tre_expand_ast_symbol_t symbol;
1841 
1842       if (status != REG_OK)
1843 	break;
1844 
1845       DPRINT(("pos_add %d\n", pos_add));
1846 
1847       symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1848       node = tre_stack_pop_voidptr(stack);
1849       switch (symbol)
1850 	{
1851 	case EXPAND_RECURSE:
1852 	  switch (node->type)
1853 	    {
1854 	    case LITERAL:
1855 	      {
1856 		tre_literal_t *lit= node->obj;
1857 		if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1858 		  {
1859 		    lit->position += pos_add;
1860 		    if (lit->position > max_pos)
1861 		      max_pos = lit->position;
1862 		  }
1863 		break;
1864 	      }
1865 	    case UNION:
1866 	      {
1867 		tre_union_t *uni = node->obj;
1868 		STACK_PUSHX(stack, voidptr, uni->right);
1869 		STACK_PUSHX(stack, int, EXPAND_RECURSE);
1870 		STACK_PUSHX(stack, voidptr, uni->left);
1871 		STACK_PUSHX(stack, int, EXPAND_RECURSE);
1872 		break;
1873 	      }
1874 	    case CATENATION:
1875 	      {
1876 		tre_catenation_t *cat = node->obj;
1877 		STACK_PUSHX(stack, voidptr, cat->right);
1878 		STACK_PUSHX(stack, int, EXPAND_RECURSE);
1879 		STACK_PUSHX(stack, voidptr, cat->left);
1880 		STACK_PUSHX(stack, int, EXPAND_RECURSE);
1881 		break;
1882 	      }
1883 	    case ITERATION:
1884 	      {
1885 		tre_iteration_t *iter = node->obj;
1886 		STACK_PUSHX(stack, int, pos_add);
1887 		STACK_PUSHX(stack, voidptr, node);
1888 		STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
1889 		STACK_PUSHX(stack, voidptr, iter->arg);
1890 		STACK_PUSHX(stack, int, EXPAND_RECURSE);
1891 		/* If we are going to expand this node at EXPAND_AFTER_ITER
1892 		   then don't increase the `pos' fields of the nodes now, it
1893 		   will get done when expanding. */
1894 		if (iter->min > 1 || iter->max > 1)
1895 		  pos_add = 0;
1896 		iter_depth++;
1897 		DPRINT(("iter\n"));
1898 		break;
1899 	      }
1900 	    default:
1901 	      assert(0);
1902 	      break;
1903 	    }
1904 	  break;
1905 	case EXPAND_AFTER_ITER:
1906 	  {
1907 	    tre_iteration_t *iter = node->obj;
1908 	    int pos_add_last;
1909 	    pos_add = tre_stack_pop_int(stack);
1910 	    pos_add_last = pos_add;
1911 	    /* Originally (in tre_parse_bound), if min == 0 && max == 0, we
1912 	       immediate replace the whole iteration with EMPTY.  This
1913 	       unfortunately drops any submatches, and messes up setting the
1914 	       pmatch values (we can get tags of -1, and tag values in the
1915 	       billions).  So we left it there and replace with EMPTY here. */
1916 	    if (iter->min == 0 && iter->max == 0)
1917 	      {
1918 		tre_ast_node_t *empty = tre_ast_new_literal(mem, EMPTY, -1, -1);
1919 		if (empty == NULL)
1920 		  return REG_ESPACE;
1921 		node->obj = empty->obj;
1922 		node->type = empty->type;
1923 	      }
1924 	    else if (iter->min > 1 || iter->max > 1)
1925 	      {
1926 		tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
1927 		int j;
1928 		int pos_add_save = pos_add;
1929 
1930 		/* Create a catenated sequence of copies of the node. */
1931 		for (j = 0; j < iter->min; j++)
1932 		  {
1933 		    tre_ast_node_t *copy;
1934 		    /* Remove tags from all but the last copy. */
1935 		    int flags = ((j + 1 < iter->min)
1936 				 ? COPY_REMOVE_TAGS
1937 				 : COPY_MAXIMIZE_FIRST_TAG);
1938 		    DPRINT(("  pos_add %d\n", pos_add));
1939 		    pos_add_save = pos_add;
1940 		    status = tre_copy_ast(mem, stack, iter->arg, flags,
1941 					  &pos_add, tag_directions, &copy,
1942 					  &max_pos);
1943 		    if (status != REG_OK)
1944 		      return status;
1945 		    if (seq1 != NULL)
1946 		      seq1 = tre_ast_new_catenation(mem, seq1, copy);
1947 		    else
1948 		      seq1 = copy;
1949 		    if (seq1 == NULL)
1950 		      return REG_ESPACE;
1951 		  }
1952 
1953 		if (iter->max == -1)
1954 		  {
1955 		    /* No upper limit. */
1956 		    pos_add_save = pos_add;
1957 		    status = tre_copy_ast(mem, stack, iter->arg, 0,
1958 					  &pos_add, NULL, &seq2, &max_pos);
1959 		    if (status != REG_OK)
1960 		      return status;
1961 		    seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1962 		    if (seq2 == NULL)
1963 		      return REG_ESPACE;
1964 		  }
1965 		else
1966 		  {
1967 		    for (j = iter->min; j < iter->max; j++)
1968 		      {
1969 			tre_ast_node_t *tmp, *copy;
1970 			pos_add_save = pos_add;
1971 			status = tre_copy_ast(mem, stack, iter->arg, 0,
1972 					      &pos_add, NULL, &copy, &max_pos);
1973 			if (status != REG_OK)
1974 			  return status;
1975 			if (seq2 != NULL)
1976 			  seq2 = tre_ast_new_catenation(mem, copy, seq2);
1977 			else
1978 			  seq2 = copy;
1979 			if (seq2 == NULL)
1980 			  return REG_ESPACE;
1981 			tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
1982 			if (tmp == NULL)
1983 			  return REG_ESPACE;
1984 			seq2 = tre_ast_new_union(mem, tmp, seq2);
1985 			if (seq2 == NULL)
1986 			  return REG_ESPACE;
1987 		      }
1988 		  }
1989 
1990 		pos_add = pos_add_save;
1991 		if (seq1 == NULL)
1992 		  seq1 = seq2;
1993 		else if (seq2 != NULL)
1994 		  seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1995 		if (seq1 == NULL)
1996 		  return REG_ESPACE;
1997 		node->obj = seq1->obj;
1998 		node->type = seq1->type;
1999 	      }
2000 
2001 	    iter_depth--;
2002 	    pos_add_total += pos_add - pos_add_last;
2003 	    if (iter_depth == 0)
2004 	      pos_add = pos_add_total;
2005 
2006 #ifdef TRE_APPROX
2007 	    /* If approximate parameters are specified, surround the result
2008 	       with two parameter setting nodes.  The one on the left sets
2009 	       the specified parameters, and the one on the right restores
2010 	       the old parameters. */
2011 	    if (iter->params)
2012 	      {
2013 		tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy;
2014 		int *old_params;
2015 
2016 		tmp_l = tre_ast_new_literal(mem, PARAMETER, 0, -1);
2017 		if (!tmp_l)
2018 		  return REG_ESPACE;
2019 		((tre_literal_t *)tmp_l->obj)->u.params = iter->params;
2020 		iter->params[TRE_PARAM_DEPTH] = params_depth + 1;
2021 		tmp_r = tre_ast_new_literal(mem, PARAMETER, 0, -1);
2022 		if (!tmp_r)
2023 		  return REG_ESPACE;
2024 		old_params = tre_mem_alloc(mem, sizeof(*old_params)
2025 					   * TRE_PARAM_LAST);
2026 		if (!old_params)
2027 		  return REG_ESPACE;
2028 		for (i = 0; i < TRE_PARAM_LAST; i++)
2029 		  old_params[i] = params[i];
2030 		((tre_literal_t *)tmp_r->obj)->u.params = old_params;
2031 		old_params[TRE_PARAM_DEPTH] = params_depth;
2032 		/* XXX - this is the only place where ast_new_node is
2033 		   needed -- should be moved inside AST module. */
2034 		node_copy = tre_ast_new_node(mem, ITERATION,
2035 					     sizeof(tre_iteration_t));
2036 		if (!node_copy)
2037 		  return REG_ESPACE;
2038 		node_copy->obj = node->obj;
2039 		tmp_node = tre_ast_new_catenation(mem, tmp_l, node_copy);
2040 		if (!tmp_node)
2041 		  return REG_ESPACE;
2042 		tmp_node = tre_ast_new_catenation(mem, tmp_node, tmp_r);
2043 		if (!tmp_node)
2044 		  return REG_ESPACE;
2045 		/* Replace the contents of `node' with `tmp_node'. */
2046 		memcpy(node, tmp_node, sizeof(*node));
2047 		node->obj = tmp_node->obj;
2048 		node->type = tmp_node->type;
2049 		params_depth++;
2050 		if (params_depth > *max_depth)
2051 		  *max_depth = params_depth;
2052 	      }
2053 #endif /* TRE_APPROX */
2054 	    break;
2055 	  }
2056 	default:
2057 	  assert(0);
2058 	  break;
2059 	}
2060     }
2061 
2062   *position += pos_add_total;
2063 
2064   /* `max_pos' should never be larger than `*position' if the above
2065      code works, but just an extra safeguard let's make sure
2066      `*position' is set large enough so enough memory will be
2067      allocated for the transition table. */
2068   if (max_pos > *position)
2069     *position = max_pos;
2070 
2071 #ifdef TRE_DEBUG
2072   DPRINT(("Expanded AST:\n"));
2073   tre_ast_print(ast);
2074   DPRINT(("*position %d, max_pos %d\n", *position, max_pos));
2075 #endif
2076 
2077   return status;
2078 }
2079 
2080 static tre_pos_and_tags_t *
2081 tre_set_empty(tre_mem_t mem)
2082 {
2083   tre_pos_and_tags_t *new_set;
2084 
2085   new_set = tre_mem_calloc(mem, sizeof(*new_set));
2086   if (new_set == NULL)
2087     return NULL;
2088 
2089   new_set[0].position = -1;
2090   new_set[0].code_min = -1;
2091   new_set[0].code_max = -1;
2092 
2093   return new_set;
2094 }
2095 
2096 static tre_pos_and_tags_t *
2097 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
2098 	    tre_bracket_match_list_t *bracket_match_list, int backref)
2099 {
2100   tre_pos_and_tags_t *new_set;
2101 
2102   new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
2103   if (new_set == NULL)
2104     return NULL;
2105 
2106   new_set[0].position = position;
2107   new_set[0].code_min = code_min;
2108   new_set[0].code_max = code_max;
2109   new_set[0].bracket_match_list = bracket_match_list;
2110   new_set[0].backref = backref;
2111   new_set[1].position = -1;
2112   new_set[1].code_min = -1;
2113   new_set[1].code_max = -1;
2114 
2115   return new_set;
2116 }
2117 
2118 static tre_pos_and_tags_t *
2119 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2120 	      int *tags, int assertions, int *params)
2121 {
2122   int s1, s2, i, j;
2123   tre_pos_and_tags_t *new_set;
2124   int *new_tags;
2125   int num_tags;
2126 
2127   for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
2128   for (s1 = 0; set1[s1].position >= 0; s1++);
2129   for (s2 = 0; set2[s2].position >= 0; s2++);
2130   new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
2131   if (!new_set )
2132     return NULL;
2133 
2134   for (s1 = 0; set1[s1].position >= 0; s1++)
2135     {
2136       new_set[s1].position = set1[s1].position;
2137       new_set[s1].code_min = set1[s1].code_min;
2138       new_set[s1].code_max = set1[s1].code_max;
2139       new_set[s1].assertions = set1[s1].assertions | assertions;
2140       new_set[s1].bracket_match_list = set1[s1].bracket_match_list;
2141       new_set[s1].backref = set1[s1].backref;
2142       if (set1[s1].tags == NULL && tags == NULL)
2143 	new_set[s1].tags = NULL;
2144       else
2145 	{
2146 	  for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
2147 	  new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
2148 					 * (i + num_tags + 1)));
2149 	  if (new_tags == NULL)
2150 	    return NULL;
2151 	  for (j = 0; j < i; j++)
2152 	    new_tags[j] = set1[s1].tags[j];
2153 	  for (i = 0; i < num_tags; i++)
2154 	    new_tags[j + i] = tags[i];
2155 	  new_tags[j + i] = -1;
2156 	  new_set[s1].tags = new_tags;
2157 	}
2158       if (set1[s1].params)
2159 	new_set[s1].params = set1[s1].params;
2160       if (params)
2161 	{
2162 	  if (!new_set[s1].params)
2163 	    new_set[s1].params = params;
2164 	  else
2165 	    {
2166 	      new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) *
2167 						 TRE_PARAM_LAST);
2168 	      if (!new_set[s1].params)
2169 		return NULL;
2170 	      for (i = 0; i < TRE_PARAM_LAST; i++)
2171 		if (params[i] != TRE_PARAM_UNSET)
2172 		  new_set[s1].params[i] = params[i];
2173 	    }
2174 	}
2175     }
2176 
2177   for (s2 = 0; set2[s2].position >= 0; s2++)
2178     {
2179       new_set[s1 + s2].position = set2[s2].position;
2180       new_set[s1 + s2].code_min = set2[s2].code_min;
2181       new_set[s1 + s2].code_max = set2[s2].code_max;
2182       /* XXX - why not | assertions here as well? */
2183       new_set[s1 + s2].assertions = set2[s2].assertions;
2184       new_set[s1 + s2].bracket_match_list = set2[s2].bracket_match_list;
2185       new_set[s1 + s2].backref = set2[s2].backref;
2186       if (set2[s2].tags == NULL)
2187 	new_set[s1 + s2].tags = NULL;
2188       else
2189 	{
2190 	  for (i = 0; set2[s2].tags[i] >= 0; i++);
2191 	  new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
2192 	  if (new_tags == NULL)
2193 	    return NULL;
2194 	  for (j = 0; j < i; j++)
2195 	    new_tags[j] = set2[s2].tags[j];
2196 	  new_tags[j] = -1;
2197 	  new_set[s1 + s2].tags = new_tags;
2198 	}
2199       if (set2[s2].params)
2200 	new_set[s1 + s2].params = set2[s2].params;
2201       if (params)
2202 	{
2203 	  if (!new_set[s1 + s2].params)
2204 	    new_set[s1 + s2].params = params;
2205 	  else
2206 	    {
2207 	      new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) *
2208 						      TRE_PARAM_LAST);
2209 	      if (!new_set[s1 + s2].params)
2210 		return NULL;
2211 	      for (i = 0; i < TRE_PARAM_LAST; i++)
2212 		if (params[i] != TRE_PARAM_UNSET)
2213 		  new_set[s1 + s2].params[i] = params[i];
2214 	    }
2215 	}
2216     }
2217   new_set[s1 + s2].position = -1;
2218   return new_set;
2219 }
2220 
2221 /* Finds the empty path through `node' which is the one that should be
2222    taken according to POSIX.2 rules, and adds the tags on that path to
2223    `tags'.   `tags' may be NULL.  If `num_tags_seen' is not NULL, it is
2224    set to the number of tags seen on the path. */
2225 static reg_errcode_t
2226 tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2227 		int *assertions, int *params, int *num_tags_seen,
2228 		int *params_seen)
2229 {
2230   tre_literal_t *lit;
2231   tre_union_t *uni;
2232   tre_catenation_t *cat;
2233   tre_iteration_t *iter;
2234   int i;
2235   int bottom = tre_stack_num_objects(stack);
2236   reg_errcode_t status = REG_OK;
2237   if (num_tags_seen)
2238     *num_tags_seen = 0;
2239   if (params_seen)
2240     *params_seen = 0;
2241 
2242   status = tre_stack_push_voidptr(stack, node);
2243 
2244   /* Walk through the tree recursively. */
2245   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2246     {
2247       node = tre_stack_pop_voidptr(stack);
2248 
2249       switch (node->type)
2250 	{
2251 	case LITERAL:
2252 	  lit = (tre_literal_t *)node->obj;
2253 	  switch (lit->code_min)
2254 	    {
2255 	    case TAG:
2256 	      if (lit->code_max >= 0)
2257 		{
2258 		  if (tags != NULL)
2259 		    {
2260 		      /* Add the tag to `tags'. */
2261 		      for (i = 0; tags[i] >= 0; i++)
2262 			if (tags[i] == lit->code_max)
2263 			  break;
2264 		      if (tags[i] < 0)
2265 			{
2266 			  tags[i] = lit->code_max;
2267 			  tags[i + 1] = -1;
2268 			}
2269 		    }
2270 		  if (num_tags_seen)
2271 		    (*num_tags_seen)++;
2272 		}
2273 	      break;
2274 	    case ASSERTION:
2275 	      assert(lit->code_max >= 1
2276 		     || lit->code_max <= ASSERT_LAST);
2277 	      if (assertions != NULL)
2278 		*assertions |= lit->code_max;
2279 	      break;
2280 	    case PARAMETER:
2281 	      if (params != NULL)
2282 		for (i = 0; i < TRE_PARAM_LAST; i++)
2283 		  params[i] = lit->u.params[i];
2284 	      if (params_seen != NULL)
2285 		*params_seen = 1;
2286 	      break;
2287 	    case EMPTY:
2288 	      break;
2289 	    default:
2290 	      assert(0);
2291 	      break;
2292 	    }
2293 	  break;
2294 
2295 	case UNION:
2296 	  /* Subexpressions starting earlier take priority over ones
2297 	     starting later, so we prefer the left subexpression over the
2298 	     right subexpression. */
2299 	  uni = (tre_union_t *)node->obj;
2300 	  if (uni->left->nullable)
2301 	    STACK_PUSHX(stack, voidptr, uni->left)
2302 	  else if (uni->right->nullable)
2303 	    STACK_PUSHX(stack, voidptr, uni->right)
2304 	  else
2305 	    assert(0);
2306 	  break;
2307 
2308 	case CATENATION:
2309 	  /* The path must go through both children. */
2310 	  cat = (tre_catenation_t *)node->obj;
2311 	  assert(cat->left->nullable);
2312 	  assert(cat->right->nullable);
2313 	  STACK_PUSHX(stack, voidptr, cat->left);
2314 	  STACK_PUSHX(stack, voidptr, cat->right);
2315 	  break;
2316 
2317 	case ITERATION:
2318 	  /* A match with an empty string is preferred over no match at
2319 	     all, so we go through the argument if possible. */
2320 	  iter = (tre_iteration_t *)node->obj;
2321 	  if (iter->arg->nullable)
2322 	    STACK_PUSHX(stack, voidptr, iter->arg);
2323 	  break;
2324 
2325 	default:
2326 	  assert(0);
2327 	  break;
2328 	}
2329     }
2330 
2331   return status;
2332 }
2333 
2334 
2335 typedef enum {
2336   NFL_RECURSE,
2337   NFL_POST_UNION,
2338   NFL_POST_CATENATION,
2339   NFL_POST_ITERATION
2340 } tre_nfl_stack_symbol_t;
2341 
2342 
2343 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2344    the nodes of the AST `tree'. */
2345 static reg_errcode_t
2346 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2347 {
2348   int bottom = tre_stack_num_objects(stack);
2349 
2350   STACK_PUSHR(stack, voidptr, tree);
2351   STACK_PUSHR(stack, int, NFL_RECURSE);
2352 
2353   while (tre_stack_num_objects(stack) > bottom)
2354     {
2355       tre_nfl_stack_symbol_t symbol;
2356       tre_ast_node_t *node;
2357 
2358       symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2359       node = tre_stack_pop_voidptr(stack);
2360       switch (symbol)
2361 	{
2362 	case NFL_RECURSE:
2363 	  switch (node->type)
2364 	    {
2365 	    case LITERAL:
2366 	      {
2367 		tre_literal_t *lit = (tre_literal_t *)node->obj;
2368 		if (IS_BACKREF(lit))
2369 		  {
2370 		    /* Back references: nullable = false, firstpos = {i},
2371 		       lastpos = {i}. */
2372 		    node->nullable = 0;
2373 		    node->firstpos = tre_set_one(mem, lit->position, 0,
2374 					     TRE_CHAR_MAX, NULL, -1);
2375 		    if (!node->firstpos)
2376 		      return REG_ESPACE;
2377 		    node->lastpos = tre_set_one(mem, lit->position, 0,
2378 						TRE_CHAR_MAX, NULL,
2379 						(int)lit->code_max);
2380 		    if (!node->lastpos)
2381 		      return REG_ESPACE;
2382 		  }
2383 		else if (lit->code_min < 0)
2384 		  {
2385 		    /* Tags, empty strings, params, and zero width assertions:
2386 		       nullable = true, firstpos = {}, and lastpos = {}. */
2387 		    node->nullable = 1;
2388 		    node->firstpos = tre_set_empty(mem);
2389 		    if (!node->firstpos)
2390 		      return REG_ESPACE;
2391 		    node->lastpos = tre_set_empty(mem);
2392 		    if (!node->lastpos)
2393 		      return REG_ESPACE;
2394 		  }
2395 		else
2396 		  {
2397 		    /* Literal at position i: nullable = false, firstpos = {i},
2398 		       lastpos = {i}. */
2399 		    node->nullable = 0;
2400 		    node->firstpos =
2401 		      tre_set_one(mem, lit->position, (int)lit->code_min,
2402 				  (int)lit->code_max, NULL, -1);
2403 		    if (!node->firstpos)
2404 		      return REG_ESPACE;
2405 		    node->lastpos = tre_set_one(mem, lit->position,
2406 						(int)lit->code_min,
2407 						(int)lit->code_max,
2408 						lit->u.bracket_match_list,
2409 						-1);
2410 		    if (!node->lastpos)
2411 		      return REG_ESPACE;
2412 		  }
2413 		break;
2414 	      }
2415 
2416 	    case UNION:
2417 	      /* Compute the attributes for the two subtrees, and after that
2418 		 for this node. */
2419 	      STACK_PUSHR(stack, voidptr, node);
2420 	      STACK_PUSHR(stack, int, NFL_POST_UNION);
2421 	      STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
2422 	      STACK_PUSHR(stack, int, NFL_RECURSE);
2423 	      STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
2424 	      STACK_PUSHR(stack, int, NFL_RECURSE);
2425 	      break;
2426 
2427 	    case CATENATION:
2428 	      /* Compute the attributes for the two subtrees, and after that
2429 		 for this node. */
2430 	      STACK_PUSHR(stack, voidptr, node);
2431 	      STACK_PUSHR(stack, int, NFL_POST_CATENATION);
2432 	      STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
2433 	      STACK_PUSHR(stack, int, NFL_RECURSE);
2434 	      STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
2435 	      STACK_PUSHR(stack, int, NFL_RECURSE);
2436 	      break;
2437 
2438 	    case ITERATION:
2439 	      /* Compute the attributes for the subtree, and after that for
2440 		 this node. */
2441 	      STACK_PUSHR(stack, voidptr, node);
2442 	      STACK_PUSHR(stack, int, NFL_POST_ITERATION);
2443 	      STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
2444 	      STACK_PUSHR(stack, int, NFL_RECURSE);
2445 	      break;
2446 	    }
2447 	  break; /* end case: NFL_RECURSE */
2448 
2449 	case NFL_POST_UNION:
2450 	  {
2451 	    tre_union_t *uni = (tre_union_t *)node->obj;
2452 	    node->nullable = uni->left->nullable || uni->right->nullable;
2453 	    node->firstpos = tre_set_union(mem, uni->left->firstpos,
2454 					   uni->right->firstpos, NULL, 0, NULL);
2455 	    if (!node->firstpos)
2456 	      return REG_ESPACE;
2457 	    node->lastpos = tre_set_union(mem, uni->left->lastpos,
2458 					  uni->right->lastpos, NULL, 0, NULL);
2459 	    if (!node->lastpos)
2460 	      return REG_ESPACE;
2461 	    break;
2462 	  }
2463 
2464 	case NFL_POST_ITERATION:
2465 	  {
2466 	    int num_tags, *tags, assertions, params_seen;
2467 	    int *params;
2468 	    reg_errcode_t status;
2469 	    tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2470 
2471 	    /* From Ville Laurikari's original 2001 Master's thesis, the
2472 	       firstpos(n) and lastpos(n) of an iteration is just the
2473 	       corresponding values of the iteration's argument.  Unfortunately,
2474 	       this isn't sufficient for the following BRE:
2475 
2476 	           \(a*\)*b\(\1\)    matched against    ab
2477 
2478 	       The backreference wants to force the first subexpression to
2479 	       be the empty string, but there is no transition for this.  So
2480 	       we need to modify the lastpos(n) of an iteration to be the
2481 	       equivalent of that of catentation.  Using the same notation as
2482 	       in the thesis, lastpos(n) is redefined as:
2483 
2484 	           if nullable(c1) then
2485 		       lastpos(c1) U
2486 		       addtags(lastpos(c1),
2487 		               emptymatch(c1))
2488 		   else
2489 		       lastpos(c1)
2490 
2491 	       where c1 is the argument node.  firstpos(n) remains the same. */
2492 
2493 	    /* Compute lastpos. */
2494 	    if (iter->min == 0 || iter->arg->nullable)
2495 	      {
2496 		node->nullable = 1;
2497 		if (iter->arg->nullable)
2498 		  {
2499 		    /* The arg matches the empty string.  Make a first pass
2500 		       with tre_match_empty() to get the number of tags and
2501 		       parameters. */
2502 		    status = tre_match_empty(stack, iter->arg,
2503 					     NULL, NULL, NULL, &num_tags,
2504 					     &params_seen);
2505 		    if (status != REG_OK)
2506 		      return status;
2507 		    /* Allocate arrays for the tags and parameters. */
2508 		    tags = xmalloc(sizeof(int) * (num_tags + 1));
2509 		    if (!tags)
2510 		      return REG_ESPACE;
2511 		    tags[0] = -1;
2512 		    assertions = 0;
2513 		    params = NULL;
2514 		    if (params_seen)
2515 		      {
2516 			params = tre_mem_alloc(mem, sizeof(*params)
2517 					       * TRE_PARAM_LAST);
2518 			if (!params)
2519 			  {
2520 			    xfree(tags);
2521 			    return REG_ESPACE;
2522 			  }
2523 		      }
2524 		    /* Second pass with tre_mach_empty() to get the list of
2525 		       tags and parameters. */
2526 		    status = tre_match_empty(stack, iter->arg, tags,
2527 					     &assertions, params, NULL, NULL);
2528 		    if (status != REG_OK)
2529 		      {
2530 			xfree(tags);
2531 			return status;
2532 		      }
2533 		    node->lastpos =
2534 		      tre_set_union(mem, iter->arg->lastpos, iter->arg->lastpos,
2535 				    tags, assertions, params);
2536 		    xfree(tags);
2537 		    if (!node->lastpos)
2538 		      return REG_ESPACE;
2539 		  }
2540 		else
2541 		  node->lastpos = iter->arg->lastpos;
2542 	      }
2543 	    else
2544 	      {
2545 		node->nullable = 0;
2546 		node->lastpos = iter->arg->lastpos;
2547 	      }
2548 	    node->firstpos = iter->arg->firstpos;
2549 	    break;
2550 	  }
2551 
2552 	case NFL_POST_CATENATION:
2553 	  {
2554 	    int num_tags, *tags, assertions, params_seen;
2555 	    int *params;
2556 	    reg_errcode_t status;
2557 	    tre_catenation_t *cat = node->obj;
2558 	    node->nullable = cat->left->nullable && cat->right->nullable;
2559 
2560 	    /* Compute firstpos. */
2561 	    if (cat->left->nullable)
2562 	      {
2563 		/* The left side matches the empty string.  Make a first pass
2564 		   with tre_match_empty() to get the number of tags and
2565 		   parameters. */
2566 		status = tre_match_empty(stack, cat->left,
2567 					 NULL, NULL, NULL, &num_tags,
2568 					 &params_seen);
2569 		if (status != REG_OK)
2570 		  return status;
2571 		/* Allocate arrays for the tags and parameters. */
2572 		tags = xmalloc(sizeof(*tags) * (num_tags + 1));
2573 		if (!tags)
2574 		  return REG_ESPACE;
2575 		tags[0] = -1;
2576 		assertions = 0;
2577 		params = NULL;
2578 		if (params_seen)
2579 		  {
2580 		    params = tre_mem_alloc(mem, sizeof(*params)
2581 					   * TRE_PARAM_LAST);
2582 		    if (!params)
2583 		      {
2584 			xfree(tags);
2585 			return REG_ESPACE;
2586 		      }
2587 		  }
2588 		/* Second pass with tre_mach_empty() to get the list of
2589 		   tags and parameters. */
2590 		status = tre_match_empty(stack, cat->left, tags,
2591 					 &assertions, params, NULL, NULL);
2592 		if (status != REG_OK)
2593 		  {
2594 		    xfree(tags);
2595 		    return status;
2596 		  }
2597 		node->firstpos =
2598 		  tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2599 				tags, assertions, params);
2600 		xfree(tags);
2601 		if (!node->firstpos)
2602 		  return REG_ESPACE;
2603 	      }
2604 	    else
2605 	      {
2606 		node->firstpos = cat->left->firstpos;
2607 	      }
2608 
2609 	    /* Compute lastpos. */
2610 	    if (cat->right->nullable)
2611 	      {
2612 		/* The right side matches the empty string.  Make a first pass
2613 		   with tre_match_empty() to get the number of tags and
2614 		   parameters. */
2615 		status = tre_match_empty(stack, cat->right,
2616 					 NULL, NULL, NULL, &num_tags,
2617 					 &params_seen);
2618 		if (status != REG_OK)
2619 		  return status;
2620 		/* Allocate arrays for the tags and parameters. */
2621 		tags = xmalloc(sizeof(int) * (num_tags + 1));
2622 		if (!tags)
2623 		  return REG_ESPACE;
2624 		tags[0] = -1;
2625 		assertions = 0;
2626 		params = NULL;
2627 		if (params_seen)
2628 		  {
2629 		    params = tre_mem_alloc(mem, sizeof(*params)
2630 					   * TRE_PARAM_LAST);
2631 		    if (!params)
2632 		      {
2633 			xfree(tags);
2634 			return REG_ESPACE;
2635 		      }
2636 		  }
2637 		/* Second pass with tre_mach_empty() to get the list of
2638 		   tags and parameters. */
2639 		status = tre_match_empty(stack, cat->right, tags,
2640 					 &assertions, params, NULL, NULL);
2641 		if (status != REG_OK)
2642 		  {
2643 		    xfree(tags);
2644 		    return status;
2645 		  }
2646 		node->lastpos =
2647 		  tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2648 				tags, assertions, params);
2649 		xfree(tags);
2650 		if (!node->lastpos)
2651 		  return REG_ESPACE;
2652 	      }
2653 	    else
2654 	      {
2655 		node->lastpos = cat->right->lastpos;
2656 	      }
2657 	    break;
2658 	  }
2659 
2660 	default:
2661 	  assert(0);
2662 	  break;
2663 	}
2664     }
2665 
2666   return REG_OK;
2667 }
2668 
2669 
2670 /* Adds a transition from each position in `p1' to each position in `p2'. */
2671 static reg_errcode_t
2672 tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2673 	       tre_tnfa_transition_t *transitions,
2674 	       int *counts, int *offs)
2675 {
2676   tre_pos_and_tags_t *orig_p2 = p2;
2677   tre_tnfa_transition_t *trans;
2678   int i, j, k, l, dup, prev_p2_pos;
2679 
2680   if (transitions != NULL)
2681     while (p1->position >= 0)
2682       {
2683 	p2 = orig_p2;
2684 	prev_p2_pos = -1;
2685 	while (p2->position >= 0)
2686 	  {
2687 	    /* Optimization: if this position was already handled, skip it. */
2688 	    if (p2->position == prev_p2_pos)
2689 	      {
2690 		p2++;
2691 		continue;
2692 	      }
2693 	    prev_p2_pos = p2->position;
2694 	    /* Set `trans' to point to the next unused transition from
2695 	       position `p1->position'. */
2696 	    trans = transitions + offs[p1->position];
2697 	    while (trans->state != NULL)
2698 	      {
2699 #if 0
2700 		/* If we find a previous transition from `p1->position' to
2701 		   `p2->position', it is overwritten.  This can happen only
2702 		   if there are nested loops in the regexp, like in "((a)*)*".
2703 		   In POSIX.2 repetition using the outer loop is always
2704 		   preferred over using the inner loop.	 Therefore the
2705 		   transition for the inner loop is useless and can be thrown
2706 		   away. */
2707 		/* XXX - The same position is used for all nodes in a bracket
2708 		   expression, so this optimization cannot be used (it will
2709 		   break bracket expressions) unless I figure out a way to
2710 		   detect it here. */
2711 		if (trans->state_id == p2->position)
2712 		  {
2713 		    DPRINT(("*"));
2714 		    break;
2715 		  }
2716 #endif
2717 		trans++;
2718 	      }
2719 
2720 	    if (trans->state == NULL)
2721 	      (trans + 1)->state = NULL;
2722 	    /* Use the character ranges, assertions, etc. from `p1' for
2723 	       the transition from `p1' to `p2'. */
2724 	    trans->code_min = p1->code_min;
2725 	    trans->code_max = p1->code_max;
2726 	    trans->state = transitions + offs[p2->position];
2727 	    trans->state_id = p2->position;
2728 	    trans->assertions = p1->assertions | p2->assertions
2729 	      | (p1->bracket_match_list != NULL ? ASSERT_BRACKET_MATCH : 0);
2730 	    if (p1->backref >= 0)
2731 	      {
2732 		assert((trans->assertions & ASSERT_BRACKET_MATCH) == 0);
2733 		assert(p2->backref < 0);
2734 		trans->u.backref = p1->backref;
2735 		trans->assertions |= ASSERT_BACKREF;
2736 	      }
2737 	    if (p1->bracket_match_list != NULL)
2738 	      {
2739 		trans->u.bracket_match_list =
2740 		  xmalloc(SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
2741 		if (trans->u.bracket_match_list == NULL)
2742 		  return REG_ESPACE;
2743 		memcpy(trans->u.bracket_match_list, p1->bracket_match_list,
2744 		       SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
2745 	      }
2746 
2747 	    /* Find out how many tags this transition has. */
2748 	    i = 0;
2749 	    if (p1->tags != NULL)
2750 	      while(p1->tags[i] >= 0)
2751 		i++;
2752 	    j = 0;
2753 	    if (p2->tags != NULL)
2754 	      while(p2->tags[j] >= 0)
2755 		j++;
2756 
2757 	    /* If we are overwriting a transition, free the old tag array. */
2758 	    if (trans->tags != NULL)
2759 	      xfree(trans->tags);
2760 	    trans->tags = NULL;
2761 
2762 	    /* If there were any tags, allocate an array and fill it. */
2763 	    if (i + j > 0)
2764 	      {
2765 		trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
2766 		if (!trans->tags)
2767 		  return REG_ESPACE;
2768 		i = 0;
2769 		if (p1->tags != NULL)
2770 		  while(p1->tags[i] >= 0)
2771 		    {
2772 		      trans->tags[i] = p1->tags[i];
2773 		      i++;
2774 		    }
2775 		l = i;
2776 		j = 0;
2777 		if (p2->tags != NULL)
2778 		  while (p2->tags[j] >= 0)
2779 		    {
2780 		      /* Don't add duplicates. */
2781 		      dup = 0;
2782 		      for (k = 0; k < i; k++)
2783 			if (trans->tags[k] == p2->tags[j])
2784 			  {
2785 			    dup = 1;
2786 			    break;
2787 			  }
2788 		      if (!dup)
2789 			trans->tags[l++] = p2->tags[j];
2790 		      j++;
2791 		    }
2792 		trans->tags[l] = -1;
2793 	      }
2794 
2795 	    /* Set the parameter array.	 If both `p2' and `p1' have same
2796 	       parameters, the values in `p2' override those in `p1'. */
2797 	    if (p1->params || p2->params)
2798 	      {
2799 		if (!trans->params)
2800 		  trans->params = xmalloc(sizeof(*trans->params)
2801 					  * TRE_PARAM_LAST);
2802 		if (!trans->params)
2803 		  return REG_ESPACE;
2804 		for (i = 0; i < TRE_PARAM_LAST; i++)
2805 		  {
2806 		    trans->params[i] = TRE_PARAM_UNSET;
2807 		    if (p1->params && p1->params[i] != TRE_PARAM_UNSET)
2808 		      trans->params[i] = p1->params[i];
2809 		    if (p2->params && p2->params[i] != TRE_PARAM_UNSET)
2810 		      trans->params[i] = p2->params[i];
2811 		  }
2812 	      }
2813 	    else
2814 	      {
2815 		if (trans->params)
2816 		  xfree(trans->params);
2817 		trans->params = NULL;
2818 	      }
2819 
2820 
2821 #ifdef TRE_DEBUG
2822 	    {
2823 	      int *tags;
2824 
2825 	      DPRINT(("	 %2d -> %2d on %3d", p1->position, p2->position,
2826 		      p1->code_min));
2827 	      if (p1->code_max != p1->code_min)
2828 		DPRINT(("-%3d", p1->code_max));
2829 	      tags = trans->tags;
2830 	      if (tags)
2831 		{
2832 		  DPRINT((", tags ["));
2833 		  while (*tags >= 0)
2834 		    {
2835 		      DPRINT(("%d", *tags));
2836 		      tags++;
2837 		      if (*tags >= 0)
2838 			DPRINT((","));
2839 		    }
2840 		  DPRINT(("]"));
2841 		}
2842 	      if (trans->assertions)
2843 		DPRINT((", assert %d", trans->assertions));
2844 	      if (trans->assertions & ASSERT_BACKREF)
2845 		DPRINT((", backref %d", trans->u.backref));
2846 	      else if (trans->assertions & ASSERT_BRACKET_MATCH)
2847 		DPRINT((", bracket_match_list %p",
2848 			trans->u.bracket_match_list));
2849 	      if (trans->params)
2850 		{
2851 		  DPRINT((", "));
2852 		  tre_print_params(trans->params);
2853 		}
2854 	      DPRINT(("\n"));
2855 	    }
2856 #endif /* TRE_DEBUG */
2857 	    p2++;
2858 	  }
2859 	p1++;
2860       }
2861   else
2862     /* Compute a maximum limit for the number of transitions leaving
2863        from each state. */
2864     while (p1->position >= 0)
2865       {
2866 	p2 = orig_p2;
2867 	while (p2->position >= 0)
2868 	  {
2869 	    counts[p1->position]++;
2870 	    p2++;
2871 	  }
2872 	p1++;
2873       }
2874   return REG_OK;
2875 }
2876 
2877 /* Converts the syntax tree to a TNFA.	All the transitions in the TNFA are
2878    labelled with one character range (there are no transitions on empty
2879    strings).  The TNFA takes O(n^2) space in the worst case, `n' is size of
2880    the regexp. */
2881 static reg_errcode_t
2882 tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
2883 		int *counts, int *offs)
2884 {
2885   tre_union_t *uni;
2886   tre_catenation_t *cat;
2887   tre_iteration_t *iter;
2888   reg_errcode_t errcode = REG_OK;
2889 
2890   /* XXX - recurse using a stack!. */
2891   switch (node->type)
2892     {
2893     case LITERAL:
2894       break;
2895     case UNION:
2896       uni = (tre_union_t *)node->obj;
2897       errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2898       if (errcode != REG_OK)
2899 	return errcode;
2900       errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2901       break;
2902 
2903     case CATENATION:
2904       cat = (tre_catenation_t *)node->obj;
2905       /* Add a transition from each position in cat->left->lastpos
2906 	 to each position in cat->right->firstpos. */
2907       errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2908 			       transitions, counts, offs);
2909       if (errcode != REG_OK)
2910 	return errcode;
2911       errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2912       if (errcode != REG_OK)
2913 	return errcode;
2914       errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2915       break;
2916 
2917     case ITERATION:
2918       iter = (tre_iteration_t *)node->obj;
2919       assert(iter->max == -1 || iter->max == 1);
2920 
2921       if (iter->max == -1)
2922 	{
2923 	  assert(iter->min == 0 || iter->min == 1);
2924 	  /* Add a transition from each last position in the iterated
2925 	     expression to each first position. */
2926 	  errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2927 				   transitions, counts, offs);
2928 	  if (errcode != REG_OK)
2929 	    return errcode;
2930 	}
2931       errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2932       break;
2933     }
2934   return errcode;
2935 }
2936 
2937 
2938 #define ERROR_EXIT(err)		  \
2939   do				  \
2940     {				  \
2941       errcode = err;		  \
2942       if (/*CONSTCOND*/1)	  \
2943       	goto error_exit;	  \
2944     }				  \
2945  while (/*CONSTCOND*/0)
2946 
2947 
2948 int
2949 tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags,
2950 	    locale_t loc)
2951 {
2952   tre_stack_t *stack;
2953   tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2954   tre_pos_and_tags_t *p;
2955   int *counts = NULL, *offs = NULL;
2956   int i, add = 0;
2957   tre_tnfa_transition_t *transitions, *initial;
2958   tre_tnfa_t *tnfa = NULL;
2959   tre_submatch_data_t *submatch_data = NULL;
2960   tre_tag_direction_t *tag_directions = NULL;
2961   reg_errcode_t errcode;
2962   tre_mem_t mem;
2963 
2964   /* Parse context. */
2965   tre_parse_ctx_t parse_ctx;
2966 
2967   /* Allocate a stack used throughout the compilation process for various
2968      purposes. */
2969   stack = tre_stack_new(512, 10240, 128);
2970   if (!stack)
2971     return REG_ESPACE;
2972   /* Allocate a fast memory allocator. */
2973   mem = tre_mem_new();
2974   if (!mem)
2975     {
2976       tre_stack_destroy(stack);
2977       return REG_ESPACE;
2978     }
2979 
2980   /* Parse the regexp. */
2981   memset(&parse_ctx, 0, sizeof(parse_ctx));
2982   parse_ctx.mem = mem;
2983   parse_ctx.stack = stack;
2984   parse_ctx.re = regex;
2985   parse_ctx.len = n;
2986   /* Only allow REG_UNGREEDY to be set if both REG_ENHANCED and REG_EXTENDED
2987      are also set */
2988   if ((cflags & (REG_ENHANCED | REG_EXTENDED)) != (REG_ENHANCED | REG_EXTENDED))
2989     cflags &= ~REG_UNGREEDY;
2990   parse_ctx.cflags = cflags;
2991   parse_ctx.max_backref = -1;
2992   parse_ctx.loc = loc;
2993   parse_ctx.submatch_id_invisible = SUBMATCH_ID_INVISIBLE_START;
2994 
2995   DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
2996   errcode = tre_parse(&parse_ctx);
2997   if (errcode != REG_OK)
2998     ERROR_EXIT(errcode);
2999   preg->re_nsub = parse_ctx.submatch_id - 1;
3000   tree = parse_ctx.result;
3001 
3002   /* Back references and approximate matching cannot currently be used
3003      in the same regexp. */
3004   if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
3005     ERROR_EXIT(REG_BADPAT);
3006 
3007 #ifdef TRE_DEBUG
3008   tre_ast_print(tree);
3009 #endif /* TRE_DEBUG */
3010 
3011   /* Referring to nonexistent subexpressions is illegal. */
3012   if (parse_ctx.max_backref > (int)preg->re_nsub)
3013     ERROR_EXIT(REG_ESUBREG);
3014 
3015   /* Allocate the TNFA struct. */
3016   tnfa = xcalloc(1, sizeof(tre_tnfa_t));
3017   if (tnfa == NULL)
3018     ERROR_EXIT(REG_ESPACE);
3019   tnfa->have_backrefs = parse_ctx.max_backref >= 0;
3020   tnfa->have_approx = parse_ctx.have_approx;
3021   tnfa->num_submatches = parse_ctx.submatch_id;
3022   tnfa->num_submatches_invisible = parse_ctx.submatch_id_invisible
3023 				   - SUBMATCH_ID_INVISIBLE_START;
3024   tnfa->num_reorder_tags = parse_ctx.num_reorder_tags;
3025   tnfa->loc = parse_ctx.loc;
3026 
3027   /* Set up tags for submatch addressing.  If REG_NOSUB is set and the
3028      regexp does not have back references, this can be skipped. */
3029   if (tnfa->num_reorder_tags > 0 || !(cflags & REG_NOSUB))
3030     {
3031       DPRINT(("tre_compile: setting up tags\n"));
3032 
3033       /* Figure out how many tags we will need. */
3034       errcode = tre_add_tags(NULL, stack, tree, tnfa);
3035       if (errcode != REG_OK)
3036 	ERROR_EXIT(errcode);
3037 #ifdef TRE_DEBUG
3038       tre_ast_print(tree);
3039 #endif /* TRE_DEBUG */
3040 
3041       if (tnfa->num_tags > 0)
3042 	{
3043 	  tag_directions = xmalloc(sizeof(*tag_directions)
3044 				   * (tnfa->num_tags + 1));
3045 	  if (tag_directions == NULL)
3046 	    ERROR_EXIT(REG_ESPACE);
3047 	  tnfa->tag_directions = tag_directions;
3048 	  memset(tag_directions, -1,
3049 		 sizeof(*tag_directions) * (tnfa->num_tags + 1));
3050 	}
3051       tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 3,
3052 				   sizeof(tnfa->minimal_tags));
3053       if (tnfa->minimal_tags == NULL)
3054 	ERROR_EXIT(REG_ESPACE);
3055 
3056       submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
3057 			      sizeof(*submatch_data));
3058       if (submatch_data == NULL)
3059 	ERROR_EXIT(REG_ESPACE);
3060       /* Set the eo_tag value to -1 to indicate that that corresponding
3061        * submatch has not be completed yet */
3062       for (i = 0; i < parse_ctx.submatch_id; i++)
3063 	{
3064 	  submatch_data[i].eo_tag = -1;
3065 	}
3066       tnfa->submatch_data = submatch_data;
3067 
3068       errcode = tre_add_tags(mem, stack, tree, tnfa);
3069       if (errcode != REG_OK)
3070 	ERROR_EXIT(errcode);
3071 
3072 #ifdef TRE_DEBUG
3073       for (i = 0; i < parse_ctx.submatch_id; i++)
3074 	DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3075 		i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
3076       for (i = 0; i <= tnfa->num_tags; i++)
3077 	DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
3078 #endif /* TRE_DEBUG */
3079     }
3080 
3081   /* Expand iteration nodes. */
3082   errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
3083 			   tag_directions, &tnfa->params_depth);
3084   if (errcode != REG_OK)
3085     ERROR_EXIT(errcode);
3086 
3087   /* Add a dummy node for the final state.
3088      XXX - For certain patterns this dummy node can be optimized away,
3089 	   for example "a*" or "ab*".	Figure out a simple way to detect
3090 	   this possibility. */
3091   tmp_ast_l = tree;
3092   tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
3093   if (tmp_ast_r == NULL)
3094     ERROR_EXIT(REG_ESPACE);
3095 
3096   tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
3097   if (tree == NULL)
3098     ERROR_EXIT(REG_ESPACE);
3099 
3100 #ifdef TRE_DEBUG
3101   tre_ast_print(tree);
3102   DPRINT(("Number of states: %d\n", parse_ctx.position));
3103   if (submatch_data)
3104     for (i = 0; i < parse_ctx.submatch_id; i++)
3105       DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3106 	      i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
3107   if (tag_directions)
3108     for (i = 0; i <= tnfa->num_tags; i++)
3109       DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
3110 #endif /* TRE_DEBUG */
3111 
3112   errcode = tre_compute_nfl(mem, stack, tree);
3113   if (errcode != REG_OK)
3114     ERROR_EXIT(errcode);
3115 
3116   counts = xmalloc(sizeof(int) * parse_ctx.position);
3117   if (counts == NULL)
3118     ERROR_EXIT(REG_ESPACE);
3119 
3120   offs = xmalloc(sizeof(int) * parse_ctx.position);
3121   if (offs == NULL)
3122     ERROR_EXIT(REG_ESPACE);
3123 
3124   for (i = 0; i < parse_ctx.position; i++)
3125     counts[i] = 0;
3126   tre_ast_to_tnfa(tree, NULL, counts, NULL);
3127 
3128   add = 0;
3129   for (i = 0; i < parse_ctx.position; i++)
3130     {
3131       offs[i] = add;
3132       add += counts[i] + 1;
3133       counts[i] = 0;
3134     }
3135   transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
3136   if (transitions == NULL)
3137     ERROR_EXIT(REG_ESPACE);
3138   tnfa->transitions = transitions;
3139   tnfa->num_transitions = add;
3140 
3141   DPRINT(("Converting to TNFA:\n"));
3142   errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
3143   if (errcode != REG_OK)
3144     ERROR_EXIT(errcode);
3145 
3146 
3147   /* Set first_char only if there is only one character that can be the
3148      first character of a match */
3149   tnfa->first_char = -1;
3150   if (!tmp_ast_l->nullable)
3151     {
3152       int scanning = 1;
3153       for (p = tree->firstpos; scanning && p->position >= 0; p++)
3154 	{
3155 	  tre_tnfa_transition_t *j = transitions + offs[p->position];
3156 	  while (j->state != NULL)
3157 	    {
3158 	      if (j->code_min <= j->code_max)
3159 		{
3160 		  if (j->code_max != j->code_min || j->code_min == -1 || tnfa->first_char != -1)
3161 		    {
3162 		      tnfa->first_char = -1;
3163 		      scanning = 0;
3164 		      break;
3165 		    }
3166 		  tnfa->first_char = j->code_min;
3167 		}
3168 	      j++;
3169 	    }
3170 	}
3171 #ifdef TRE_DEBUG
3172       if (tnfa->first_char >= 0)
3173 	DPRINT(("first char must be %d\n", tnfa->first_char));
3174 #endif /* TRE_DEBUG */
3175     }
3176 
3177   p = tree->firstpos;
3178   i = 0;
3179   while (p->position >= 0)
3180     {
3181       i++;
3182 
3183 #ifdef TRE_DEBUG
3184       {
3185 	int *tags;
3186 	DPRINT(("initial: %d", p->position));
3187 	tags = p->tags;
3188 	if (tags != NULL)
3189 	  {
3190 	    if (*tags >= 0)
3191 	      DPRINT(("/"));
3192 	    while (*tags >= 0)
3193 	      {
3194 		DPRINT(("%d", *tags));
3195 		tags++;
3196 		if (*tags >= 0)
3197 		  DPRINT((","));
3198 	      }
3199 	  }
3200 	DPRINT((", assert %d", p->assertions));
3201 	if (p->params)
3202 	  {
3203 	    DPRINT((", "));
3204 	    tre_print_params(p->params);
3205 	  }
3206 	DPRINT(("\n"));
3207       }
3208 #endif /* TRE_DEBUG */
3209 
3210       p++;
3211     }
3212 
3213   initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
3214   if (initial == NULL)
3215     ERROR_EXIT(REG_ESPACE);
3216   tnfa->initial = initial;
3217 
3218   i = 0;
3219   for (p = tree->firstpos; p->position >= 0; p++)
3220     {
3221       initial[i].state = transitions + offs[p->position];
3222       initial[i].state_id = p->position;
3223       initial[i].tags = NULL;
3224       /* Copy the arrays p->tags, and p->params, they are allocated
3225 	 from a tre_mem object. */
3226       if (p->tags)
3227 	{
3228 	  int j;
3229 	  for (j = 0; p->tags[j] >= 0; j++);
3230 	  initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
3231 	  if (!initial[i].tags)
3232 	    ERROR_EXIT(REG_ESPACE);
3233 	  memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
3234 	}
3235       initial[i].params = NULL;
3236       if (p->params)
3237 	{
3238 	  initial[i].params = xmalloc(sizeof(*p->params) * TRE_PARAM_LAST);
3239 	  if (!initial[i].params)
3240 	    ERROR_EXIT(REG_ESPACE);
3241 	  memcpy(initial[i].params, p->params,
3242 		 sizeof(*p->params) * TRE_PARAM_LAST);
3243 	}
3244       initial[i].assertions = p->assertions;
3245       i++;
3246     }
3247   initial[i].state = NULL;
3248 
3249   tnfa->num_transitions = add;
3250   tnfa->final = transitions + offs[tree->lastpos[0].position];
3251   tnfa->num_states = parse_ctx.position;
3252   tnfa->cflags = cflags;
3253 
3254   DPRINT(("final state %d (%p)\n", tree->lastpos[0].position,
3255 	  (void *)tnfa->final));
3256 
3257   tre_mem_destroy(mem);
3258   tre_stack_destroy(stack);
3259   xfree(counts);
3260   xfree(offs);
3261 
3262 #ifdef TRE_USE_SYSTEM_REGEX_H
3263   preg->re_magic = RE_MAGIC;
3264 #endif /* TRE_USE_SYSTEM_REGEX_H */
3265   preg->TRE_REGEX_T_FIELD = (void *)tnfa;
3266   xlocale_retain(tnfa->loc);
3267   return REG_OK;
3268 
3269  error_exit:
3270   /* Free everything that was allocated and return the error code. */
3271   tre_mem_destroy(mem);
3272   if (stack != NULL)
3273     tre_stack_destroy(stack);
3274   if (counts != NULL)
3275     xfree(counts);
3276   if (offs != NULL)
3277     xfree(offs);
3278 
3279   /* Set tnfa into preg, so that calling tre_free() will free the contents
3280      of tnfa.  NULL out the loc field since we never retained the locale. */
3281   preg->TRE_REGEX_T_FIELD = (void *)tnfa;
3282   if(tnfa) tnfa->loc = NULL;
3283 
3284   tre_free(preg);
3285   return errcode;
3286 }
3287 
3288 
3289 
3290 
3291 void
3292 tre_free(regex_t *preg)
3293 {
3294   tre_tnfa_t *tnfa;
3295   unsigned int i;
3296   tre_tnfa_transition_t *trans;
3297 
3298 #ifdef TRE_USE_SYSTEM_REGEX_H
3299   preg->re_magic = 0;
3300 #endif /* TRE_USE_SYSTEM_REGEX_H */
3301   tnfa = (void *)preg->TRE_REGEX_T_FIELD;
3302   if (!tnfa)
3303     return;
3304   preg->TRE_REGEX_T_FIELD = NULL;
3305 
3306   for (i = 0; i < tnfa->num_transitions; i++)
3307     if (tnfa->transitions[i].state)
3308       {
3309 	if (tnfa->transitions[i].tags)
3310 	  xfree(tnfa->transitions[i].tags);
3311 	if (tnfa->transitions[i].assertions & ASSERT_BRACKET_MATCH)
3312 	  xfree(tnfa->transitions[i].u.bracket_match_list);
3313 	if (tnfa->transitions[i].params)
3314 	  xfree(tnfa->transitions[i].params);
3315       }
3316   if (tnfa->transitions)
3317     xfree(tnfa->transitions);
3318 
3319   if (tnfa->initial)
3320     {
3321       for (trans = tnfa->initial; trans->state; trans++)
3322 	{
3323 	  if (trans->tags)
3324 	    xfree(trans->tags);
3325 	  if (trans->params)
3326 	    xfree(trans->params);
3327 	}
3328       xfree(tnfa->initial);
3329     }
3330 
3331   if (tnfa->submatch_data)
3332     {
3333       xfree(tnfa->submatch_data);
3334     }
3335 
3336   if (tnfa->tag_directions)
3337     xfree(tnfa->tag_directions);
3338   if (tnfa->minimal_tags)
3339     xfree(tnfa->minimal_tags);
3340 
3341   if (tnfa->loc)
3342     xlocale_release(tnfa->loc);
3343 
3344   if (tnfa->last_matched_branch)
3345     xfree(tnfa->last_matched_branch);
3346 
3347   xfree(tnfa);
3348 }
3349 
3350 char *
3351 tre_version(void)
3352 {
3353   static char str[256];
3354   char *version;
3355 
3356   if (str[0] == 0)
3357     {
3358       (void) tre_config(TRE_CONFIG_VERSION, &version);
3359       (void) snprintf(str, sizeof(str), "TRE %s (BSD)", version);
3360     }
3361   return str;
3362 }
3363 
3364 int
3365 tre_config(int query, void *result)
3366 {
3367   int *int_result = result;
3368   const char **string_result = result;
3369 
3370   switch (query)
3371     {
3372     case TRE_CONFIG_APPROX:
3373 #ifdef TRE_APPROX
3374       *int_result = 1;
3375 #else /* !TRE_APPROX */
3376       *int_result = 0;
3377 #endif /* !TRE_APPROX */
3378       return REG_OK;
3379 
3380     case TRE_CONFIG_WCHAR:
3381 #ifdef TRE_WCHAR
3382       *int_result = 1;
3383 #else /* !TRE_WCHAR */
3384       *int_result = 0;
3385 #endif /* !TRE_WCHAR */
3386       return REG_OK;
3387 
3388     case TRE_CONFIG_MULTIBYTE:
3389 #ifdef TRE_MULTIBYTE
3390       *int_result = 1;
3391 #else /* !TRE_MULTIBYTE */
3392       *int_result = 0;
3393 #endif /* !TRE_MULTIBYTE */
3394       return REG_OK;
3395 
3396     case TRE_CONFIG_SYSTEM_ABI:
3397 #ifdef TRE_CONFIG_SYSTEM_ABI
3398       *int_result = 1;
3399 #else /* !TRE_CONFIG_SYSTEM_ABI */
3400       *int_result = 0;
3401 #endif /* !TRE_CONFIG_SYSTEM_ABI */
3402       return REG_OK;
3403 
3404     case TRE_CONFIG_VERSION:
3405       *string_result = TRE_VERSION;
3406       return REG_OK;
3407     }
3408 
3409   return REG_NOMATCH;
3410 }
3411 
3412 
3413 /* EOF */
3414