1 #if !defined(RXH) || defined(RX_WANT_SE_DEFS)
2 #define RXH
3 
4 /*	Copyright (C) 1992, 1993 Free Software Foundation, Inc.
5 
6 This file is part of the librx library.
7 
8 Librx is free software; you can redistribute it and/or modify it under
9 the terms of the GNU Library General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12 
13 Librx is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU Library General Public
19 License along with this software; see the file COPYING.LIB.  If not,
20 write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA
21 02139, USA.  */
22 /*  t. lord	Wed Sep 23 18:20:57 1992	*/
23 /* Multi-byte extension added Jun, 1997 by WILLs (Katsuyuki Okabe)
24    Last change: Jul. 3, 1997 by WILLs */
25 
26 
27 
28 
29 
30 
31 
32 
33 #ifndef RX_WANT_SE_DEFS
34 
35 /* This page: Bitsets */
36 
37 #ifndef RX_subset
38 typedef unsigned long RX_subset;
39 #define RX_subset_bits	(32)
40 #define RX_subset_mask	(RX_subset_bits - 1)
41 #endif
42 
43 typedef RX_subset * rx_Bitset;
44 
45 #if defined(__STDC__) || defined(MSDOS)
46 typedef void (*rx_bitset_iterator) (void *, int member_index);
47 #else
48 typedef void (*rx_bitset_iterator) ();
49 #endif
50 
51 #define rx_bitset_subset(N)  ((unsigned) (N) / RX_subset_bits)
52 #define rx_bitset_subset_val(B,N)  ((B)[rx_bitset_subset(N)])
53 #define RX_bitset_access(B,N,OP) \
54   ((B)[rx_bitset_subset(N)] OP rx_subset_singletons[(N) & RX_subset_mask])
55 #define RX_bitset_member(B,N)   RX_bitset_access(B, N, &)
56 #define RX_bitset_enjoin(B,N)   RX_bitset_access(B, N, |=)
57 #define RX_bitset_remove(B,N)   RX_bitset_access(B, N, &= ~)
58 #define RX_bitset_toggle(B,N)   RX_bitset_access(B, N, ^= )
59 #define rx_bitset_numb_subsets(N) (((unsigned) (N) + RX_subset_bits - 1) / RX_subset_bits)
60 #define rx_sizeof_bitset(N)	(rx_bitset_numb_subsets(N) * sizeof(RX_subset))
61 
62 
63 
64 /* This page: Splay trees. */
65 
66 #if defined(__STDC__) || defined(MSDOS)
67 typedef int (*rx_sp_comparer) (void * a, void * b);
68 #else
69 typedef int (*rx_sp_comparer) ();
70 #endif
71 
72 struct rx_sp_node
73 {
74   void * key;
75   void * data;
76   struct rx_sp_node * kids[2];
77 };
78 
79 #if defined(__STDC__) || defined(MSDOS)
80 typedef void (*rx_sp_key_data_freer) (struct rx_sp_node *);
81 #else
82 typedef void (*rx_sp_key_data_freer) ();
83 #endif
84 
85 
86 /* giant inflatable hash trees */
87 
88 struct rx_hash_item
89 {
90   struct rx_hash_item * next_same_hash;
91   struct rx_hash * table;
92   unsigned long hash;
93   void * data;
94   void * binding;
95 };
96 
97 struct rx_hash
98 {
99   struct rx_hash * parent;
100   int refs;
101   struct rx_hash * children[13];
102   struct rx_hash_item * buckets [13];
103   int bucket_size [13];
104 };
105 
106 struct rx_hash_rules;
107 
108 #if defined(__STDC__) || defined(MSDOS)
109 /* should return like == */
110 typedef int (*rx_hash_eq)(void *, void *);
111 typedef struct rx_hash * (*rx_alloc_hash)(struct rx_hash_rules *);
112 typedef void (*rx_free_hash)(struct rx_hash *,
113 			    struct rx_hash_rules *);
114 typedef struct rx_hash_item * (*rx_alloc_hash_item)(struct rx_hash_rules *,
115 						    void *);
116 typedef void (*rx_free_hash_item)(struct rx_hash_item *,
117 				 struct rx_hash_rules *);
118 #else
119 typedef int (*rx_hash_eq)();
120 typedef struct rx_hash * (*rx_alloc_hash)();
121 typedef void (*rx_free_hash)();
122 typedef struct rx_hash_item * (*rx_alloc_hash_item)();
123 typedef void (*rx_free_hash_item)();
124 #endif
125 
126 struct rx_hash_rules
127 {
128   rx_hash_eq eq;
129   rx_alloc_hash hash_alloc;
130   rx_free_hash free_hash;
131   rx_alloc_hash_item hash_item_alloc;
132   rx_free_hash_item free_hash_item;
133 };
134 
135 
136 /* Forward declarations */
137 
138 struct rx_cache;
139 struct rx_superset;
140 struct rx;
141 struct rx_se_list;
142 
143 
144 
145 /*
146  * GLOSSARY
147  *
148  * regexp
149  * regular expression
150  * expression
151  * pattern - a `regular' expression.  The expression
152  *       need not be formally regular -- it can contain
153  *       constructs that don't correspond to purely regular
154  *       expressions.
155  *
156  * buffer
157  * string - the string (or strings) being searched or matched.
158  *
159  * pattern buffer - a structure of type `struct re_pattern_buffer'
160  *       This in turn contains a `struct rx', which holds the
161  *       NFA compiled from a pattern, as well as some of the state
162  *       of a matcher using the pattern.
163  *
164  * NFA - nondeterministic finite automata.  Some people
165  *       use this term to a member of the class of
166  *       regular automata (those corresponding to a regular
167  *       language).  However, in this code, the meaning is
168  *       more general.  The automata used by Rx are comperable
169  *       in power to what are usually called `push down automata'.
170  *
171  *       Two NFA are built by rx for every pattern.  One is built
172  *       by the compiler.  The other is built from the first, on
173  *       the fly, by the matcher.  The latter is called the `superstate
174  *       NFA' because its states correspond to sets of states from
175  *       the first NFA.  (Joe Keane gets credit for the name
176  *       `superstate NFA').
177  *
178  * NFA edges
179  * epsilon edges
180  * side-effect edges - The NFA compiled from a pattern can have three
181  *       kinds of edges.  Epsilon edges can be taken freely anytime
182  *       their source state is reached.  Character set edges can be
183  *       taken when their source state is reached and when the next
184  *       character in the buffer is a member of the set.  Side effect
185  *       edges imply a transition that can only be taken after the
186  *       indicated side effect has been successfully accomplished.
187  *       Some examples of side effects are:
188  *
189  *		Storing the current match position to record the
190  *              location of a parentesized subexpression.
191  *
192  *              Advancing the matcher over N characters if they
193  *              match the N characters previously matched by a
194  *              parentesized subexpression.
195  *
196  *       Both of those kinds of edges occur in the NFA generated
197  *       by the pattern:  \(.\)\1
198  *
199  *       Epsilon and side effect edges are similar.  Unfortunately,
200  *       some of the code uses the name `epsilon edge' to mean
201  *       both epsilon and side effect edges.  For example,  the
202  *       function has_non_idempotent_epsilon_path computes the existance
203  *       of a non-trivial path containing only a mix of epsilon and
204  *       side effect edges.  In that case `nonidempotent epsilon' is being
205  *       used to mean `side effect'.
206  */
207 
208 
209 
210 
211 
212 /* LOW LEVEL PATTERN BUFFERS */
213 
214 /* Suppose that from some NFA state, more than one path through
215  * side-effect edges is possible.  In what order should the paths
216  * be tried?  A function of type rx_se_list_order answers that
217  * question.  It compares two lists of side effects, and says
218  * which list comes first.
219  */
220 
221 #if defined(__STDC__) || defined(MSDOS)
222 typedef int (*rx_se_list_order) (struct rx *,
223 				 struct rx_se_list *,
224 				 struct rx_se_list *);
225 #else
226 typedef int (*rx_se_list_order) ();
227 #endif
228 
229 
230 
231 /* Struct RX holds a compiled regular expression - that is, an nfa
232  * ready to be converted on demand to a more efficient superstate nfa.
233  * This is for the low level interface.  The high-level interfaces enclose
234  * this in a `struct re_pattern_buffer'.
235  */
236 struct rx
237 {
238   /* The compiler assigns a unique id to every pattern.
239    * Like sequence numbers in X, there is a subtle bug here
240    * if you use Rx in a system that runs for a long time.
241    * But, because of the way the caches work out, it is almost
242    * impossible to trigger the Rx version of this bug.
243    *
244    * The id is used to validate superstates found in a cache
245    * of superstates.  It isn't sufficient to let a superstate
246    * point back to the rx for which it was compiled -- the caller
247    * may be re-using a `struct rx' in which case the superstate
248    * is not really valid.  So instead, superstates are validated
249    * by checking the sequence number of the pattern for which
250    * they were built.
251    */
252   int rx_id;
253 
254   /* This is memory mgt. state for superstates.  This may be
255    * shared by more than one struct rx.
256    */
257   struct rx_cache * cache;
258 
259   /* Every regex defines the size of its own character set.
260    * A superstate has an array of this size, with each element
261    * a `struct rx_inx'.  So, don't make this number too large.
262    * In particular, don't make it 2^16.
263    */
264   int local_cset_size;
265 
266   /* After the NFA is built, it is copied into a contiguous region
267    * of memory (mostly for compatability with GNU regex).
268    * Here is that region, and it's size:
269    */
270   void * buffer;
271   unsigned long allocated;
272 
273   /* Clients of RX can ask for some extra storage in the space pointed
274    * to by BUFFER.  The field RESERVED is an input parameter to the
275    * compiler.  After compilation, this much space will be available
276    * at (buffer + allocated - reserved)
277    */
278   unsigned long reserved;
279 
280   /* --------- The remaining fields are for internal use only. --------- */
281   /* --------- But! they must be initialized to 0.	       --------- */
282 
283   /* NODEC is the number of nodes in the NFA with non-epsilon
284    * transitions.
285    */
286   int nodec;
287 
288   /* EPSNODEC is the number of nodes with only epsilon transitions. */
289   int epsnodec;
290 
291   /* The sum (NODEC + EPSNODEC) is the total number of states in the
292    * compiled NFA.
293    */
294 
295   /* Lists of side effects as stored in the NFA are `hash consed'..meaning
296    * that lists with the same elements are ==.  During compilation,
297    * this table facilitates hash-consing.
298    */
299   struct rx_hash se_list_memo;
300 
301   /* Lists of NFA states are also hashed.
302    */
303   struct rx_hash set_list_memo;
304 
305 
306 
307 
308   /* The compiler and matcher must build a number of instruction frames.
309    * The format of these frames is fixed (c.f. struct rx_inx).  The values
310    * of the instructions is not fixed.
311    *
312    * An enumerated type (enum rx_opcode) defines the set of instructions
313    * that the compiler or matcher might generate.  When filling an instruction
314    * frame, the INX field is found by indexing this instruction table
315    * with an opcode:
316    */
317   void ** instruction_table;
318 
319   /* The list of all states in an NFA.
320    * During compilation, the NEXT field of NFA states links this list.
321    * After compilation, all the states are compacted into an array,
322    * ordered by state id numbers.  At that time, this points to the base
323    * of that array.
324    */
325   struct rx_nfa_state *nfa_states;
326 
327   /* Every nfa begins with one distinguished starting state:
328    */
329   struct rx_nfa_state *start;
330 
331   /* This orders the search through super-nfa paths.
332    * See the comment near the typedef of rx_se_list_order.
333    */
334   rx_se_list_order se_list_cmp;
335 
336   struct rx_superset * start_set;
337 };
338 
339 
340 
341 
342 /* SYNTAX TREES */
343 
344 /* Compilation is in stages.
345  *
346  * In the first stage, a pattern specified by a string is
347  * translated into a syntax tree.  Later stages will convert
348  * the syntax tree into an NFA optimized for conversion to a
349  * superstate-NFA.
350  *
351  * This page is about syntax trees.
352  */
353 
354 enum rexp_node_type
355 {
356   r_cset,			/* Match from a character set. `a' or `[a-z]'*/
357   r_cset2,			/* Match from a multi-byte second character set. */
358   r_concat,			/* Concat two subexpressions.   `ab' */
359   r_alternate,			/* Choose one of two subexpressions. `a\|b' */
360   r_opt,			/* Optional subexpression. `a?' */
361   r_star,			/* Repeated subexpression. `a*' */
362 
363 
364   /* A 2phase-star is a variation on a repeated subexpression.
365    * In this case, there are two subexpressions.  The first, if matched,
366    * begins a repitition (otherwise, the whole expression is matches the
367    * empth string).
368    *
369    * After matching the first subexpression, a 2phase star either finishes,
370    * or matches the second subexpression.  If the second subexpression is
371    * matched, then the whole construct repeats.
372    *
373    * 2phase stars are used in two circumstances.  First, they
374    * are used as part of the implementation of POSIX intervals (counted
375    * repititions).  Second, they are used to implement proper star
376    * semantics when the repeated subexpression contains paths of
377    * only side effects.  See rx_compile for more information.
378    */
379   r_2phase_star,
380 
381 
382   /* c.f. "typedef void * rx_side_effect" */
383   r_side_effect,
384 
385   /* This is an extension type:  It is for transient use in source->source
386    * transformations (implemented over syntax trees).
387    */
388   r_data
389 };
390 
391 /* A side effect is a matcher-specific action associated with
392  * transitions in the NFA.  The details of side effects are up
393  * to the matcher.  To the compiler and superstate constructors
394  * side effects are opaque:
395  */
396 
397 typedef void * rx_side_effect;
398 
399 /* Nodes in a syntax tree are of this type:
400  */
401 struct rexp_node
402 {
403   enum rexp_node_type type;
404   union
405   {
406     rx_Bitset cset;
407     rx_side_effect side_effect;
408     struct
409       {
410 	struct rexp_node *left;
411 	struct rexp_node *right;
412       } pair;
413     void * data;
414   } params;
415 };
416 
417 
418 
419 /* NFA
420  *
421  * A syntax tree is compiled into an NFA.  This page defines the structure
422  * of that NFA.
423  */
424 
425 struct rx_nfa_state
426 {
427   /* These are kept in a list as the NFA is being built. */
428   struct rx_nfa_state *next;
429 
430   /* After the NFA is built, states are given integer id's.
431    * States whose outgoing transitions are all either epsilon or
432    * side effect edges are given ids less than 0.  Other states
433    * are given successive non-negative ids starting from 0.
434    */
435   int id;
436 
437   /* The list of NFA edges that go from this state to some other. */
438   struct rx_nfa_edge *edges;
439 
440   /* If you land in this state, then you implicitly land
441    * in all other states reachable by only epsilon translations.
442    * Call the set of maximal paths to such states the epsilon closure
443    * of this state.
444    *
445    * There may be other states that are reachable by a mixture of
446    * epsilon and side effect edges.  Consider the set of maximal paths
447    * of that sort from this state.  Call it the epsilon-side-effect
448    * closure of the state.
449    *
450    * The epsilon closure of the state is a subset of the epsilon-side-
451    * effect closure.  It consists of all the paths that contain
452    * no side effects -- only epsilon edges.
453    *
454    * The paths in the epsilon-side-effect closure  can be partitioned
455    * into equivalance sets. Two paths are equivalant if they have the
456    * same set of side effects, in the same order.  The epsilon-closure
457    * is one of these equivalance sets.  Let's call these equivalance
458    * sets: observably equivalant path sets.  That name is chosen
459    * because equivalance of two paths means they cause the same side
460    * effects -- so they lead to the same subsequent observations other
461    * than that they may wind up in different target states.
462    *
463    * The superstate nfa, which is derived from this nfa, is based on
464    * the observation that all of the paths in an observably equivalant
465    * path set can be explored at the same time, provided that the
466    * matcher keeps track not of a single nfa state, but of a set of
467    * states.   In particular, after following all the paths in an
468    * observably equivalant set, you wind up at a set of target states.
469    * That set of target states corresponds to one state in the
470    * superstate NFA.
471    *
472    * Staticly, before matching begins, it is convenient to analyze the
473    * nfa.  Each state is labeled with a list of the observably
474    * equivalant path sets who's union covers all the
475    * epsilon-side-effect paths beginning in this state.  This list is
476    * called the possible futures of the state.
477    *
478    * A trivial example is this NFA:
479    *             s1
480    *         A  --->  B
481    *
482    *             s2
483    *            --->  C
484    *
485    *             epsilon           s1
486    *            --------->  D   ------> E
487    *
488    *
489    * In this example, A has two possible futures.
490    * One invokes the side effect `s1' and contains two paths,
491    * one ending in state B, the other in state E.
492    * The other invokes the side effect `s2' and contains only
493    * one path, landing in state C.
494    */
495   struct rx_possible_future *futures;
496 
497 
498   /* There are exactly two distinguished states in every NFA: */
499   unsigned int is_final:1;
500   unsigned int is_start:1;
501 
502   /* These are used during NFA construction... */
503   unsigned int eclosure_needed:1;
504   unsigned int mark:1;
505 };
506 
507 
508 /* An edge in an NFA is typed: */
509 enum rx_nfa_etype
510 {
511   /* A cset edge is labled with a set of characters one of which
512    * must be matched for the edge to be taken.
513    */
514   ne_cset,
515 
516   /* An epsilon edge is taken whenever its starting state is
517    * reached.
518    */
519   ne_epsilon,
520 
521   /* A side effect edge is taken whenever its starting state is
522    * reached.  Side effects may cause the match to fail or the
523    * position of the matcher to advance.
524    */
525   ne_side_effect		/* A special kind of epsilon. */
526 };
527 
528 struct rx_nfa_edge
529 {
530   struct rx_nfa_edge *next;
531   enum rx_nfa_etype type;
532   struct rx_nfa_state *dest;
533   union
534   {
535     rx_Bitset cset;
536     rx_side_effect side_effect;
537   } params;
538 };
539 
540 
541 
542 /* A possible future consists of a list of side effects
543  * and a set of destination states.  Below are their
544  * representations.  These structures are hash-consed which
545  * means that lists with the same elements share a representation
546  * (their addresses are ==).
547  */
548 
549 struct rx_nfa_state_set
550 {
551   struct rx_nfa_state * car;
552   struct rx_nfa_state_set * cdr;
553 };
554 
555 struct rx_se_list
556 {
557   rx_side_effect car;
558   struct rx_se_list * cdr;
559 };
560 
561 struct rx_possible_future
562 {
563   struct rx_possible_future *next;
564   struct rx_se_list * effects;
565   struct rx_nfa_state_set * destset;
566 };
567 
568 
569 
570 /* This begins the description of the superstate NFA.
571  *
572  * The superstate NFA corresponds to the NFA in these ways:
573  *
574  * Every superstate NFA states SUPER correspond to sets of NFA states,
575  * nfa_states(SUPER).
576  *
577  * Superstate edges correspond to NFA paths.
578  *
579  * The superstate has no epsilon transitions;
580  * every edge has a character label, and a (possibly empty) side
581  * effect label.   The side effect label corresponds to a list of
582  * side effects that occur in the NFA.  These parts are referred
583  * to as:   superedge_character(EDGE) and superedge_sides(EDGE).
584  *
585  * For a superstate edge EDGE starting in some superstate SUPER,
586  * the following is true (in pseudo-notation :-):
587  *
588  *       exists DEST in nfa_states s.t.
589  *         exists nfaEDGE in nfa_edges s.t.
590  *                 origin (nfaEDGE) == DEST
591  *              && origin (nfaEDGE) is a member of nfa_states(SUPER)
592  *              && exists PF in possible_futures(dest(nfaEDGE)) s.t.
593  * 	                sides_of_possible_future (PF) == superedge_sides (EDGE)
594  *
595  * also:
596  *
597  *      let SUPER2 := superedge_destination(EDGE)
598  *          nfa_states(SUPER2)
599  *           == union of all nfa state sets S s.t.
600  *                          exists PF in possible_futures(dest(nfaEDGE)) s.t.
601  * 	                       sides_of_possible_future (PF) == superedge_sides (EDGE)
602  *                          && S == dests_of_possible_future (PF) }
603  *
604  * Or in english, every superstate is a set of nfa states.  A given
605  * character and a superstate implies many transitions in the NFA --
606  * those that begin with an edge labeled with that character from a
607  * state in the set corresponding to the superstate.
608  *
609  * The destinations of those transitions each have a set of possible
610  * futures.  A possible future is a list of side effects and a set of
611  * destination NFA states.  Two sets of possible futures can be
612  * `merged' by combining all pairs of possible futures that have the
613  * same side effects.  A pair is combined by creating a new future
614  * with the same side effect but the union of the two destination sets.
615  * In this way, all the possible futures suggested by a superstate
616  * and a character can be merged into a set of possible futures where
617  * no two elements of the set have the same set of side effects.
618  *
619  * The destination of a possible future, being a set of NFA states,
620  * corresponds to a supernfa state.  So, the merged set of possible
621  * futures we just created can serve as a set of edges in the
622  * supernfa.
623  *
624  * The representation of the superstate nfa and the nfa is critical.
625  * The nfa has to be compact, but has to facilitate the rapid
626  * computation of missing superstates.  The superstate nfa has to
627  * be fast to interpret, lazilly constructed, and bounded in space.
628  *
629  * To facilitate interpretation, the superstate data structures are
630  * peppered with `instruction frames'.  There is an instruction set
631  * defined below which matchers using the supernfa must be able to
632  * interpret.
633  *
634  * We'd like to make it possible but not mandatory to use code
635  * addresses to represent instructions (c.f. gcc's computed goto).
636  * Therefore, we define an enumerated type of opcodes, and when
637  * writing one of these instructions into a data structure, use
638  * the opcode as an index into a table of instruction values.
639  *
640  * Here are the opcodes that occur in the superstate nfa:
641  */
642 
643 
644 /* Every superstate contains a table of instruction frames indexed
645  * by characters.  A normal `move' in a matcher is to fetch the next
646  * character and use it as an index into a superstates transition
647  * table.
648  *
649  * In the fasted case, only one edge follows from that character.
650  * In other cases there is more work to do.
651  *
652  * The descriptions of the opcodes refer to data structures that are
653  * described further below.
654  */
655 
656 enum rx_opcode
657 {
658   /*
659    * BACKTRACK_POINT is invoked when a character transition in
660    * a superstate leads to more than one edge.  In that case,
661    * the edges have to be explored independently using a backtracking
662    * strategy.
663    *
664    * A BACKTRACK_POINT instruction is stored in a superstate's
665    * transition table for some character when it is known that that
666    * character crosses more than one edge.  On encountering this
667    * instruction, the matcher saves enough state to backtrack to this
668    * point in the match later.
669    */
670   rx_backtrack_point = 0,	/* data is (struct transition_class *) */
671 
672   /*
673    * RX_DO_SIDE_EFFECTS evaluates the side effects of an epsilon path.
674    * There is one occurence of this instruction per rx_distinct_future.
675    * This instruction is skipped if a rx_distinct_future has no side effects.
676    */
677   rx_do_side_effects = rx_backtrack_point + 1,
678 
679   /* data is (struct rx_distinct_future *) */
680 
681   /*
682    * RX_CACHE_MISS instructions are stored in rx_distinct_futures whose
683    * destination superstate has been reclaimed (or was never built).
684    * It recomputes the destination superstate.
685    * RX_CACHE_MISS is also stored in a superstate transition table before
686    * any of its edges have been built.
687    */
688   rx_cache_miss = rx_do_side_effects + 1,
689   /* data is (struct rx_distinct_future *) */
690 
691   /*
692    * RX_NEXT_CHAR is called to consume the next character and take the
693    * corresponding transition.  This is the only instruction that uses
694    * the DATA field of the instruction frame instead of DATA_2.
695    * (see EXPLORE_FUTURE in regex.c).
696    */
697   rx_next_char = rx_cache_miss + 1, /* data is (struct superstate *) */
698 
699   /* RX_BACKTRACK indicates that a transition fails.
700    */
701   rx_backtrack = rx_next_char + 1, /* no data */
702 
703   /*
704    * RX_ERROR_INX is stored only in places that should never be executed.
705    */
706   rx_error_inx = rx_backtrack + 1, /* Not supposed to occur. */
707 
708   rx_num_instructions = rx_error_inx + 1
709 };
710 
711 /* An id_instruction_table holds the values stored in instruction
712  * frames.  The table is indexed by the enums declared above.
713  */
714 extern void * rx_id_instruction_table[rx_num_instructions];
715 
716 /* The heart of the matcher is a `word-code-interpreter'
717  * (like a byte-code interpreter, except that instructions
718  * are a full word wide).
719  *
720  * Instructions are not stored in a vector of code, instead,
721  * they are scattered throughout the data structures built
722  * by the regexp compiler and the matcher.  One word-code instruction,
723  * together with the arguments to that instruction, constitute
724  * an instruction frame (struct rx_inx).
725  *
726  * This structure type is padded by hand to a power of 2 because
727  * in one of the dominant cases, we dispatch by indexing a table
728  * of instruction frames.  If that indexing can be accomplished
729  * by just a shift of the index, we're happy.
730  *
731  * Instructions take at most one argument, but there are two
732  * slots in an instruction frame that might hold that argument.
733  * These are called data and data_2.  The data slot is only
734  * used for one instruction (RX_NEXT_CHAR).  For all other
735  * instructions, data should be set to 0.
736  *
737  * RX_NEXT_CHAR is the most important instruction by far.
738  * By reserving the data field for its exclusive use,
739  * instruction dispatch is sped up in that case.  There is
740  * no need to fetch both the instruction and the data,
741  * only the data is needed.  In other words, a `cycle' begins
742  * by fetching the field data.  If that is non-0, then it must
743  * be the destination state of a next_char transition, so
744  * make that value the current state, advance the match position
745  * by one character, and start a new cycle.  On the other hand,
746  * if data is 0, fetch the instruction and do a more complicated
747  * dispatch on that.
748  */
749 
750 struct rx_inx
751 {
752   void * data;
753   void * data_2;
754   void * inx;
755   void * fnord;
756 };
757 
758 #ifndef RX_TAIL_ARRAY
759 #define RX_TAIL_ARRAY  1
760 #endif
761 
762 /* A superstate corresponds to a set of nfa states.  Those sets are
763  * represented by STRUCT RX_SUPERSET.  The constructors
764  * guarantee that only one (shared) structure is created for a given set.
765  */
766 struct rx_superset
767 {
768   int refs;			/* This is a reference counted structure. */
769 
770   /* We keep these sets in a cache because (in an unpredictable way),
771    * the same set is often created again and again.  But that is also
772    * problematic -- compatibility with POSIX and GNU regex requires
773    * that we not be able to tell when a program discards a particular
774    * NFA (thus invalidating the supersets created from it).
775    *
776    * But when a cache hit appears to occur, we will have in hand the
777    * nfa for which it may have happened.  That is why every nfa is given
778    * its own sequence number.  On a cache hit, the cache is validated
779    * by comparing the nfa sequence number to this field:
780    */
781   int id;
782 
783   struct rx_nfa_state * car;	/* May or may not be a valid addr. */
784   struct rx_superset * cdr;
785 
786   /* If the corresponding superstate exists: */
787   struct rx_superstate * superstate;
788 
789 
790   /* There is another bookkeeping problem.  It is expensive to
791    * compute the starting nfa state set for an nfa.  So, once computed,
792    * it is cached in the `struct rx'.
793    *
794    * But, the state set can be flushed from the superstate cache.
795    * When that happens, we can't know if the corresponding `struct rx'
796    * is still alive or if it has been freed or re-used by the program.
797    * So, the cached pointer to this set in a struct rx might be invalid
798    * and we need a way to validate it.
799    *
800    * Fortunately, even if this set is flushed from the cache, it is
801    * not freed.  It just goes on the free-list of supersets.
802    * So we can still examine it.
803    *
804    * So to validate a starting set memo, check to see if the
805    * starts_for field still points back to the struct rx in question,
806    * and if the ID matches the rx sequence number.
807    */
808   struct rx * starts_for;
809 
810   /* This is used to link into a hash bucket so these objects can
811    * be `hash-consed'.
812    */
813   struct rx_hash_item hash_item;
814 };
815 
816 #define rx_protect_superset(RX,CON) (++(CON)->refs)
817 
818 /* The terminology may be confusing (rename this structure?).
819  * Every character occurs in at most one rx_super_edge per super-state.
820  * But, that structure might have more than one option, indicating a point
821  * of non-determinism.
822  *
823  * In other words, this structure holds a list of superstate edges
824  * sharing a common starting state and character label.  The edges
825  * are in the field OPTIONS.  All superstate edges sharing the same
826  * starting state and character are in this list.
827  */
828 struct rx_super_edge
829 {
830   struct rx_super_edge *next;
831   struct rx_inx rx_backtrack_frame;
832   int cset_size;
833   rx_Bitset cset;
834   struct rx_distinct_future *options;
835 };
836 
837 /* A superstate is a set of nfa states (RX_SUPERSET) along
838  * with a transition table.  Superstates are built on demand and reclaimed
839  * without warning.  To protect a superstate from this ghastly fate,
840  * use LOCK_SUPERSTATE.
841  */
842 struct rx_superstate
843 {
844   int rx_id;			/* c.f. the id field of rx_superset */
845   int locks;			/* protection from reclamation */
846 
847   /* Within a superstate cache, all the superstates are kept in a big
848    * queue.  The tail of the queue is the state most likely to be
849    * reclaimed.  The *recyclable fields hold the queue position of
850    * this state.
851    */
852   struct rx_superstate * next_recyclable;
853   struct rx_superstate * prev_recyclable;
854 
855   /* The supernfa edges that exist in the cache and that have
856    * this state as their destination are kept in this list:
857    */
858   struct rx_distinct_future * transition_refs;
859 
860   /* The list of nfa states corresponding to this superstate: */
861   struct rx_superset * contents;
862 
863   /* The list of edges in the cache beginning from this state. */
864   struct rx_super_edge * edges;
865 
866   /* A tail of the recyclable queue is marked as semifree.  A semifree
867    * state has no incoming next_char transitions -- any transition
868    * into a semifree state causes a complex dispatch with the side
869    * effect of rescuing the state from its semifree state.
870    *
871    * An alternative to this might be to make next_char more expensive,
872    * and to move a state to the head of the recyclable queue whenever
873    * it is entered.  That way, popular states would never be recycled.
874    *
875    * But unilaterally making next_char more expensive actually loses.
876    * So, incoming transitions are only made expensive for states near
877    * the tail of the recyclable queue.  The more cache contention
878    * there is, the more frequently a state will have to prove itself
879    * and be moved back to the front of the queue.  If there is less
880    * contention, then popular states just aggregate in the front of
881    * the queue and stay there.
882    */
883   int is_semifree;
884 
885 
886   /* This keeps track of the size of the transition table for this
887    * state.  There is a half-hearted attempt to support variable sized
888    * superstates.
889    */
890   int trans_size;
891 
892   /* Indexed by characters... */
893   struct rx_inx transitions[RX_TAIL_ARRAY];
894 };
895 
896 
897 /* A list of distinct futures define the edges that leave from a
898  * given superstate on a given character.  c.f. rx_super_edge.
899  */
900 
901 struct rx_distinct_future
902 {
903   struct rx_distinct_future * next_same_super_edge[2];
904   struct rx_distinct_future * next_same_dest;
905   struct rx_distinct_future * prev_same_dest;
906   struct rx_superstate * present;	/* source state */
907   struct rx_superstate * future;	/* destination state */
908   struct rx_super_edge * edge;
909 
910 
911   /* The future_frame holds the instruction that should be executed
912    * after all the side effects are done, when it is time to complete
913    * the transition to the next state.
914    *
915    * Normally this is a next_char instruction, but it may be a
916    * cache_miss instruction as well, depending on whether or not
917    * the superstate is in the cache and semifree.
918    *
919    * If this is the only future for a given superstate/char, and
920    * if there are no side effects to be performed, this frame is
921    * not used (directly) at all.  Instead, its contents are copied
922    * into the transition table of the starting state of this dist. future.
923    */
924   struct rx_inx future_frame;
925 
926   struct rx_inx side_effects_frame;
927   struct rx_se_list * effects;
928 };
929 
930 #define rx_lock_superstate(R,S)  ((S)->locks++)
931 #define rx_unlock_superstate(R,S) (--(S)->locks)
932 
933 
934 /* This page destined for rx.h */
935 
936 struct rx_blocklist
937 {
938   struct rx_blocklist * next;
939   int bytes;
940 };
941 
942 struct rx_freelist
943 {
944   struct rx_freelist * next;
945 };
946 
947 struct rx_cache;
948 
949 #if defined(__STDC__) || defined(MSDOS)
950 typedef void (*rx_morecore_fn)(struct rx_cache *);
951 #else
952 typedef void (*rx_morecore_fn)();
953 #endif
954 
955 /* You use this to control the allocation of superstate data
956  * during matching.  Most of it should be initialized to 0.
957  *
958  * A MORECORE function is necessary.  It should allocate
959  * a new block of memory or return 0.
960  * A default that uses malloc is called `rx_morecore'.
961  *
962  * The number of SUPERSTATES_ALLOWED indirectly limits how much memory
963  * the system will try to allocate.  The default is 128.  Batch style
964  * applications that are very regexp intensive should use as high a number
965  * as possible without thrashing.
966  *
967  * The LOCAL_CSET_SIZE is the number of characters in a character set.
968  * It is therefore the number of entries in a superstate transition table.
969  * Generally, it should be 256.  If your character set has 16 bits,
970  * it is better to translate your regexps into equivalent 8 bit patterns.
971  */
972 
973 struct rx_cache
974 {
975   struct rx_hash_rules superset_hash_rules;
976 
977   /* Objects are allocated by incrementing a pointer that
978    * scans across rx_blocklists.
979    */
980   struct rx_blocklist * memory;
981   struct rx_blocklist * memory_pos;
982   unsigned int bytes_left;
983   char * memory_addr;
984   rx_morecore_fn morecore;
985 
986   /* Freelists. */
987   struct rx_freelist * free_superstates;
988   struct rx_freelist * free_transition_classes;
989   struct rx_freelist * free_discernable_futures;
990   struct rx_freelist * free_supersets;
991   struct rx_freelist * free_hash;
992 
993   /* Two sets of superstates -- those that are semifreed, and those
994    * that are being used.
995    */
996   struct rx_superstate * lru_superstate;
997   struct rx_superstate * semifree_superstate;
998 
999   struct rx_superset * empty_superset;
1000 
1001   int superstates;
1002   int semifree_superstates;
1003   int hits;
1004   int misses;
1005   int superstates_allowed;
1006 
1007   int local_cset_size;
1008   void ** instruction_table;
1009 
1010   struct rx_hash superset_table;
1011 };
1012 
1013 
1014 
1015 /* The lowest-level search function supports arbitrarily fragmented
1016  * strings and (optionally) suspendable/resumable searches.
1017  *
1018  * Callers have to provide a few hooks.
1019  */
1020 
1021 #ifndef __GNUC__
1022 #if defined(__STDC__) || defined(MSDOS)
1023 #define __const__ const
1024 #else
1025 #define __const__
1026 #endif
1027 #endif
1028 
1029 /* This holds a matcher position */
1030 struct rx_string_position
1031 {
1032   __const__ unsigned char * pos;	/* The current pos. */
1033   __const__ unsigned char * string; /* The current string burst. */
1034   __const__ unsigned char * end;	/* First invalid position >= POS. */
1035   int offset;			/* Integer address of the current burst. */
1036   int size;			/* Current string's size. */
1037   int search_direction;		/* 1 or -1 */
1038   int search_end;		/* First position to not try. */
1039 };
1040 
1041 
1042 enum rx_get_burst_return
1043 {
1044   rx_get_burst_continuation,
1045   rx_get_burst_error,
1046   rx_get_burst_ok,
1047   rx_get_burst_no_more
1048 };
1049 
1050 
1051 /* A call to get burst should make POS valid.  It might be invalid
1052  * if the STRING field doesn't point to a burst that actually
1053  * contains POS.
1054  *
1055  * GET_BURST should take a clue from SEARCH_DIRECTION (1 or -1) as to
1056  * whether or not to pad to the left.  Padding to the right is always
1057  * appropriate, but need not go past the point indicated by STOP.
1058  *
1059  * If a continuation is returned, then the reentering call to
1060  * a search function will retry the get_burst.
1061  */
1062 
1063 #if defined(__STDC__) || defined(MSDOS)
1064 typedef enum rx_get_burst_return
1065   (*rx_get_burst_fn) (struct rx_string_position * pos,
1066 		      void * app_closure,
1067 		      int stop);
1068 
1069 #else
1070 typedef enum rx_get_burst_return (*rx_get_burst_fn) ();
1071 #endif
1072 
1073 
1074 enum rx_back_check_return
1075 {
1076   rx_back_check_continuation,
1077   rx_back_check_error,
1078   rx_back_check_pass,
1079   rx_back_check_fail
1080 };
1081 
1082 /* Back_check should advance the position it is passed
1083  * over rparen - lparen characters and return pass iff
1084  * the characters starting at POS match those indexed
1085  * by [LPAREN..RPAREN].
1086  *
1087  * If a continuation is returned, then the reentering call to
1088  * a search function will retry the back_check.
1089  */
1090 
1091 #if defined(__STDC__)
1092 typedef enum rx_back_check_return
1093   (*rx_back_check_fn) (struct rx_string_position * pos,
1094 		       int lparen,
1095 		       int rparen,
1096 		       unsigned char * translate,
1097 		       void * app_closure,
1098 		       int stop);
1099 
1100 #else
1101 typedef enum rx_back_check_return (*rx_back_check_fn) ();
1102 #endif
1103 
1104 
1105 
1106 
1107 /* A call to fetch_char should return the character at POS or POS + 1.
1108  * Returning continuations here isn't supported.  OFFSET is either 0 or 1
1109  * and indicates which characters is desired.
1110  */
1111 
1112 #if defined(__STDC__) || defined(MSDOS)
1113 typedef int (*rx_fetch_char_fn) (struct rx_string_position * pos,
1114 				 int offset,
1115 				 void * app_closure,
1116 				 int stop);
1117 #else
1118 typedef int (*rx_fetch_char_fn) ();
1119 #endif
1120 
1121 
1122 enum rx_search_return
1123 {
1124   rx_search_continuation = -4,
1125   rx_search_error = -3,
1126   rx_search_soft_fail = -2,	/* failed by running out of string */
1127   rx_search_fail = -1		/* failed only by reaching failure states */
1128   /* return values >= 0 indicate the position of a successful match */
1129 };
1130 
1131 
1132 
1133 
1134 
1135 
1136 /* regex.h
1137  *
1138  * The remaining declarations replace regex.h.
1139  */
1140 
1141 /* This is an array of error messages corresponding to the error codes.
1142  */
1143 extern __const__ char *re_error_msg[];
1144 
1145 /* If any error codes are removed, changed, or added, update the
1146    `re_error_msg' table in regex.c.  */
1147 typedef enum
1148 {
1149   REG_NOERROR = 0,	/* Success.  */
1150   REG_NOMATCH,		/* Didn't find a match (for regexec).  */
1151 
1152   /* POSIX regcomp return error codes.  (In the order listed in the
1153      standard.)  */
1154   REG_BADPAT,		/* Invalid pattern.  */
1155   REG_ECOLLATE,		/* Not implemented.  */
1156   REG_ECTYPE,		/* Invalid character class name.  */
1157   REG_EESCAPE,		/* Trailing backslash.  */
1158   REG_ESUBREG,		/* Invalid back reference.  */
1159   REG_EBRACK,		/* Unmatched left bracket.  */
1160   REG_EPAREN,		/* Parenthesis imbalance.  */
1161   REG_EBRACE,		/* Unmatched \{.  */
1162   REG_BADBR,		/* Invalid contents of \{\}.  */
1163   REG_ERANGE,		/* Invalid range end.  */
1164   REG_ESPACE,		/* Ran out of memory.  */
1165   REG_BADRPT,		/* No preceding re for repetition op.  */
1166 
1167   /* Error codes we've added.  */
1168   REG_EEND,		/* Premature end.  */
1169   REG_ESIZE,		/* Compiled pattern bigger than 2^16 bytes.  */
1170   REG_ERPAREN		/* Unmatched ) or \); not returned from regcomp.  */
1171 } reg_errcode_t;
1172 
1173 /* The regex.c support, as a client of rx, defines a set of possible
1174  * side effects that can be added to the edge lables of nfa edges.
1175  * Here is the list of sidef effects in use.
1176  */
1177 
1178 enum re_side_effects
1179 {
1180 #define RX_WANT_SE_DEFS 1
1181 #undef RX_DEF_SE
1182 #undef RX_DEF_CPLX_SE
1183 #define RX_DEF_SE(IDEM, NAME, VALUE)	      NAME VALUE,
1184 #define RX_DEF_CPLX_SE(IDEM, NAME, VALUE)     NAME VALUE,
1185 #include "rx.h"
1186 #undef RX_DEF_SE
1187 #undef RX_DEF_CPLX_SE
1188 #undef RX_WANT_SE_DEFS
1189 #if defined(MSDOS) && !defined(__GO32__)
1190 #define RX_RE_FLOOGLE_FLAP 32765
1191 #else
1192 #define RX_RE_FLOOGLE_FLAP 65533
1193 #endif
1194    re_floogle_flap = RX_RE_FLOOGLE_FLAP
1195 };
1196 
1197 /* These hold paramaters for the kinds of side effects that are possible
1198  * in the supported pattern languages.  These include things like the
1199  * numeric bounds of {} operators and the index of paren registers for
1200  * subexpression measurement or backreferencing.
1201  */
1202 struct re_se_params
1203 {
1204   enum re_side_effects se;
1205   int op1;
1206   int op2;
1207 };
1208 
1209 typedef unsigned long reg_syntax_t;
1210 
1211 struct re_pattern_buffer
1212 {
1213   struct rx rx;
1214   reg_syntax_t syntax;		/* See below for syntax bit definitions. */
1215 
1216   unsigned int no_sub:1;	/* If set, don't  return register offsets. */
1217   unsigned int not_bol:1;	/* If set, the anchors ('^' and '$') don't */
1218   unsigned int not_eol:1;	/*     match at the ends of the string.  */
1219   unsigned int newline_anchor:1;/* If true, an anchor at a newline matches.*/
1220   unsigned int least_subs:1;	/* If set, and returning registers, return
1221 				 * as few values as possible.  Only
1222 				 * backreferenced groups and group 0 (the whole
1223 				 * match) will be returned.
1224 				 */
1225 
1226   /* If true, this says that the matcher should keep registers on its
1227    * backtracking stack.  For many patterns, we can easily determine that
1228    * this isn't necessary.
1229    */
1230   unsigned int match_regs_on_stack:1;
1231   unsigned int search_regs_on_stack:1;
1232 
1233   /* is_anchored and begbuf_only are filled in by rx_compile. */
1234   unsigned int is_anchored:1;	/* Anchorded by ^? */
1235   unsigned int begbuf_only:1;	/* Anchored to char position 0? */
1236 
1237 
1238   /* If REGS_UNALLOCATED, allocate space in the `regs' structure
1239    * for `max (RE_NREGS, re_nsub + 1)' groups.
1240    * If REGS_REALLOCATE, reallocate space if necessary.
1241    * If REGS_FIXED, use what's there.
1242    */
1243 #define REGS_UNALLOCATED 0
1244 #define REGS_REALLOCATE 1
1245 #define REGS_FIXED 2
1246   unsigned int regs_allocated:2;
1247 
1248 
1249   /* Either a translate table to apply to all characters before
1250    * comparing them, or zero for no translation.  The translation
1251    * is applied to a pattern when it is compiled and to a string
1252    * when it is matched.
1253    */
1254   unsigned char * translate;
1255 
1256   /* If this is a valid pointer, it tells rx not to store the extents of
1257    * certain subexpressions (those corresponding to non-zero entries).
1258    * Passing 0x1 is the same as passing an array of all ones.  Passing 0x0
1259    * is the same as passing an array of all zeros.
1260    * The array should contain as many entries as their are subexps in the
1261    * regexp.
1262    *
1263    * For POSIX compatability, when using regcomp and regexec this field
1264    * is zeroed and ignored.
1265    */
1266   char * syntax_parens;
1267 
1268 	/* Number of subexpressions found by the compiler.  */
1269   size_t re_nsub;
1270 
1271   void * buffer;		/* Malloced memory for the nfa. */
1272   unsigned long allocated;	/* Size of that memory. */
1273 
1274   /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
1275    * the fastmap, if there is one, to skip over impossible
1276    * starting points for matches.  */
1277   char *fastmap;
1278 
1279   unsigned int fastmap_accurate:1; /* These three are internal. */
1280   unsigned int can_match_empty:1;
1281   struct rx_nfa_state * start;	/* The nfa starting state. */
1282 
1283   /* This is the list of iterator bounds for {lo,hi} constructs.
1284    * The memory pointed to is part of the rx->buffer.
1285    */
1286   struct re_se_params *se_params;
1287 
1288   /* This is a bitset representation of the fastmap.
1289    * This is a true fastmap that already takes the translate
1290    * table into account.
1291    */
1292   rx_Bitset fastset;
1293 };
1294 
1295 /* Type for byte offsets within the string.  POSIX mandates this.  */
1296 typedef int regoff_t;
1297 
1298 /* This is the structure we store register match data in.  See
1299    regex.texinfo for a full description of what registers match.  */
1300 struct re_registers
1301 {
1302   unsigned num_regs;
1303   regoff_t *start;
1304   regoff_t *end;
1305 };
1306 
1307 typedef struct re_pattern_buffer regex_t;
1308 
1309 /* POSIX specification for registers.  Aside from the different names than
1310    `re_registers', POSIX uses an array of structures, instead of a
1311    structure of arrays.  */
1312 typedef struct
1313 {
1314   regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
1315   regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
1316 } regmatch_t;
1317 
1318 
1319 /* The following bits are used to determine the regexp syntax we
1320    recognize.  The set/not-set meanings are chosen so that Emacs syntax
1321    remains the value 0.  The bits are given in alphabetical order, and
1322    the definitions shifted by one from the previous bit; thus, when we
1323    add or remove a bit, only one other definition need change.  */
1324 
1325 /* If this bit is not set, then \ inside a bracket expression is literal.
1326    If set, then such a \ quotes the following character.  */
1327 #define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long) 1)
1328 
1329 /* If this bit is not set, then + and ? are operators, and \+ and \? are
1330      literals.
1331    If set, then \+ and \? are operators and + and ? are literals.  */
1332 #define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
1333 
1334 /* If this bit is set, then character classes are supported.  They are:
1335      [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
1336      [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
1337    If not set, then character classes are not supported.  */
1338 #define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
1339 
1340 /* If this bit is set, then ^ and $ are always anchors (outside bracket
1341      expressions, of course).
1342    If this bit is not set, then it depends:
1343         ^  is an anchor if it is at the beginning of a regular
1344            expression or after an open-group or an alternation operator;
1345         $  is an anchor if it is at the end of a regular expression, or
1346            before a close-group or an alternation operator.
1347 
1348    This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
1349    POSIX draft 11.2 says that * etc. in leading positions is undefined.
1350    We already implemented a previous draft which made those constructs
1351    invalid, though, so we haven't changed the code back.  */
1352 #define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
1353 
1354 /* If this bit is set, then special characters are always special
1355      regardless of where they are in the pattern.
1356    If this bit is not set, then special characters are special only in
1357      some contexts; otherwise they are ordinary.  Specifically,
1358      * + ? and intervals are only special when not after the beginning,
1359      open-group, or alternation operator.  */
1360 #define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
1361 
1362 /* If this bit is set, then *, +, ?, and { cannot be first in an re or
1363      immediately after an alternation or begin-group operator.  */
1364 #define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
1365 
1366 /* If this bit is set, then . matches newline.
1367    If not set, then it doesn't.  */
1368 #define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
1369 
1370 /* If this bit is set, then . doesn't match NUL.
1371    If not set, then it does.  */
1372 #define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
1373 
1374 /* If this bit is set, nonmatching lists [^...] do not match newline.
1375    If not set, they do.  */
1376 #define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
1377 
1378 /* If this bit is set, either \{...\} or {...} defines an
1379      interval, depending on RE_NO_BK_BRACES.
1380    If not set, \{, \}, {, and } are literals.  */
1381 #define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
1382 
1383 /* If this bit is set, +, ? and | aren't recognized as operators.
1384    If not set, they are.  */
1385 #define RE_LIMITED_OPS (RE_INTERVALS << 1)
1386 
1387 /* If this bit is set, newline is an alternation operator.
1388    If not set, newline is literal.  */
1389 #define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
1390 
1391 /* If this bit is set, then `{...}' defines an interval, and \{ and \}
1392      are literals.
1393   If not set, then `\{...\}' defines an interval.  */
1394 #define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
1395 
1396 /* If this bit is set, (...) defines a group, and \( and \) are literals.
1397    If not set, \(...\) defines a group, and ( and ) are literals.  */
1398 #define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
1399 
1400 /* If this bit is set, then \<digit> matches <digit>.
1401    If not set, then \<digit> is a back-reference.  */
1402 #define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
1403 
1404 /* If this bit is set, then | is an alternation operator, and \| is literal.
1405    If not set, then \| is an alternation operator, and | is literal.  */
1406 #define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
1407 
1408 /* If this bit is set, then an ending range point collating higher
1409      than the starting range point, as in [z-a], is invalid.
1410    If not set, then when ending range point collates higher than the
1411      starting range point, the range is ignored.  */
1412 #define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
1413 
1414 /* If this bit is set, then an unmatched ) is ordinary.
1415    If not set, then an unmatched ) is invalid.  */
1416 #define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
1417 
1418 /* This global variable defines the particular regexp syntax to use (for
1419    some interfaces).  When a regexp is compiled, the syntax used is
1420    stored in the pattern buffer, so changing this does not affect
1421    already-compiled regexps.  */
1422 extern reg_syntax_t re_syntax_options;
1423 
1424 /* Define combinations of the above bits for the standard possibilities.
1425    (The [[[ comments delimit what gets put into the Texinfo file, so
1426    don't delete them!)  */
1427 /* [[[begin syntaxes]]] */
1428 #define RE_SYNTAX_EMACS 0
1429 
1430 #define RE_SYNTAX_AWK							\
1431   (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL			\
1432    | RE_NO_BK_PARENS            | RE_NO_BK_REFS				\
1433    | RE_NO_BK_VBAR               | RE_NO_EMPTY_RANGES			\
1434    | RE_UNMATCHED_RIGHT_PAREN_ORD)
1435 
1436 #define RE_SYNTAX_POSIX_AWK 						\
1437   (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
1438 
1439 #define RE_SYNTAX_GREP							\
1440   (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
1441    | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
1442    | RE_NEWLINE_ALT)
1443 
1444 #define RE_SYNTAX_EGREP							\
1445   (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
1446    | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
1447    | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
1448    | RE_NO_BK_VBAR)
1449 
1450 #define RE_SYNTAX_POSIX_EGREP						\
1451   (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
1452 
1453 #define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
1454 
1455 /* Syntax bits common to both basic and extended POSIX regex syntax.  */
1456 #define _RE_SYNTAX_POSIX_COMMON						\
1457   (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
1458    | RE_INTERVALS  | RE_NO_EMPTY_RANGES)
1459 
1460 #define RE_SYNTAX_POSIX_BASIC						\
1461   (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
1462 
1463 /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
1464    RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
1465    isn't minimal, since other operators, such as \`, aren't disabled.  */
1466 #define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
1467   (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
1468 
1469 #define RE_SYNTAX_POSIX_EXTENDED					\
1470   (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS			\
1471    | RE_CONTEXT_INDEP_OPS  | RE_NO_BK_BRACES				\
1472    | RE_NO_BK_PARENS       | RE_NO_BK_VBAR				\
1473    | RE_UNMATCHED_RIGHT_PAREN_ORD)
1474 
1475 /* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
1476    replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added.  */
1477 #define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
1478   (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
1479    | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
1480    | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
1481    | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
1482 /* [[[end syntaxes]]] */
1483 
1484 /* Maximum number of duplicates an interval can allow.  Some systems
1485    (erroneously) define this in other header files, but we want our
1486    value, so remove any previous define.  */
1487 #ifdef RE_DUP_MAX
1488 #undef RE_DUP_MAX
1489 #endif
1490 #define RE_DUP_MAX (0x7fff)
1491 
1492 
1493 
1494 /* POSIX `cflags' bits (i.e., information for `regcomp').  */
1495 
1496 /* If this bit is set, then use extended regular expression syntax.
1497    If not set, then use basic regular expression syntax.  */
1498 #define REG_EXTENDED 1
1499 
1500 /* If this bit is set, then ignore case when matching.
1501    If not set, then case is significant.  */
1502 #define REG_ICASE (REG_EXTENDED << 1)
1503 
1504 /* If this bit is set, then anchors do not match at newline
1505      characters in the string.
1506    If not set, then anchors do match at newlines.  */
1507 #define REG_NEWLINE (REG_ICASE << 1)
1508 
1509 /* If this bit is set, then report only success or fail in regexec.
1510    If not set, then returns differ between not matching and errors.  */
1511 #define REG_NOSUB (REG_NEWLINE << 1)
1512 
1513 
1514 /* POSIX `eflags' bits (i.e., information for regexec).  */
1515 
1516 /* If this bit is set, then the beginning-of-line operator doesn't match
1517      the beginning of the string (presumably because it's not the
1518      beginning of a line).
1519    If not set, then the beginning-of-line operator does match the
1520      beginning of the string.  */
1521 #define REG_NOTBOL 1
1522 
1523 /* Like REG_NOTBOL, except for the end-of-line.  */
1524 #define REG_NOTEOL (1 << 1)
1525 
1526 /* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
1527  * `re_match_2' returns information about at least this many registers
1528  * the first time a `regs' structure is passed.
1529  *
1530  * Also, this is the greatest number of backreferenced subexpressions
1531  * allowed in a pattern being matched without caller-supplied registers.
1532  */
1533 #ifndef RE_NREGS
1534 #define RE_NREGS 30
1535 #endif
1536 
1537 extern int rx_cache_bound;
1538 extern char rx_version_string[];
1539 
1540 
1541 
1542 #ifdef RX_WANT_RX_DEFS
1543 
1544 /* This is decls to the interesting subsystems and lower layers
1545  * of rx.  Everything which doesn't have a public counterpart in
1546  * regex.c is declared here.
1547  */
1548 
1549 
1550 #if defined(__STDC__)
1551 typedef void (*rx_hash_freefn) (struct rx_hash_item * it);
1552 #else /* ndef __STDC__ */
1553 typedef void (*rx_hash_freefn) ();
1554 #endif /* ndef __STDC__ */
1555 
1556 
1557 
1558 
1559 #if defined(__STDC__) || defined(MSDOS)
1560 RX_DECL int rx_bitset_is_equal (int size, rx_Bitset a, rx_Bitset b);
1561 RX_DECL int rx_bitset_is_subset (int size, rx_Bitset a, rx_Bitset b);
1562 RX_DECL int rx_bitset_empty (int size, rx_Bitset set);
1563 RX_DECL void rx_bitset_null (int size, rx_Bitset b);
1564 RX_DECL void rx_bitset_universe (int size, rx_Bitset b);
1565 RX_DECL void rx_bitset_complement (int size, rx_Bitset b);
1566 RX_DECL void rx_bitset_assign (int size, rx_Bitset a, rx_Bitset b);
1567 RX_DECL void rx_bitset_union (int size, rx_Bitset a, rx_Bitset b);
1568 RX_DECL void rx_bitset_intersection (int size,
1569 				     rx_Bitset a, rx_Bitset b);
1570 RX_DECL void rx_bitset_difference (int size, rx_Bitset a, rx_Bitset b);
1571 #if 0
1572 RX_DECL void rx_bitset_revdifference (int size,
1573 				      rx_Bitset a, rx_Bitset b);
1574 #endif
1575 #ifdef emacs
1576 RX_DECL void rx_bitset_xor (int size, rx_Bitset a, rx_Bitset b);
1577 #endif
1578 RX_DECL unsigned long rx_bitset_hash (int size, rx_Bitset b);
1579 RX_DECL struct rx_hash_item * rx_hash_find (struct rx_hash * table,
1580 					    unsigned long hash,
1581 					    void * value,
1582 					    struct rx_hash_rules * rules);
1583 RX_DECL struct rx_hash_item * rx_hash_store (struct rx_hash * table,
1584 					     unsigned long hash,
1585 					     void * value,
1586 					     struct rx_hash_rules * rules);
1587 RX_DECL void rx_hash_free (struct rx_hash_item * it, struct rx_hash_rules * rules);
1588 RX_DECL void rx_free_hash_table (struct rx_hash * tab, rx_hash_freefn freefn,
1589 				 struct rx_hash_rules * rules);
1590 RX_DECL rx_Bitset rx_cset (struct rx *rx);
1591 RX_DECL rx_Bitset rx_copy_cset (struct rx *rx, rx_Bitset a);
1592 RX_DECL void rx_free_cset (struct rx * rx, rx_Bitset c);
1593 RX_DECL struct rexp_node * rexp_node (struct rx *rx,
1594 				      enum rexp_node_type type);
1595 RX_DECL struct rexp_node * rx_mk_r_cset (struct rx * rx,
1596 					 rx_Bitset b);
1597 RX_DECL struct rexp_node * rx_mk_r_concat (struct rx * rx,
1598 					   struct rexp_node * a,
1599 					   struct rexp_node * b);
1600 RX_DECL struct rexp_node * rx_mk_r_alternate (struct rx * rx,
1601 					      struct rexp_node * a,
1602 					      struct rexp_node * b);
1603 RX_DECL struct rexp_node * rx_mk_r_opt (struct rx * rx,
1604 					struct rexp_node * a);
1605 RX_DECL struct rexp_node * rx_mk_r_star (struct rx * rx,
1606 					 struct rexp_node * a);
1607 RX_DECL struct rexp_node * rx_mk_r_2phase_star (struct rx * rx,
1608 						struct rexp_node * a,
1609 						struct rexp_node * b);
1610 RX_DECL struct rexp_node * rx_mk_r_side_effect (struct rx * rx,
1611 						rx_side_effect a);
1612 #if 0
1613 RX_DECL struct rexp_node * rx_mk_r_data  (struct rx * rx,
1614 					  void * a);
1615 #endif
1616 RX_DECL void rx_free_rexp (struct rx * rx, struct rexp_node * node);
1617 RX_DECL struct rexp_node * rx_copy_rexp (struct rx *rx,
1618 					 struct rexp_node *node);
1619 RX_DECL struct rx_nfa_state * rx_nfa_state (struct rx *rx);
1620 RX_DECL void rx_free_nfa_state (struct rx_nfa_state * n);
1621 RX_DECL struct rx_nfa_state * rx_id_to_nfa_state (struct rx * rx,
1622 						  int id);
1623 RX_DECL struct rx_nfa_edge * rx_nfa_edge (struct rx *rx,
1624 					  enum rx_nfa_etype type,
1625 					  struct rx_nfa_state *start,
1626 					  struct rx_nfa_state *dest);
1627 RX_DECL void rx_free_nfa_edge (struct rx_nfa_edge * e);
1628 RX_DECL void rx_free_nfa (struct rx *rx);
1629 RX_DECL int rx_build_nfa (struct rx *rx,
1630 			  struct rexp_node *rexp,
1631 			  struct rx_nfa_state **start,
1632 			  struct rx_nfa_state **end);
1633 RX_DECL void rx_name_nfa_states (struct rx *rx);
1634 RX_DECL int rx_eclose_nfa (struct rx *rx);
1635 RX_DECL void rx_delete_epsilon_transitions (struct rx *rx);
1636 RX_DECL int rx_compactify_nfa (struct rx *rx,
1637 			       void **mem, unsigned long *size);
1638 RX_DECL void rx_release_superset (struct rx *rx,
1639 				  struct rx_superset *set);
1640 RX_DECL struct rx_superset * rx_superset_cons (struct rx * rx,
1641 					       struct rx_nfa_state *car, struct rx_superset *cdr);
1642 RX_DECL struct rx_superset * rx_superstate_eclosure_union
1643   (struct rx * rx, struct rx_superset *set, struct rx_nfa_state_set *ecl);
1644 RX_DECL struct rx_superstate * rx_superstate (struct rx *rx,
1645 					      struct rx_superset *set);
1646 RX_DECL struct rx_inx * rx_handle_cache_miss
1647   (struct rx *rx, struct rx_superstate *super, unsigned char chr, void *data);
1648 RX_DECL reg_errcode_t rx_compile (__const__ char *pattern, int size,
1649 				  reg_syntax_t syntax,
1650 				  struct re_pattern_buffer * rxb);
1651 RX_DECL void rx_blow_up_fastmap (struct re_pattern_buffer * rxb);
1652 #else /* STDC */
1653 RX_DECL int rx_bitset_is_equal ();
1654 RX_DECL int rx_bitset_is_subset ();
1655 RX_DECL int rx_bitset_empty ();
1656 RX_DECL void rx_bitset_null ();
1657 RX_DECL void rx_bitset_universe ();
1658 RX_DECL void rx_bitset_complement ();
1659 RX_DECL void rx_bitset_assign ();
1660 RX_DECL void rx_bitset_union ();
1661 RX_DECL void rx_bitset_intersection ();
1662 RX_DECL void rx_bitset_difference ();
1663 #if 0
1664 RX_DECL void rx_bitset_revdifference ();
1665 #endif
1666 #ifdef emacs
1667 RX_DECL void rx_bitset_xor ();
1668 #endif
1669 RX_DECL unsigned long rx_bitset_hash ();
1670 RX_DECL struct rx_hash_item * rx_hash_find ();
1671 RX_DECL struct rx_hash_item * rx_hash_store ();
1672 RX_DECL void rx_hash_free ();
1673 RX_DECL void rx_free_hash_table ();
1674 RX_DECL rx_Bitset rx_cset ();
1675 RX_DECL rx_Bitset rx_copy_cset ();
1676 RX_DECL void rx_free_cset ();
1677 RX_DECL struct rexp_node * rexp_node ();
1678 RX_DECL struct rexp_node * rx_mk_r_cset ();
1679 RX_DECL struct rexp_node * rx_mk_r_concat ();
1680 RX_DECL struct rexp_node * rx_mk_r_alternate ();
1681 RX_DECL struct rexp_node * rx_mk_r_opt ();
1682 RX_DECL struct rexp_node * rx_mk_r_star ();
1683 RX_DECL struct rexp_node * rx_mk_r_2phase_star ();
1684 RX_DECL struct rexp_node * rx_mk_r_side_effect ();
1685 #if 0
1686 RX_DECL struct rexp_node * rx_mk_r_data  ();
1687 #endif
1688 RX_DECL void rx_free_rexp ();
1689 RX_DECL struct rexp_node * rx_copy_rexp ();
1690 RX_DECL struct rx_nfa_state * rx_nfa_state ();
1691 RX_DECL void rx_free_nfa_state ();
1692 RX_DECL struct rx_nfa_state * rx_id_to_nfa_state ();
1693 RX_DECL struct rx_nfa_edge * rx_nfa_edge ();
1694 RX_DECL void rx_free_nfa_edge ();
1695 RX_DECL void rx_free_nfa ();
1696 RX_DECL int rx_build_nfa ();
1697 RX_DECL void rx_name_nfa_states ();
1698 RX_DECL int rx_eclose_nfa ();
1699 RX_DECL void rx_delete_epsilon_transitions ();
1700 RX_DECL int rx_compactify_nfa ();
1701 RX_DECL void rx_release_superset ();
1702 RX_DECL struct rx_superset * rx_superset_cons ();
1703 RX_DECL struct rx_superset * rx_superstate_eclosure_union ();
1704 RX_DECL struct rx_superstate * rx_superstate ();
1705 RX_DECL struct rx_inx * rx_handle_cache_miss ();
1706 RX_DECL reg_errcode_t rx_compile ();
1707 RX_DECL void rx_blow_up_fastmap ();
1708 #endif /* STDC */
1709 
1710 
1711 #endif /* RX_WANT_RX_DEFS */
1712 
1713 
1714 
1715 #if defined(__STDC__) || defined(MSDOS)
1716 extern int re_search_2 (struct re_pattern_buffer *rxb,
1717 			__const__ char * string1, int size1,
1718 			__const__ char * string2, int size2,
1719 			int startpos, int range,
1720 			struct re_registers *regs,
1721 			int stop);
1722 extern int re_search (struct re_pattern_buffer * rxb, __const__ char *string,
1723 		      int size, int startpos, int range,
1724 		      struct re_registers *regs);
1725 extern int re_match_2 (struct re_pattern_buffer * rxb,
1726 		       __const__ char * string1, int size1,
1727 		       __const__ char * string2, int size2,
1728 		       int pos, struct re_registers *regs, int stop);
1729 extern int re_match (struct re_pattern_buffer * rxb,
1730 		     __const__ char * string,
1731 		     int size, int pos,
1732 		     struct re_registers *regs);
1733 extern reg_syntax_t re_set_syntax (reg_syntax_t syntax);
1734 extern void re_set_registers (struct re_pattern_buffer *bufp,
1735 			      struct re_registers *regs,
1736 			      unsigned num_regs,
1737 			      regoff_t * starts, regoff_t * ends);
1738 extern __const__ char * re_compile_pattern (__const__ char *pattern,
1739 					int length,
1740 					struct re_pattern_buffer * rxb);
1741 extern int re_compile_fastmap (struct re_pattern_buffer * rxb);
1742 extern char * re_comp (__const__ char *s);
1743 extern int re_exec (__const__ char *s);
1744 extern int regcomp (regex_t * preg, __const__ char * pattern, int cflags);
1745 extern int regexec (__const__ regex_t *preg, __const__ char *string,
1746 		    size_t nmatch, regmatch_t pmatch[],
1747 		    int eflags);
1748 extern size_t regerror (int errcode, __const__ regex_t *preg,
1749 			char *errbuf, size_t errbuf_size);
1750 extern void regfree (regex_t *preg);
1751 
1752 #else /* STDC */
1753 extern int re_search_2 ();
1754 extern int re_search ();
1755 extern int re_match_2 ();
1756 extern int re_match ();
1757 extern reg_syntax_t re_set_syntax ();
1758 extern void re_set_registers ();
1759 extern __const__ char * re_compile_pattern ();
1760 extern int re_compile_fastmap ();
1761 extern char * re_comp ();
1762 extern int re_exec ();
1763 extern int regcomp ();
1764 extern int regexec ();
1765 extern size_t regerror ();
1766 extern void regfree ();
1767 
1768 #endif /* STDC */
1769 
1770 
1771 
1772 #ifdef RX_WANT_RX_DEFS
1773 
1774 #include "mbc.h"
1775 
1776 struct rx_counter_frame
1777 {
1778   int tag;
1779   int val;
1780   struct rx_counter_frame * inherited_from; /* If this is a copy. */
1781   struct rx_counter_frame * cdr;
1782 };
1783 
1784 struct rx_backtrack_frame
1785 {
1786   char * counter_stack_sp;
1787 
1788   /* A frame is used to save the matchers state when it crosses a
1789    * backtracking point.  The `stk_' fields correspond to variables
1790    * in re_search_2 (just strip off thes `stk_').  They are documented
1791    * tere.
1792    */
1793   struct rx_superstate * stk_super;
1794   unsigned int stk_c;
1795   struct rx_string_position stk_test_pos;
1796   int stk_last_l;
1797   int stk_last_r;
1798   int stk_test_ret;
1799 
1800   /* This is the list of options left to explore at the backtrack
1801    * point for which this frame was created.
1802    */
1803   struct rx_distinct_future * df;
1804   struct rx_distinct_future * first_df;
1805 
1806 #ifdef RX_DEBUG
1807    int stk_line_no;
1808 #endif
1809 };
1810 
1811 struct rx_stack_chunk
1812 {
1813   struct rx_stack_chunk * next_chunk;
1814   unsigned int bytes_left;
1815   char * sp;
1816 };
1817 
1818 enum rx_outer_entry
1819 {
1820   rx_outer_start,
1821   rx_outer_fastmap,
1822   rx_outer_test,
1823   rx_outer_restore_pos
1824 };
1825 
1826 enum rx_fastmap_return
1827 {
1828   rx_fastmap_continuation,
1829   rx_fastmap_error,
1830   rx_fastmap_ok,
1831   rx_fastmap_fail
1832 };
1833 
1834 enum rx_fastmap_entry
1835 {
1836   rx_fastmap_start,
1837   rx_fastmap_string_break
1838 };
1839 
1840 enum rx_test_return
1841 {
1842   rx_test_continuation,
1843   rx_test_error,
1844   rx_test_fail,
1845   rx_test_ok
1846 };
1847 
1848 enum rx_test_internal_return
1849 {
1850   rx_test_internal_error,
1851   rx_test_found_first,
1852   rx_test_line_finished
1853 };
1854 
1855 enum rx_test_match_entry
1856 {
1857   rx_test_start,
1858   rx_test_cache_hit_loop,
1859   rx_test_backreference_check,
1860   rx_test_backtrack_return
1861 };
1862 
1863 struct rx_search_state
1864 {
1865   /* Two groups of registers are kept.  The group with the register state
1866    * of the current test match, and the group that holds the state at the end
1867    * of the best known match, if any.
1868    *
1869    * For some patterns, there may also be registers saved on the stack.
1870    */
1871   unsigned num_regs;		/* Includes an element for register zero. */
1872   regoff_t * lparen;		/* scratch space for register returns */
1873   regoff_t * rparen;
1874   regoff_t * best_lpspace;	/* in case the user doesn't want these */
1875   regoff_t * best_rpspace;	/* values, we still need space to store
1876 				 * them.  Normally, this memoryis unused
1877 				 * and the space pointed to by REGS is
1878 				 * used instead.
1879 				 */
1880 
1881   int last_l;			/* Highest index of a valid lparen. */
1882   int last_r;			/* It's dual. */
1883 
1884   int * best_lparen;		/* This contains the best known register */
1885   int * best_rparen;		/* assignments.
1886 				 * This may point to the same mem as
1887 				 * best_lpspace, or it might point to memory
1888 				 * passed by the caller.
1889 				 */
1890   int best_last_l;		/* best_last_l:best_lparen::last_l:lparen */
1891   int best_last_r;
1892 
1893 
1894   unsigned char * translate;
1895 
1896   struct rx_string_position outer_pos;
1897 
1898   struct rx_superstate * start_super;
1899   int nfa_choice;
1900   int first_found;		/* If true, return after finding any match. */
1901   int ret_val;
1902 
1903   /* For continuations... */
1904   enum rx_outer_entry outer_search_resume_pt;
1905   struct re_pattern_buffer * saved_rxb;
1906   int saved_startpos;
1907   int saved_range;
1908   int saved_stop;
1909   int saved_total_size;
1910   rx_get_burst_fn saved_get_burst;
1911   rx_back_check_fn saved_back_check;
1912   struct re_registers * saved_regs;
1913 
1914   /**
1915    ** state for fastmap
1916    **/
1917   char * fastmap;
1918   int fastmap_chr;
1919   int fastmap_val;
1920 
1921   /* for continuations in the fastmap procedure: */
1922   enum rx_fastmap_entry fastmap_resume_pt;
1923 
1924   /**
1925    ** state for test_match
1926    **/
1927 
1928   /* The current superNFA position of the matcher. */
1929   struct rx_superstate * super;
1930 
1931   /* The matcher interprets a series of instruction frames.
1932    * This is the `instruction counter' for the interpretation.
1933    */
1934   struct rx_inx * ifr;
1935 
1936   /* We insert a ghost character in the string to prime
1937    * the nfa.  test_pos.pos, test_pos.str_half, and test_pos.end_half
1938    * keep track of the test-match position and string-half.
1939    */
1940   unsigned char c;
1941 
1942   /* Position within the string. */
1943   struct rx_string_position test_pos;
1944 
1945   struct rx_stack_chunk * counter_stack;
1946   struct rx_stack_chunk * backtrack_stack;
1947   int backtrack_frame_bytes;
1948   int chunk_bytes;
1949   struct rx_stack_chunk * free_chunks;
1950 
1951   /* To return from this function, set test_ret and
1952    * `goto test_do_return'.
1953    *
1954    * Possible return values are:
1955    *     1   --- end of string while the superNFA is still going
1956    *     0   --- internal error (out of memory)
1957    *	-1   --- search completed by reaching the superNFA fail state
1958    *    -2   --- a match was found, maybe not the longest.
1959    *
1960    * When the search is complete (-1), best_last_r indicates whether
1961    * a match was found.
1962    *
1963    * -2 is return only if search_state.first_found is non-zero.
1964    *
1965    * if search_state.first_found is non-zero, a return of -1 indicates no match,
1966    * otherwise, best_last_r has to be checked.
1967    */
1968   int test_ret;
1969 
1970   int could_have_continued;
1971 
1972 #ifdef RX_DEBUG
1973   int backtrack_depth;
1974   /* There is a search tree with every node as set of deterministic
1975    * transitions in the super nfa.  For every branch of a
1976    * backtrack point is an edge in the tree.
1977    * This counts up a pre-order of nodes in that tree.
1978    * It's saved on the search stack and printed when debugging.
1979    */
1980   int line_no;
1981   int lines_found;
1982 #endif
1983 
1984 
1985   /* For continuations within the match tester */
1986   enum rx_test_match_entry test_match_resume_pt;
1987   struct rx_inx * saved_next_tr_table;
1988   struct rx_inx * saved_this_tr_table;
1989   int saved_reg;
1990   struct rx_backtrack_frame * saved_bf;
1991 
1992 };
1993 
1994 
1995 extern char rx_slowmap[];
1996 extern unsigned char rx_id_translation[];
1997 
1998 static __inline__ void
init_fastmap(rxb,search_state)1999 init_fastmap (rxb, search_state)
2000      struct re_pattern_buffer * rxb;
2001      struct rx_search_state * search_state;
2002 {
2003   search_state->fastmap = (rxb->fastmap
2004 			   ? (char *)rxb->fastmap
2005 			   : (char *)rx_slowmap);
2006   /* Update the fastmap now if not correct already.
2007    * When the regexp was compiled, the fastmap was computed
2008    * and stored in a bitset.  This expands the bitset into a
2009    * character array containing 1s and 0s.
2010    */
2011   if ((search_state->fastmap == rxb->fastmap) && !rxb->fastmap_accurate)
2012     rx_blow_up_fastmap (rxb);
2013   search_state->fastmap_chr = -1;
2014   search_state->fastmap_val = 0;
2015   search_state->fastmap_resume_pt = rx_fastmap_start;
2016 }
2017 
2018 static __inline__ void
uninit_fastmap(rxb,search_state)2019 uninit_fastmap (rxb, search_state)
2020      struct re_pattern_buffer * rxb;
2021      struct rx_search_state * search_state;
2022 {
2023   /* Unset the fastmap sentinel */
2024   if (search_state->fastmap_chr >= 0)
2025     search_state->fastmap[search_state->fastmap_chr]
2026       = search_state->fastmap_val;
2027 }
2028 
2029 static __inline__ int
fastmap_search(rxb,stop,get_burst,app_closure,search_state)2030 fastmap_search (rxb, stop, get_burst, app_closure, search_state)
2031      struct re_pattern_buffer * rxb;
2032      int stop;
2033      rx_get_burst_fn get_burst;
2034      void * app_closure;
2035      struct rx_search_state * search_state;
2036 {
2037   enum rx_fastmap_entry pc;
2038 
2039   if (0)
2040     {
2041     return_continuation:
2042       search_state->fastmap_resume_pt = pc;
2043       return rx_fastmap_continuation;
2044     }
2045 
2046   pc = search_state->fastmap_resume_pt;
2047 
2048   switch (pc)
2049     {
2050     default:
2051       return rx_fastmap_error;
2052     case rx_fastmap_start:
2053     init_fastmap_sentinal:
2054       /* For the sake of fast fastmapping, set a sentinal in the fastmap.
2055        * This sentinal will trap the fastmap loop when it reaches the last
2056        * valid character in a string half.
2057        *
2058        * This must be reset when the fastmap/search loop crosses a string
2059        * boundry, and before returning to the caller.  So sometimes,
2060        * the fastmap loop is restarted with `continue', othertimes by
2061        * `goto init_fastmap_sentinal'.
2062        */
2063       if (search_state->outer_pos.size)
2064 	{
2065 	  search_state->fastmap_chr = ((search_state->outer_pos.search_direction > 0)
2066 				       ? *(search_state->outer_pos.end - 1)
2067 				       : *search_state->outer_pos.string);
2068 	  search_state->fastmap_val
2069 	    = search_state->fastmap[search_state->fastmap_chr];
2070 	  search_state->fastmap[search_state->fastmap_chr] = 1;
2071 	}
2072       else
2073 	{
2074 	  search_state->fastmap_chr = -1;
2075 	  search_state->fastmap_val = 0;
2076 	}
2077 
2078       if (search_state->outer_pos.pos >= search_state->outer_pos.end)
2079 	goto fastmap_hit_bound;
2080       else
2081 	{
2082 	  if (search_state->outer_pos.search_direction > 0)
2083 	    {
2084 	      if (search_state->fastmap_val)
2085 		{
2086 		  for (;;)
2087 		    {
2088 		      while (!search_state->fastmap[*search_state->outer_pos.pos]) {
2089 			if (ismbchar (*search_state->outer_pos.pos)) {
2090 			  search_state->outer_pos.pos += 2;
2091 			  if (search_state->outer_pos.pos == search_state->outer_pos.end)
2092 			    goto fastmap_hit_bound;
2093 			  continue;
2094 			}
2095 			++search_state->outer_pos.pos;
2096 		      }
2097 		      return rx_fastmap_ok;
2098 		    }
2099 		}
2100 	      else
2101 		{
2102 		  for (;;)
2103 		    {
2104 		      while (!search_state->fastmap[*search_state->outer_pos.pos]) {
2105 			if (ismbchar (*search_state->outer_pos.pos)) {
2106 			  search_state->outer_pos.pos += 2;
2107 			  if (search_state->outer_pos.pos == search_state->outer_pos.end)
2108 			    goto fastmap_hit_bound;
2109 			  continue;
2110 			}
2111 			++search_state->outer_pos.pos;
2112 		      }
2113 		      if (*search_state->outer_pos.pos != search_state->fastmap_chr)
2114 			return rx_fastmap_ok;
2115 		      else
2116 			{
2117 			  ++search_state->outer_pos.pos;
2118 			  if (search_state->outer_pos.pos == search_state->outer_pos.end)
2119 			    goto fastmap_hit_bound;
2120 			}
2121 		    }
2122 		}
2123 	    }
2124 	  else
2125 	    {
2126 	      __const__ unsigned char * bound;
2127 	      bound = search_state->outer_pos.string - 1;
2128 	      if (search_state->fastmap_val)
2129 		{
2130 		  for (;;)
2131 		    {
2132 		      while (!search_state->fastmap[*search_state->outer_pos.pos])
2133 			--search_state->outer_pos.pos;
2134 		      return rx_fastmap_ok;
2135 		    }
2136 		}
2137 	      else
2138 		{
2139 		  for (;;)
2140 		    {
2141 		      while (!search_state->fastmap[*search_state->outer_pos.pos])
2142 			--search_state->outer_pos.pos;
2143 		      if ((*search_state->outer_pos.pos != search_state->fastmap_chr) || search_state->fastmap_val)
2144 			return rx_fastmap_ok;
2145 		      else
2146 			{
2147 			  --search_state->outer_pos.pos;
2148 			  if (search_state->outer_pos.pos == bound)
2149 			    goto fastmap_hit_bound;
2150 			}
2151 		    }
2152 		}
2153 	    }
2154 	}
2155 
2156     case rx_fastmap_string_break:
2157     fastmap_hit_bound:
2158       {
2159 	/* If we hit a bound, it may be time to fetch another burst
2160 	 * of string, or it may be time to return a continuation to
2161  	 * the caller, or it might be time to fail.
2162 	 */
2163 
2164 	int burst_state;
2165 	burst_state = get_burst (&search_state->outer_pos, app_closure, stop);
2166 	switch (burst_state)
2167 	  {
2168 	  default:
2169 	  case rx_get_burst_error:
2170 	    return rx_fastmap_error;
2171 	  case rx_get_burst_continuation:
2172 	    {
2173 	      pc = rx_fastmap_string_break;
2174 	      goto return_continuation;
2175 	    }
2176 	  case rx_get_burst_ok:
2177 	    goto init_fastmap_sentinal;
2178 	  case rx_get_burst_no_more:
2179 	    /* ...not a string split, simply no more string.
2180 	     *
2181 	     * When searching backward, running out of string
2182 	     * is reason to quit.
2183 	     *
2184 	     * When searching forward, we allow the possibility
2185 	     * of an (empty) match after the last character in the
2186 	     * virtual string.  So, fall through to the matcher
2187 	     */
2188 	    return (  (search_state->outer_pos.search_direction > 0)
2189 		    ? rx_fastmap_ok
2190 		    : rx_fastmap_fail);
2191 	  }
2192       }
2193     }
2194 
2195 }
2196 
2197 
2198 
2199 #ifdef emacs
2200 /* The `emacs' switch turns on certain matching commands
2201  * that make sense only in Emacs.
2202  */
2203 #include "config.h"
2204 #include "lisp.h"
2205 #include "buffer.h"
2206 #include "syntax.h"
2207 #endif /* emacs */
2208 
2209 /* Setting RX_MEMDBUG is useful if you have dbmalloc.  Maybe with similar
2210  * packages too.
2211  */
2212 #ifdef RX_MEMDBUG
2213 #include <malloc.h>
2214 #endif /* RX_RX_MEMDBUG */
2215 
2216 /* We used to test for `BSTRING' here, but only GCC and Emacs define
2217  * `BSTRING', as far as I know, and neither of them use this code.
2218  */
2219 #if HAVE_STRING_H || STDC_HEADERS
2220 #include <string.h>
2221 
2222 #ifndef bcmp
2223 #define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
2224 #endif
2225 
2226 #ifndef bcopy
2227 #define bcopy(s, d, n)	memcpy ((d), (s), (n))
2228 #endif
2229 
2230 #ifndef bzero
2231 #define bzero(s, n)	memset ((s), 0, (n))
2232 #endif
2233 
2234 #else /*  HAVE_STRING_H || STDC_HEADERS */
2235 #include <strings.h>
2236 #endif   /* not (HAVE_STRING_H || STDC_HEADERS) */
2237 
2238 #ifdef STDC_HEADERS
2239 #include <stdlib.h>
2240 #else /* not STDC_HEADERS */
2241 char *malloc ();
2242 char *realloc ();
2243 #endif /* not STDC_HEADERS */
2244 
2245 
2246 
2247 
2248 /* How many characters in the character set.  */
2249 #define CHAR_SET_SIZE (1 << CHARBITS)
2250 
2251 #ifndef emacs
2252 /* Define the syntax basics for \<, \>, etc.
2253  * This must be nonzero for the wordchar and notwordchar pattern
2254  * commands in re_match_2.
2255  */
2256 #ifndef Sword
2257 #define Sword 1
2258 #endif
2259 #define SYNTAX(c) re_syntax_table[c]
2260 RX_DECL char re_syntax_table[CHAR_SET_SIZE];
2261 #endif /* not emacs */
2262 
2263 
2264 /* Test if at very beginning or at very end of the virtual concatenation
2265  *  of `string1' and `string2'.  If only one string, it's `string2'.
2266  */
2267 
2268 #define AT_STRINGS_BEG() \
2269   (   -1		 \
2270    == ((search_state.test_pos.pos - search_state.test_pos.string) \
2271        + search_state.test_pos.offset))
2272 
2273 #define AT_STRINGS_END() \
2274   (   (total_size - 1)	 \
2275    == ((search_state.test_pos.pos - search_state.test_pos.string) \
2276        + search_state.test_pos.offset))
2277 
2278 
2279 /* Test if POS + 1 points to a character which is word-constituent.  We have
2280  * two special cases to check for: if past the end of string1, look at
2281  * the first character in string2; and if before the beginning of
2282  * string2, look at the last character in string1.
2283  *
2284  * Assumes `string1' exists, so use in conjunction with AT_STRINGS_BEG ().
2285  */
2286 #define LETTER_P(POS,OFF)						\
2287   (   SYNTAX (fetch_char(POS, OFF, app_closure, stop))			\
2288    == Sword)
2289 
2290 /* Test if the character at D and the one after D differ with respect
2291  * to being word-constituent.
2292  */
2293 #define AT_WORD_BOUNDARY(d)						\
2294   (AT_STRINGS_BEG () || AT_STRINGS_END () || LETTER_P (d,0) != LETTER_P (d, 1))
2295 
2296 
2297 #ifdef RX_SUPPORT_CONTINUATIONS
2298 #define RX_STACK_ALLOC(BYTES) malloc(BYTES)
2299 #define RX_STACK_FREE(MEM) free(MEM)
2300 #else
2301 #define RX_STACK_ALLOC(BYTES) alloca(BYTES)
2302 #define RX_STACK_FREE(MEM) \
2303       ((struct rx_stack_chunk *)MEM)->next_chunk = search_state.free_chunks; \
2304       search_state.free_chunks = ((struct rx_stack_chunk *)MEM);
2305 
2306 #endif
2307 
2308 #define PUSH(CHUNK_VAR,BYTES)   \
2309   if (!CHUNK_VAR || (CHUNK_VAR->bytes_left < (BYTES)))  \
2310     {					\
2311       struct rx_stack_chunk * new_chunk;	\
2312       if (search_state.free_chunks)			\
2313 	{				\
2314 	  new_chunk = search_state.free_chunks;	\
2315 	  search_state.free_chunks = search_state.free_chunks->next_chunk; \
2316 	}				\
2317       else				\
2318 	{				\
2319 	  new_chunk = (struct rx_stack_chunk *)RX_STACK_ALLOC(search_state.chunk_bytes); \
2320 	  if (!new_chunk)		\
2321 	    {				\
2322 	      search_state.ret_val = 0;		\
2323 	      goto test_do_return;	\
2324 	    }				\
2325 	}				\
2326       new_chunk->sp = (char *)new_chunk + sizeof (struct rx_stack_chunk); \
2327       new_chunk->bytes_left = (search_state.chunk_bytes \
2328 			       - (BYTES) \
2329 			       - sizeof (struct rx_stack_chunk)); \
2330       new_chunk->next_chunk = CHUNK_VAR; \
2331       CHUNK_VAR = new_chunk;		\
2332     } \
2333   else \
2334     (CHUNK_VAR->sp += (BYTES)), (CHUNK_VAR->bytes_left -= (BYTES))
2335 
2336 #define POP(CHUNK_VAR,BYTES) \
2337   if (CHUNK_VAR->sp == ((char *)CHUNK_VAR + sizeof(*CHUNK_VAR))) \
2338     { \
2339       struct rx_stack_chunk * new_chunk = CHUNK_VAR->next_chunk; \
2340       RX_STACK_FREE(CHUNK_VAR); \
2341       CHUNK_VAR = new_chunk; \
2342     } \
2343   else \
2344     (CHUNK_VAR->sp -= BYTES), (CHUNK_VAR->bytes_left += BYTES)
2345 
2346 
2347 
2348 #define SRCH_TRANSLATE(C)  search_state.translate[(unsigned char) (C)]
2349 
2350 
2351 
2352 
2353 #if defined(__STDC__) || defined(MSDOS)
2354 RX_DECL __inline__ int
rx_search(struct re_pattern_buffer * rxb,int startpos,int range,int stop,int total_size,rx_get_burst_fn get_burst,rx_back_check_fn back_check,rx_fetch_char_fn fetch_char,void * app_closure,struct re_registers * regs,struct rx_search_state * resume_state,struct rx_search_state * save_state)2355 rx_search  (struct re_pattern_buffer * rxb,
2356 	    int startpos,
2357 	    int range,
2358 	    int stop,
2359 	    int total_size,
2360 	    rx_get_burst_fn get_burst,
2361 	    rx_back_check_fn back_check,
2362 	    rx_fetch_char_fn fetch_char,
2363 	    void * app_closure,
2364 	    struct re_registers * regs,
2365 	    struct rx_search_state * resume_state,
2366 	    struct rx_search_state * save_state)
2367 #else
2368 RX_DECL __inline__ int
2369 rx_search  (rxb, startpos, range, stop, total_size,
2370 	    get_burst, back_check, fetch_char,
2371 	    app_closure, regs, resume_state, save_state)
2372      struct re_pattern_buffer * rxb;
2373      int startpos;
2374      int range;
2375      int stop;
2376      int total_size;
2377      rx_get_burst_fn get_burst;
2378      rx_back_check_fn back_check;
2379      rx_fetch_char_fn fetch_char;
2380      void * app_closure;
2381      struct re_registers * regs;
2382      struct rx_search_state * resume_state;
2383      struct rx_search_state * save_state;
2384 #endif
2385 {
2386   int pc;
2387   int test_state;
2388   struct rx_search_state search_state;
2389 
2390   search_state.free_chunks = 0;
2391   if (!resume_state)
2392     pc = rx_outer_start;
2393   else
2394     {
2395       search_state = *resume_state;
2396       regs = search_state.saved_regs;
2397       rxb = search_state.saved_rxb;
2398       startpos = search_state.saved_startpos;
2399       range = search_state.saved_range;
2400       stop = search_state.saved_stop;
2401       total_size = search_state.saved_total_size;
2402       get_burst = search_state.saved_get_burst;
2403       back_check = search_state.saved_back_check;
2404       pc = search_state.outer_search_resume_pt;
2405       if (0)
2406 	{
2407 	return_continuation:
2408 	  if (save_state)
2409 	    {
2410 	      *save_state = search_state;
2411 	      save_state->saved_regs = regs;
2412 	      save_state->saved_rxb = rxb;
2413 	      save_state->saved_startpos = startpos;
2414 	      save_state->saved_range = range;
2415 	      save_state->saved_stop = stop;
2416 	      save_state->saved_total_size = total_size;
2417 	      save_state->saved_get_burst = get_burst;
2418 	      save_state->saved_back_check = back_check;
2419 	      save_state->outer_search_resume_pt = pc;
2420 	    }
2421 	  return rx_search_continuation;
2422 	}
2423     }
2424 
2425   switch (pc)
2426     {
2427     case rx_outer_start:
2428       search_state.ret_val = rx_search_fail;
2429       (  search_state.lparen
2430        = search_state.rparen
2431        = search_state.best_lpspace
2432        = search_state.best_rpspace
2433        = 0);
2434 
2435       /* figure the number of registers we may need for use in backreferences.
2436        * the number here includes an element for register zero.
2437        */
2438       search_state.num_regs = rxb->re_nsub + 1;
2439 
2440 
2441       /* check for out-of-range startpos.  */
2442       if ((startpos < 0) || (startpos > total_size))
2443 	return rx_search_fail;
2444 
2445       /* fix up range if it might eventually take us outside the string. */
2446       {
2447 	int endpos;
2448 	endpos = startpos + range;
2449 	if (endpos < -1)
2450 	  range = (-1 - startpos);
2451 	else if (endpos > (total_size + 1))
2452 	  range = total_size - startpos;
2453       }
2454 
2455       /* if the search isn't to be a backwards one, don't waste time in a
2456        * long search for a pattern that says it is anchored.
2457        */
2458       if (rxb->begbuf_only && (range > 0))
2459 	{
2460 	  if (startpos > 0)
2461 	    return rx_search_fail;
2462 	  else
2463 	    range = 1;
2464 	}
2465 
2466       /* decide whether to use internal or user-provided reg buffers. */
2467       if (!regs || rxb->no_sub)
2468 	{
2469 	  search_state.best_lpspace =
2470 	    (regoff_t *)REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t));
2471 	  search_state.best_rpspace =
2472 	    (regoff_t *)REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t));
2473 	  search_state.best_lparen = search_state.best_lpspace;
2474 	  search_state.best_rparen = search_state.best_rpspace;
2475 	}
2476       else
2477 	{
2478 	  /* have the register data arrays been allocated?  */
2479 	  if (rxb->regs_allocated == REGS_UNALLOCATED)
2480 	    { /* no.  so allocate them with malloc.  we need one
2481 		 extra element beyond `search_state.num_regs' for the `-1' marker
2482 		 gnu code uses.  */
2483 	      regs->num_regs = MAX (RE_NREGS, rxb->re_nsub + 1);
2484 	      regs->start = ((regoff_t *)
2485 			     malloc (regs->num_regs * sizeof ( regoff_t)));
2486 	      regs->end = ((regoff_t *)
2487 			   malloc (regs->num_regs * sizeof ( regoff_t)));
2488 	      if (regs->start == 0 || regs->end == 0)
2489 		return rx_search_error;
2490 	      rxb->regs_allocated = REGS_REALLOCATE;
2491 	    }
2492 	  else if (rxb->regs_allocated == REGS_REALLOCATE)
2493 	    { /* yes.  if we need more elements than were already
2494 		 allocated, reallocate them.  if we need fewer, just
2495 		 leave it alone.  */
2496 	      if (regs->num_regs < search_state.num_regs + 1)
2497 		{
2498 		  regs->num_regs = search_state.num_regs + 1;
2499 		  regs->start = ((regoff_t *)
2500 				 realloc (regs->start,
2501 					  regs->num_regs * sizeof (regoff_t)));
2502 		  regs->end = ((regoff_t *)
2503 			       realloc (regs->end,
2504 					regs->num_regs * sizeof ( regoff_t)));
2505 		  if (regs->start == 0 || regs->end == 0)
2506 		    return rx_search_error;
2507 		}
2508 	    }
2509 	  else if (rxb->regs_allocated != REGS_FIXED)
2510 	    return rx_search_error;
2511 
2512 	  if (regs->num_regs < search_state.num_regs + 1)
2513 	    {
2514 	      search_state.best_lpspace =
2515 		((regoff_t *)
2516 		 REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t)));
2517 	      search_state.best_rpspace =
2518 		((regoff_t *)
2519 		 REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t)));
2520 	      search_state.best_lparen = search_state.best_lpspace;
2521 	      search_state.best_rparen = search_state.best_rpspace;
2522 	    }
2523 	  else
2524 	    {
2525 	      search_state.best_lparen = regs->start;
2526 	      search_state.best_rparen = regs->end;
2527 	    }
2528 	}
2529 
2530       search_state.lparen =
2531 	(regoff_t *) REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t));
2532       search_state.rparen =
2533 	(regoff_t *) REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t));
2534 
2535       if (! (   search_state.best_rparen
2536 	     && search_state.best_lparen
2537 	     && search_state.lparen && search_state.rparen))
2538 	return rx_search_error;
2539 
2540       search_state.best_last_l = search_state.best_last_r = -1;
2541 
2542       search_state.translate = (rxb->translate
2543 				? rxb->translate
2544 				: rx_id_translation);
2545 
2546 
2547 
2548       /*
2549        * two nfa's were compiled.
2550        * `0' is complete.
2551        * `1' faster but gets registers wrong and ends too soon.
2552        */
2553       search_state.nfa_choice = (regs && !rxb->least_subs) ? '\0' : '\1';
2554 
2555       /* we have the option to look for the best match or the first
2556        * one we can find.  if the user isn't asking for register information,
2557        * we don't need to find the best match.
2558        */
2559       search_state.first_found = !regs;
2560 
2561       if (range >= 0)
2562 	{
2563 	  search_state.outer_pos.search_end = startpos + range;
2564 	  search_state.outer_pos.search_direction = 1;
2565 	}
2566       else
2567 	{
2568 	  search_state.outer_pos.search_end = startpos + range;
2569 	  search_state.outer_pos.search_direction = -1;
2570 	}
2571 
2572       /* the vacuous search always turns up nothing. */
2573       if ((search_state.outer_pos.search_direction > 0)
2574 	  ? (startpos > search_state.outer_pos.search_end)
2575 	  : (startpos < search_state.outer_pos.search_end))
2576 	return rx_search_fail;
2577 
2578       /* now we build the starting state of the supernfa. */
2579       {
2580 	struct rx_superset * start_contents;
2581 	struct rx_nfa_state_set * start_nfa_set;
2582 
2583 	/* we presume here that the nfa start state has only one
2584 	 * possible future with no side effects.
2585 	 */
2586 	start_nfa_set = rxb->start->futures->destset;
2587 	if (   rxb->rx.start_set
2588 	    && (rxb->rx.start_set->starts_for == &rxb->rx))
2589 	  start_contents = rxb->rx.start_set;
2590 	else
2591 	  {
2592 	    start_contents =
2593 	      rx_superstate_eclosure_union (&rxb->rx,
2594 					    rx_superset_cons (&rxb->rx, 0, 0),
2595 					    start_nfa_set);
2596 
2597 	    if (!start_contents)
2598 	      return rx_search_fail;
2599 
2600 	    start_contents->starts_for = &rxb->rx;
2601 	    rxb->rx.start_set = start_contents;
2602 	  }
2603 	if (   start_contents->superstate
2604 	    && (start_contents->superstate->rx_id == rxb->rx.rx_id))
2605 	  {
2606 	    search_state.start_super = start_contents->superstate;
2607 	    rx_lock_superstate (&rxb->rx, search_state.start_super);
2608 	  }
2609 	else
2610 	  {
2611 	    rx_protect_superset (&rxb->rx, start_contents);
2612 
2613 	    search_state.start_super = rx_superstate (&rxb->rx, start_contents);
2614 	    if (!search_state.start_super)
2615 	      return rx_search_fail;
2616 	    rx_lock_superstate (&rxb->rx, search_state.start_super);
2617 	    rx_release_superset (&rxb->rx, start_contents);
2618 	  }
2619       }
2620 
2621 
2622       /* The outer_pos tracks the position within the strings
2623        * as seen by loop that calls fastmap_search.
2624        *
2625        * The caller supplied get_burst function actually
2626        * gives us pointers to chars.
2627        *
2628        * Communication with the get_burst function is through an
2629        * rx_string_position structure.  Here, the structure for
2630        * outer_pos is initialized.   It is set to point to the
2631        * NULL string, at an offset of STARTPOS.  STARTPOS is out
2632        * of range of the NULL string, so the first call to
2633        * getburst will patch up the rx_string_position to point
2634        * to valid characters.
2635        */
2636 
2637       (  search_state.outer_pos.string
2638        = search_state.outer_pos.end
2639        = 0);
2640 
2641       search_state.outer_pos.offset = 0;
2642       search_state.outer_pos.size = 0;
2643       search_state.outer_pos.pos = (unsigned char *)startpos;
2644       init_fastmap (rxb, &search_state);
2645 
2646       search_state.fastmap_resume_pt = rx_fastmap_start;
2647     case rx_outer_fastmap:
2648       /* do { */
2649     pseudo_do:
2650       {
2651 	{
2652 	  int fastmap_state;
2653 	  fastmap_state = fastmap_search (rxb, stop, get_burst, app_closure,
2654 					  &search_state);
2655 	  switch (fastmap_state)
2656 	    {
2657 	    case rx_fastmap_continuation:
2658 	      pc = rx_outer_fastmap;
2659 	      goto return_continuation;
2660 	    case rx_fastmap_fail:
2661 	      goto finish;
2662 	    case rx_fastmap_ok:
2663 	      break;
2664 	    }
2665 	}
2666 
2667 	/* now the fastmap loop has brought us to a plausible
2668 	 * starting point for a match.  so, it's time to run the
2669 	 * nfa and see if a match occured.
2670 	 */
2671 	startpos = (  search_state.outer_pos.pos
2672 		    - search_state.outer_pos.string
2673 		    + search_state.outer_pos.offset);
2674 #if 0
2675 /*|*/	if ((range > 0) && (startpos == search_state.outer_pos.search_end))
2676 /*|*/	  goto finish;
2677 #endif
2678       }
2679 
2680       search_state.test_match_resume_pt = rx_test_start;
2681       /* do interrupted for entry point... */
2682     case rx_outer_test:
2683       /* ...do continued */
2684       {
2685 	goto test_match;
2686       test_returns_to_search:
2687 	switch (test_state)
2688 	  {
2689 	  case rx_test_continuation:
2690 	    pc = rx_outer_test;
2691 	    goto return_continuation;
2692 	  case rx_test_error:
2693 	    search_state.ret_val = rx_search_error;
2694 	    goto finish;
2695 	  case rx_test_fail:
2696 	    break;
2697 	  case rx_test_ok:
2698 	    goto finish;
2699 	  }
2700 	if (search_state.outer_pos.search_direction > 0)
2701 	  {
2702 	    if (ismbchar (*search_state.outer_pos.pos))
2703 	      {
2704 		search_state.outer_pos.pos++;
2705 		startpos++;
2706 	      }
2707 	    search_state.outer_pos.pos++;
2708 	    startpos++;
2709 	  }
2710 	else
2711 	  {
2712 	    __const__ unsigned char *p;
2713 	    search_state.outer_pos.pos--;
2714 	    startpos--;
2715 	    for (p = search_state.outer_pos.pos;
2716 		 p-- > search_state.outer_pos.string && ismbchar (*p);
2717 		 /* no-op */)
2718 	      ;
2719 	    if (!((search_state.outer_pos.pos - p) & 1))
2720 	      {
2721 		search_state.outer_pos.pos--;
2722 		startpos--;
2723 	      }
2724 	  }
2725 #if 0
2726 /*|*/	if (search_state.test_pos.pos < search_state.test_pos.end)
2727 /*|*/	  break;
2728 #endif
2729       }
2730       /* do interrupted for entry point... */
2731     case rx_outer_restore_pos:
2732       {
2733 	int x;
2734 	x = get_burst (&search_state.outer_pos, app_closure, stop);
2735 	switch (x)
2736 	  {
2737 	  case rx_get_burst_continuation:
2738 	    pc = rx_outer_restore_pos;
2739 	    goto return_continuation;
2740 	  case rx_get_burst_error:
2741 	    search_state.ret_val = rx_search_error;
2742 	    goto finish;
2743 	  case rx_get_burst_no_more:
2744 	    if (rxb->can_match_empty)
2745 	      break;
2746 	    goto finish;
2747 	  case rx_get_burst_ok:
2748 	    break;
2749 	  }
2750       } /* } while (...see below...) */
2751 
2752       if ((search_state.outer_pos.search_direction > 0)
2753 	  ? (startpos <= search_state.outer_pos.search_end)
2754 	  : (startpos > search_state.outer_pos.search_end))
2755 	goto pseudo_do;
2756 
2757 
2758     finish:
2759       uninit_fastmap (rxb, &search_state);
2760       if (search_state.start_super)
2761 	rx_unlock_superstate (&rxb->rx, search_state.start_super);
2762 
2763 #ifdef regex_malloc
2764       if (search_state.lparen) free (search_state.lparen);
2765       if (search_state.rparen) free (search_state.rparen);
2766       if (search_state.best_lpspace) free (search_state.best_lpspace);
2767       if (search_state.best_rpspace) free (search_state.best_rpspace);
2768 #endif
2769       return search_state.ret_val;
2770     }
2771 
2772 
2773  test_match:
2774   {
2775     enum rx_test_match_entry test_pc;
2776     int inx;
2777     test_pc = search_state.test_match_resume_pt;
2778     if (test_pc == rx_test_start)
2779       {
2780 #ifdef RX_DEBUG
2781 	search_state.backtrack_depth = 0;
2782 #endif
2783 	search_state.last_l = search_state.last_r = 0;
2784 	search_state.lparen[0] = startpos;
2785 	search_state.super = search_state.start_super;
2786 	search_state.c = search_state.nfa_choice;
2787 	search_state.test_pos.pos = search_state.outer_pos.pos - 1;
2788 	search_state.test_pos.string = search_state.outer_pos.string;
2789 	search_state.test_pos.end = search_state.outer_pos.end;
2790 	search_state.test_pos.offset = search_state.outer_pos.offset;
2791 	search_state.test_pos.size = search_state.outer_pos.size;
2792 	search_state.test_pos.search_direction = 1;
2793 	search_state.counter_stack = 0;
2794 	search_state.backtrack_stack = 0;
2795 	search_state.backtrack_frame_bytes =
2796 	  (sizeof (struct rx_backtrack_frame)
2797 	   + (rxb->match_regs_on_stack
2798 	      ? sizeof (regoff_t) * (search_state.num_regs + 1) * 2
2799 	      : 0));
2800 	search_state.chunk_bytes = search_state.backtrack_frame_bytes * 64;
2801 	search_state.test_ret = rx_test_line_finished;
2802 	search_state.could_have_continued = 0;
2803       }
2804     /* This is while (1)...except that the body of the loop is interrupted
2805      * by some alternative entry points.
2806      */
2807   pseudo_while_1:
2808     switch (test_pc)
2809       {
2810       case rx_test_cache_hit_loop:
2811 	goto resume_continuation_1;
2812       case rx_test_backreference_check:
2813 	goto resume_continuation_2;
2814       case rx_test_backtrack_return:
2815 	goto resume_continuation_3;
2816       case rx_test_start:
2817 #ifdef RX_DEBUG
2818 	/* There is a search tree with every node as set of deterministic
2819 	 * transitions in the super nfa.  For every branch of a
2820 	 * backtrack point is an edge in the tree.
2821 	 * This counts up a pre-order of nodes in that tree.
2822 	 * It's saved on the search stack and printed when debugging.
2823 	 */
2824 	search_state.line_no = 0;
2825 	search_state.lines_found = 0;
2826 #endif
2827 
2828       top_of_cycle:
2829 	/* A superstate is basicly a transition table, indexed by
2830 	 * characters from the string being tested, and containing
2831 	 * RX_INX (`instruction frame') structures.
2832 	 */
2833 	search_state.ifr = &search_state.super->transitions [search_state.c];
2834 
2835       recurse_test_match:
2836 	/* This is the point to which control is sent when the
2837 	 * test matcher `recurses'.  Before jumping here, some variables
2838 	 * need to be saved on the stack and the next instruction frame
2839 	 * has to be computed.
2840 	 */
2841 
2842       restart:
2843 	/* Some instructions don't advance the matcher, but just
2844 	 * carry out some side effects and fetch a new instruction.
2845 	 * To dispatch that new instruction, `goto restart'.
2846 	 */
2847 
2848 	{
2849 	  struct rx_inx * next_tr_table;
2850 	  struct rx_inx * this_tr_table;
2851 	  /* The fastest route through the loop is when the instruction
2852 	   * is RX_NEXT_CHAR.  This case is detected when SEARCH_STATE.IFR->DATA
2853 	   * is non-zero.  In that case, it points to the next
2854 	   * superstate.
2855 	   *
2856 	   * This allows us to not bother fetching the bytecode.
2857 	   */
2858 	  next_tr_table = (struct rx_inx *)search_state.ifr->data;
2859 	  this_tr_table = search_state.super->transitions;
2860 	  while (next_tr_table)
2861 	    {
2862 #ifdef RX_DEBUG_0
2863 	      if (rx_debug_trace)
2864 		{
2865 		  struct rx_superset * setp;
2866 
2867 		  fprintf (stderr, "%d %d>> re_next_char @ %d (%d)",
2868 			   search_state.line_no,
2869 			   search_state.backtrack_depth,
2870 			   (search_state.test_pos.pos - search_state.test_pos.string
2871 			    + search_state.test_pos.offset), search_state.c);
2872 
2873 		  search_state.super =
2874 		    ((struct rx_superstate *)
2875 		     ((char *)this_tr_table
2876 		      - ((unsigned long)
2877 			 ((struct rx_superstate *)0)->transitions)));
2878 
2879 		  setp = search_state.super->contents;
2880 		  fprintf (stderr, "   superstet (rx=%d, &=%x: ",
2881 			   rxb->rx.rx_id, setp);
2882 		  while (setp)
2883 		    {
2884 		      fprintf (stderr, "%d ", setp->id);
2885 		      setp = setp->cdr;
2886 		    }
2887 		  fprintf (stderr, "\n");
2888 		}
2889 #endif
2890 	      this_tr_table = next_tr_table;
2891 	      ++search_state.test_pos.pos;
2892 	      if (search_state.test_pos.pos == search_state.test_pos.end)
2893 		{
2894 		  int burst_state;
2895 		try_burst_1:
2896 		  burst_state = get_burst (&search_state.test_pos,
2897 					   app_closure, stop);
2898 		  switch (burst_state)
2899 		    {
2900 		    case rx_get_burst_continuation:
2901 		      search_state.saved_this_tr_table = this_tr_table;
2902 		      search_state.saved_next_tr_table = next_tr_table;
2903 		      test_pc = rx_test_cache_hit_loop;
2904 		      goto test_return_continuation;
2905 
2906 		    resume_continuation_1:
2907 		      /* Continuation one jumps here to do its work: */
2908 		      search_state.saved_this_tr_table = this_tr_table;
2909 		      search_state.saved_next_tr_table = next_tr_table;
2910 		      goto try_burst_1;
2911 
2912 		    case rx_get_burst_ok:
2913 		      /* get_burst succeeded...keep going */
2914 		      break;
2915 
2916 		    case rx_get_burst_no_more:
2917 		      search_state.test_ret = rx_test_line_finished;
2918 		      search_state.could_have_continued = 1;
2919 		      goto test_do_return;
2920 
2921 		    case rx_get_burst_error:
2922 		      /* An error... */
2923 		      search_state.test_ret = rx_test_internal_error;
2924 		      goto test_do_return;
2925 		    }
2926 		}
2927 	      search_state.c = *search_state.test_pos.pos;
2928 	      search_state.ifr = this_tr_table + search_state.c;
2929 	      next_tr_table = (struct rx_inx *)search_state.ifr->data;
2930 	    } /* Fast loop through cached transition tables */
2931 
2932 	  /* Here when we ran out of cached next-char transitions.
2933 	   * So, it will be necessary to do a more expensive
2934 	   * dispatch on the current instruction.  The superstate
2935 	   * pointer is allowed to become invalid during next-char
2936 	   * transitions -- now we must bring it up to date.
2937 	   */
2938 	  search_state.super =
2939 	    ((struct rx_superstate *)
2940 	     ((char *)this_tr_table
2941 	      - ((unsigned long)
2942 		 ((struct rx_superstate *)0)->transitions)));
2943 	}
2944 
2945 	/* We've encountered an instruction other than next-char.
2946 	 * Dispatch that instruction:
2947 	 */
2948 	inx = (int)search_state.ifr->inx;
2949 #ifdef RX_DEBUG_0
2950 	if (rx_debug_trace)
2951 	  {
2952 	    struct rx_superset * setp = search_state.super->contents;
2953 
2954 	    fprintf (stderr, "%d %d>> %s @ %d (%d)", search_state.line_no,
2955 		     search_state.backtrack_depth,
2956 		     inx_names[inx],
2957 		     (search_state.test_pos.pos - search_state.test_pos.string
2958 		      + (test_pos.half == 0 ? 0 : size1)), search_state.c);
2959 
2960 	    fprintf (stderr, "   superstet (rx=%d, &=%x: ",
2961 		     rxb->rx.rx_id, setp);
2962 	    while (setp)
2963 	      {
2964 		fprintf (stderr, "%d ", setp->id);
2965 		setp = setp->cdr;
2966 	      }
2967 	    fprintf (stderr, "\n");
2968 	  }
2969 #endif
2970 	switch ((enum rx_opcode)inx)
2971 	  {
2972 	  case rx_do_side_effects:
2973 
2974 	    /*  RX_DO_SIDE_EFFECTS occurs when we cross epsilon
2975 	     *  edges associated with parentheses, backreferencing, etc.
2976 	     */
2977 	    {
2978 	      struct rx_distinct_future * df =
2979 		(struct rx_distinct_future *)search_state.ifr->data_2;
2980 	      struct rx_se_list * el = df->effects;
2981 	      /* Side effects come in lists.  This walks down
2982 	       * a list, dispatching.
2983 	       */
2984 	      while (el)
2985 		{
2986 		  long effect;
2987 		  effect = (long)el->car;
2988 		  if (effect < 0)
2989 		    {
2990 #ifdef RX_DEBUG_0
2991 		      if (rx_debug_trace)
2992 			{
2993 			  struct rx_superset * setp = search_state.super->contents;
2994 
2995 			  fprintf (stderr, "....%d %d>> %s\n", search_state.line_no,
2996 				   search_state.backtrack_depth,
2997 				   efnames[-effect]);
2998 			}
2999 #endif
3000 		      switch ((enum re_side_effects) effect)
3001 
3002 			{
3003 			case re_se_pushback:
3004 			  search_state.ifr = &df->future_frame;
3005 			  if (!search_state.ifr->data)
3006 			    {
3007 			      struct rx_superstate * sup;
3008 			      sup = search_state.super;
3009 			      rx_lock_superstate (rx, sup);
3010 			      if (!rx_handle_cache_miss (&rxb->rx,
3011 							 search_state.super,
3012 							 search_state.c,
3013 							 (search_state.ifr
3014 							  ->data_2)))
3015 				{
3016 				  rx_unlock_superstate (rx, sup);
3017 				  search_state.test_ret = rx_test_internal_error;
3018 				  goto test_do_return;
3019 				}
3020 			      rx_unlock_superstate (rx, sup);
3021 			    }
3022 			  /* --search_state.test_pos.pos; */
3023 			  search_state.c = 't';
3024 			  search_state.super
3025 			    = ((struct rx_superstate *)
3026 			       ((char *)search_state.ifr->data
3027 				- (long)(((struct rx_superstate *)0)
3028 					 ->transitions)));
3029 			  goto top_of_cycle;
3030 			  break;
3031 			case re_se_push0:
3032 			  {
3033 			    struct rx_counter_frame * old_cf
3034 			      = (search_state.counter_stack
3035 				 ? ((struct rx_counter_frame *)
3036 				    search_state.counter_stack->sp)
3037 				 : 0);
3038 			    struct rx_counter_frame * cf;
3039 			    PUSH (search_state.counter_stack,
3040 				  sizeof (struct rx_counter_frame));
3041 			    cf = ((struct rx_counter_frame *)
3042 				  search_state.counter_stack->sp);
3043 			    cf->tag = re_se_iter;
3044 			    cf->val = 0;
3045 			    cf->inherited_from = 0;
3046 			    cf->cdr = old_cf;
3047 			    break;
3048 			  }
3049 			case re_se_fail:
3050 			  goto test_do_return;
3051 			case re_se_begbuf:
3052 			  if (!AT_STRINGS_BEG ())
3053 			    goto test_do_return;
3054 			  break;
3055 			case re_se_endbuf:
3056 			  if (!AT_STRINGS_END ())
3057 			    goto test_do_return;
3058 			  break;
3059 			case re_se_wordbeg:
3060 			  if (   LETTER_P (&search_state.test_pos, 1)
3061 			      && (   AT_STRINGS_BEG()
3062 				  || !LETTER_P (&search_state.test_pos, 0)))
3063 			    break;
3064 			  else
3065 			    goto test_do_return;
3066 			case re_se_wordend:
3067 			  if (   !AT_STRINGS_BEG ()
3068 			      && LETTER_P (&search_state.test_pos, 0)
3069 			      && (AT_STRINGS_END ()
3070 				  || !LETTER_P (&search_state.test_pos, 1)))
3071 			    break;
3072 			  else
3073 			    goto test_do_return;
3074 			case re_se_wordbound:
3075 			  if (AT_WORD_BOUNDARY (&search_state.test_pos))
3076 			    break;
3077 			  else
3078 			    goto test_do_return;
3079 			case re_se_notwordbound:
3080 			  if (!AT_WORD_BOUNDARY (&search_state.test_pos))
3081 			    break;
3082 			  else
3083 			    goto test_do_return;
3084 			case re_se_hat:
3085 			  if (AT_STRINGS_BEG ())
3086 			    {
3087 			      if (rxb->not_bol)
3088 				goto test_do_return;
3089 			      else
3090 				break;
3091 			    }
3092 			  else
3093 			    {
3094 			      char pos_c = *search_state.test_pos.pos;
3095 			      if (   (SRCH_TRANSLATE (pos_c)
3096 				      == SRCH_TRANSLATE('\n'))
3097 				  && rxb->newline_anchor)
3098 				break;
3099 			      else
3100 				goto test_do_return;
3101 			    }
3102 			case re_se_dollar:
3103 			  if (AT_STRINGS_END ())
3104 			    {
3105 			      if (rxb->not_eol)
3106 				goto test_do_return;
3107 			      else
3108 				break;
3109 			    }
3110 			  else
3111 			    {
3112 			      if (   (   SRCH_TRANSLATE (fetch_char
3113 						    (&search_state.test_pos, 1,
3114 						     app_closure, stop))
3115 				      == SRCH_TRANSLATE ('\n'))
3116 				  && rxb->newline_anchor)
3117 				break;
3118 			      else
3119 				goto test_do_return;
3120 			    }
3121 
3122 			case re_se_try:
3123 			  /* This is the first side effect in every
3124 			   * expression.
3125 			   *
3126 			   *  FOR NO GOOD REASON...get rid of it...
3127 			   */
3128 			  break;
3129 
3130 			case re_se_pushpos:
3131 			  {
3132 			    int urhere =
3133 			      ((int)(search_state.test_pos.pos
3134 				     - search_state.test_pos.string)
3135 			       + search_state.test_pos.offset);
3136 			    struct rx_counter_frame * old_cf
3137 			      = (search_state.counter_stack
3138 				 ? ((struct rx_counter_frame *)
3139 				    search_state.counter_stack->sp)
3140 				 : 0);
3141 			    struct rx_counter_frame * cf;
3142 			    PUSH(search_state.counter_stack,
3143 				 sizeof (struct rx_counter_frame));
3144 			    cf = ((struct rx_counter_frame *)
3145 				  search_state.counter_stack->sp);
3146 			    cf->tag = re_se_pushpos;
3147 			    cf->val = urhere;
3148 			    cf->inherited_from = 0;
3149 			    cf->cdr = old_cf;
3150 			    break;
3151 			  }
3152 
3153 			case re_se_chkpos:
3154 			  {
3155 			    int urhere =
3156 			      ((int)(search_state.test_pos.pos
3157 				     - search_state.test_pos.string)
3158 			       + search_state.test_pos.offset);
3159 			    struct rx_counter_frame * cf
3160 			      = ((struct rx_counter_frame *)
3161 				 search_state.counter_stack->sp);
3162 			    if (cf->val == urhere)
3163 			      goto test_do_return;
3164 			    cf->val = urhere;
3165 			    break;
3166 			  }
3167 			  break;
3168 
3169 			case re_se_poppos:
3170 			  POP(search_state.counter_stack,
3171 			      sizeof (struct rx_counter_frame));
3172 			  break;
3173 
3174 
3175 			case re_se_at_dot:
3176 			case re_se_syntax:
3177 			case re_se_not_syntax:
3178 #ifdef emacs
3179 			  /*
3180 			   * this release lacks emacs support
3181 			   */
3182 #endif
3183 			  break;
3184 			case re_se_win:
3185 			case re_se_lparen:
3186 			case re_se_rparen:
3187 			case re_se_backref:
3188 			case re_se_iter:
3189 			case re_se_end_iter:
3190 			case re_se_tv:
3191 			case re_floogle_flap:
3192 			  search_state.ret_val = 0;
3193 			  goto test_do_return;
3194 			}
3195 		    }
3196 		  else
3197 		    {
3198 #ifdef RX_DEBUG_0
3199 		      if (rx_debug_trace)
3200 			fprintf (stderr, "....%d %d>> %s %d %d\n", search_state.line_no,
3201 				 search_state.backtrack_depth,
3202 				 efnames2[rxb->se_params [effect].se],
3203 				 rxb->se_params [effect].op1,
3204 				 rxb->se_params [effect].op2);
3205 #endif
3206 		      switch (rxb->se_params [effect].se)
3207 			{
3208 			case re_se_win:
3209 			  /* This side effect indicates that we've
3210 			   * found a match, though not necessarily the
3211 			   * best match.  This is a fancy assignment to
3212 			   * register 0 unless the caller didn't
3213 			   * care about registers.  In which case,
3214 			   * this stops the match.
3215 			   */
3216 			  {
3217 			    int urhere =
3218 			      ((int)(search_state.test_pos.pos
3219 				     - search_state.test_pos.string)
3220 			       + search_state.test_pos.offset);
3221 
3222 			    if (   (search_state.best_last_r < 0)
3223 				|| (urhere + 1 > search_state.best_rparen[0]))
3224 			      {
3225 				/* Record the best known and keep
3226 				 * looking.
3227 				 */
3228 				int x;
3229 				for (x = 0; x <= search_state.last_l; ++x)
3230 				  search_state.best_lparen[x] = search_state.lparen[x];
3231 				search_state.best_last_l = search_state.last_l;
3232 				for (x = 0; x <= search_state.last_r; ++x)
3233 				  search_state.best_rparen[x] = search_state.rparen[x];
3234 				search_state.best_rparen[0] = urhere + 1;
3235 				search_state.best_last_r = search_state.last_r;
3236 			      }
3237 			    /* If we're not reporting the match-length
3238 			     * or other register info, we need look no
3239 			     * further.
3240 			     */
3241 			    if (search_state.first_found)
3242 			      {
3243 				search_state.test_ret = rx_test_found_first;
3244 				goto test_do_return;
3245 			      }
3246 			  }
3247 			  break;
3248 			case re_se_lparen:
3249 			  {
3250 			    int urhere =
3251 			      ((int)(search_state.test_pos.pos
3252 				     - search_state.test_pos.string)
3253 			       + search_state.test_pos.offset);
3254 
3255 			    int reg = rxb->se_params [effect].op1;
3256 #if 0
3257 			    if (reg > search_state.last_l)
3258 #endif
3259 			      {
3260 				search_state.lparen[reg] = urhere + 1;
3261 				/* In addition to making this assignment,
3262 				 * we now know that lower numbered regs
3263 				 * that haven't already been assigned,
3264 				 * won't be.  We make sure they're
3265 				 * filled with -1, so they can be
3266 				 * recognized as unassigned.
3267 				 */
3268 				if (search_state.last_l < reg)
3269 				  while (++search_state.last_l < reg)
3270 				    search_state.lparen[search_state.last_l] = -1;
3271 			      }
3272 			    break;
3273 			  }
3274 
3275 			case re_se_rparen:
3276 			  {
3277 			    int urhere =
3278 			      ((int)(search_state.test_pos.pos
3279 				     - search_state.test_pos.string)
3280 			       + search_state.test_pos.offset);
3281 			    int reg = rxb->se_params [effect].op1;
3282 			    search_state.rparen[reg] = urhere + 1;
3283 			    if (search_state.last_r < reg)
3284 			      {
3285 				while (++search_state.last_r < reg)
3286 				  search_state.rparen[search_state.last_r]
3287 				    = -1;
3288 			      }
3289 			    break;
3290 			  }
3291 
3292 			case re_se_backref:
3293 			  {
3294 			    int reg = rxb->se_params [effect].op1;
3295 			    if (   reg > search_state.last_r
3296 				|| search_state.rparen[reg] < 0)
3297 			      goto test_do_return;
3298 
3299 			    {
3300 			      int backref_status;
3301 			    check_backreference:
3302 			      backref_status
3303 				= back_check (&search_state.test_pos,
3304 					      search_state.lparen[reg],
3305 					      search_state.rparen[reg],
3306 					      search_state.translate,
3307 					      app_closure,
3308 					      stop);
3309 			      switch (backref_status)
3310 				{
3311 				case rx_back_check_continuation:
3312 				  search_state.saved_reg = reg;
3313 				  test_pc = rx_test_backreference_check;
3314 				  goto test_return_continuation;
3315 				resume_continuation_2:
3316 				  reg = search_state.saved_reg;
3317 				  goto check_backreference;
3318 				case rx_back_check_fail:
3319 				  /* Fail */
3320 				  goto test_do_return;
3321 				case rx_back_check_pass:
3322 				  /* pass --
3323 				   * test_pos now advanced to last
3324 				   * char matched by backref
3325 				   */
3326 				  break;
3327 				}
3328 			    }
3329 			    break;
3330 			  }
3331 			case re_se_iter:
3332 			  {
3333 			    struct rx_counter_frame * csp
3334 			      = ((struct rx_counter_frame *)
3335 				 search_state.counter_stack->sp);
3336 			    if (csp->val == rxb->se_params[effect].op2)
3337 			      goto test_do_return;
3338 			    else
3339 			      ++csp->val;
3340 			    break;
3341 			  }
3342 			case re_se_end_iter:
3343 			  {
3344 			    struct rx_counter_frame * csp
3345 			      = ((struct rx_counter_frame *)
3346 				 search_state.counter_stack->sp);
3347 			    if (csp->val < rxb->se_params[effect].op1)
3348 			      goto test_do_return;
3349 			    else
3350 			      {
3351 				struct rx_counter_frame * source = csp;
3352 				while (source->inherited_from)
3353 				  source = source->inherited_from;
3354 				if (!source || !source->cdr)
3355 				  {
3356 				    POP(search_state.counter_stack,
3357 					sizeof(struct rx_counter_frame));
3358 				  }
3359 				else
3360 				  {
3361 				    source = source->cdr;
3362 				    csp->val = source->val;
3363 				    csp->tag = source->tag;
3364 				    csp->cdr = 0;
3365 				    csp->inherited_from = source;
3366 				  }
3367 			      }
3368 			    break;
3369 			  }
3370 			case re_se_tv:
3371 			  /* is a noop */
3372 			  break;
3373 			case re_se_try:
3374 			case re_se_pushback:
3375 			case re_se_push0:
3376 			case re_se_pushpos:
3377 			case re_se_chkpos:
3378 			case re_se_poppos:
3379 			case re_se_at_dot:
3380 			case re_se_syntax:
3381 			case re_se_not_syntax:
3382 			case re_se_begbuf:
3383 			case re_se_hat:
3384 			case re_se_wordbeg:
3385 			case re_se_wordbound:
3386 			case re_se_notwordbound:
3387 			case re_se_wordend:
3388 			case re_se_endbuf:
3389 			case re_se_dollar:
3390 			case re_se_fail:
3391 			case re_floogle_flap:
3392 			  search_state.ret_val = 0;
3393 			  goto test_do_return;
3394 			}
3395 		    }
3396 		  el = el->cdr;
3397 		}
3398 	      /* Now the side effects are done,
3399 	       * so get the next instruction.
3400 	       * and move on.
3401 	       */
3402 	      search_state.ifr = &df->future_frame;
3403 	      goto restart;
3404 	    }
3405 
3406 	  case rx_backtrack_point:
3407 	    {
3408 	      /* A backtrack point indicates that we've reached a
3409 	       * non-determinism in the superstate NFA.  This is a
3410 	       * loop that exhaustively searches the possibilities.
3411 	       *
3412 	       * A backtracking strategy is used.  We keep track of what
3413 	       * registers are valid so we can erase side effects.
3414 	       *
3415 	       * First, make sure there is some stack space to hold
3416 	       * our state.
3417 	       */
3418 
3419 	      struct rx_backtrack_frame * bf;
3420 
3421 	      PUSH(search_state.backtrack_stack,
3422 		   search_state.backtrack_frame_bytes);
3423 #ifdef RX_DEBUG_0
3424 	      ++search_state.backtrack_depth;
3425 #endif
3426 
3427 	      bf = ((struct rx_backtrack_frame *)
3428 		    search_state.backtrack_stack->sp);
3429 	      {
3430 		bf->stk_super = search_state.super;
3431 		/* We prevent the current superstate from being
3432 		 * deleted from the superstate cache.
3433 		 */
3434 		rx_lock_superstate (&rxb->rx, search_state.super);
3435 #ifdef RX_DEBUG_0
3436 		bf->stk_search_state.line_no = search_state.line_no;
3437 #endif
3438 		bf->stk_c = search_state.c;
3439 		bf->stk_test_pos = search_state.test_pos;
3440 		bf->stk_last_l = search_state.last_l;
3441 		bf->stk_last_r = search_state.last_r;
3442 		bf->df = ((struct rx_super_edge *)
3443 			  search_state.ifr->data_2)->options;
3444 		bf->first_df = bf->df;
3445 		bf->counter_stack_sp = (search_state.counter_stack
3446 					? search_state.counter_stack->sp
3447 					: 0);
3448 		bf->stk_test_ret = search_state.test_ret;
3449 		if (rxb->match_regs_on_stack)
3450 		  {
3451 		    int x;
3452 		    regoff_t * stk =
3453 		      (regoff_t *)((char *)bf + sizeof (*bf));
3454 		    for (x = 0; x <= search_state.last_l; ++x)
3455 		      stk[x] = search_state.lparen[x];
3456 		    stk += x;
3457 		    for (x = 0; x <= search_state.last_r; ++x)
3458 		      stk[x] = search_state.rparen[x];
3459 		  }
3460 	      }
3461 
3462 	      /* Here is a while loop whose body is mainly a function
3463 	       * call and some code to handle a return from that
3464 	       * function.
3465 	       *
3466 	       * From here on for the rest of `case backtrack_point' it
3467 	       * is unsafe to assume that the search_state copies of
3468 	       * variables saved on the backtracking stack are valid
3469 	       * -- so read their values from the backtracking stack.
3470 	       *
3471 	       * This lets us use one generation fewer stack saves in
3472 	       * the call-graph of a search.
3473 	       */
3474 
3475 	    while_non_det_options:
3476 #ifdef RX_DEBUG_0
3477 	      ++search_state.lines_found;
3478 	      if (rx_debug_trace)
3479 		fprintf (stderr, "@@@ %d calls %d @@@\n",
3480 			 search_state.line_no, search_state.lines_found);
3481 
3482 	      search_state.line_no = search_state.lines_found;
3483 #endif
3484 
3485 	      if (bf->df->next_same_super_edge[0] == bf->first_df)
3486 		{
3487 		  /* This is a tail-call optimization -- we don't recurse
3488 		   * for the last of the possible futures.
3489 		   */
3490 		  search_state.ifr = (bf->df->effects
3491 				      ? &bf->df->side_effects_frame
3492 				      : &bf->df->future_frame);
3493 
3494 		  rx_unlock_superstate (&rxb->rx, search_state.super);
3495 		  POP(search_state.backtrack_stack,
3496 		      search_state.backtrack_frame_bytes);
3497 #ifdef RX_DEBUG
3498 		  --search_state.backtrack_depth;
3499 #endif
3500 		  goto restart;
3501 		}
3502 	      else
3503 		{
3504 		  if (search_state.counter_stack)
3505 		    {
3506 		      struct rx_counter_frame * old_cf
3507 			= ((struct rx_counter_frame *)search_state.counter_stack->sp);
3508 		      struct rx_counter_frame * cf;
3509 		      PUSH(search_state.counter_stack, sizeof (struct rx_counter_frame));
3510 		      cf = ((struct rx_counter_frame *)search_state.counter_stack->sp);
3511 		      cf->tag = old_cf->tag;
3512 		      cf->val = old_cf->val;
3513 		      cf->inherited_from = old_cf;
3514 		      cf->cdr = 0;
3515 		    }
3516 		  /* `Call' this test-match block */
3517 		  search_state.ifr = (bf->df->effects
3518 				      ? &bf->df->side_effects_frame
3519 				      : &bf->df->future_frame);
3520 		  goto recurse_test_match;
3521 		}
3522 
3523 	      /* Returns in this block are accomplished by
3524 	       * goto test_do_return.  There are two cases.
3525 	       * If there is some search-stack left,
3526 	       * then it is a return from a `recursive' call.
3527 	       * If there is no search-stack left, then
3528 	       * we should return to the fastmap/search loop.
3529 	       */
3530 
3531 	    test_do_return:
3532 
3533 	      if (!search_state.backtrack_stack)
3534 		{
3535 #ifdef RX_DEBUG_0
3536 		  if (rx_debug_trace)
3537 		    fprintf (stderr, "!!! %d bails returning %d !!!\n",
3538 			     search_state.line_no, search_state.test_ret);
3539 #endif
3540 
3541 		  /* No more search-stack -- this test is done. */
3542 		  if (search_state.test_ret != rx_test_internal_error)
3543 		    goto return_from_test_match;
3544 		  else
3545 		    goto error_in_testing_match;
3546 		}
3547 
3548 	      /* Returning from a recursive call to
3549 	       * the test match block:
3550 	       */
3551 
3552 	      bf = ((struct rx_backtrack_frame *)
3553 		    search_state.backtrack_stack->sp);
3554 #ifdef RX_DEBUG_0
3555 	      if (rx_debug_trace)
3556 		fprintf (stderr, "+++ %d returns %d (to %d)+++\n",
3557 			 search_state.line_no,
3558 			 search_state.test_ret,
3559 			 bf->stk_search_state.line_no);
3560 #endif
3561 
3562 	      while (search_state.counter_stack
3563 		     && (!bf->counter_stack_sp
3564 			 || (bf->counter_stack_sp
3565 			     != search_state.counter_stack->sp)))
3566 		{
3567 		  POP(search_state.counter_stack,
3568 		      sizeof (struct rx_counter_frame));
3569 		}
3570 
3571 	      if (search_state.test_ret == rx_test_internal_error)
3572 		{
3573 		  POP (search_state.backtrack_stack,
3574 		       search_state.backtrack_frame_bytes);
3575 		  search_state.test_ret = rx_test_internal_error;
3576 		  goto test_do_return;
3577 		}
3578 
3579 	      /* If a non-longest match was found and that is good
3580 	       * enough, return immediately.
3581 	       */
3582 	      if (   (search_state.test_ret == rx_test_found_first)
3583 		  && search_state.first_found)
3584 		{
3585 		  rx_unlock_superstate (&rxb->rx, bf->stk_super);
3586 		  POP (search_state.backtrack_stack,
3587 		       search_state.backtrack_frame_bytes);
3588 		  goto test_do_return;
3589 		}
3590 
3591 	      search_state.test_ret = bf->stk_test_ret;
3592 	      search_state.last_l = bf->stk_last_l;
3593 	      search_state.last_r = bf->stk_last_r;
3594 	      bf->df = bf->df->next_same_super_edge[0];
3595 	      search_state.super = bf->stk_super;
3596 	      search_state.c = bf->stk_c;
3597 #ifdef RX_DEBUG_0
3598 	      search_state.line_no = bf->stk_search_state.line_no;
3599 #endif
3600 
3601 	      if (rxb->match_regs_on_stack)
3602 		{
3603 		  int x;
3604 		  regoff_t * stk =
3605 		    (regoff_t *)((char *)bf + sizeof (*bf));
3606 		  for (x = 0; x <= search_state.last_l; ++x)
3607 		    search_state.lparen[x] = stk[x];
3608 		  stk += x;
3609 		  for (x = 0; x <= search_state.last_r; ++x)
3610 		    search_state.rparen[x] = stk[x];
3611 		}
3612 
3613 	      {
3614 		int x;
3615 	      try_burst_2:
3616 		x = get_burst (&bf->stk_test_pos, app_closure, stop);
3617 		switch (x)
3618 		  {
3619 		  case rx_get_burst_continuation:
3620 		    search_state.saved_bf = bf;
3621 		    test_pc = rx_test_backtrack_return;
3622 		    goto test_return_continuation;
3623 		  resume_continuation_3:
3624 		    bf = search_state.saved_bf;
3625 		    goto try_burst_2;
3626 		  case rx_get_burst_no_more:
3627 		    /* Since we've been here before, it is some kind of
3628 		     * error that we can't return.
3629 		     */
3630 		  case rx_get_burst_error:
3631 		    search_state.test_ret = rx_test_internal_error;
3632 		    goto test_do_return;
3633 		  case rx_get_burst_ok:
3634 		    break;
3635 		  }
3636 	      }
3637 	      search_state.test_pos = bf->stk_test_pos;
3638 	      goto while_non_det_options;
3639 	    }
3640 
3641 
3642 	  case rx_cache_miss:
3643 	    /* Because the superstate NFA is lazily constructed,
3644 	     * and in fact may erode from underneath us, we sometimes
3645 	     * have to construct the next instruction from the hard way.
3646 	     * This invokes one step in the lazy-conversion.
3647 	     */
3648 	    search_state.ifr = rx_handle_cache_miss (&rxb->rx,
3649 						     search_state.super,
3650 						     search_state.c,
3651 						     search_state.ifr->data_2);
3652 	    if (!search_state.ifr)
3653 	      {
3654 		search_state.test_ret = rx_test_internal_error;
3655 		goto test_do_return;
3656 	      }
3657 	    goto restart;
3658 
3659 	  case rx_backtrack:
3660 	    /* RX_BACKTRACK means that we've reached the empty
3661 	     * superstate, indicating that match can't succeed
3662 	     * from this point.
3663 	     */
3664 	    goto test_do_return;
3665 
3666 	  case rx_next_char:
3667 	  case rx_error_inx:
3668 	  case rx_num_instructions:
3669 	    search_state.ret_val = 0;
3670 	    goto test_do_return;
3671 	  }
3672 	goto pseudo_while_1;
3673       }
3674 
3675     /* Healthy exits from the test-match loop do a
3676      * `goto return_from_test_match'   On the other hand,
3677      * we might end up here.
3678      */
3679   error_in_testing_match:
3680     test_state = rx_test_error;
3681     goto test_returns_to_search;
3682 
3683     /***** fastmap/search loop body
3684      *	      considering the results testing for a match
3685      */
3686 
3687   return_from_test_match:
3688 
3689     if (search_state.best_last_l >= 0)
3690       {
3691 	if (regs && (regs->start != search_state.best_lparen))
3692 	  {
3693 	    bcopy (search_state.best_lparen, regs->start,
3694 		   regs->num_regs * sizeof (int));
3695 	    bcopy (search_state.best_rparen, regs->end,
3696 		   regs->num_regs * sizeof (int));
3697 	  }
3698 	if (regs && !rxb->no_sub)
3699 	  {
3700 	    int q;
3701 	    int bound = (regs->num_regs > search_state.num_regs
3702 			 ? regs->num_regs
3703 			 : search_state.num_regs);
3704 	    regoff_t * s = regs->start;
3705 	    regoff_t * e = regs->end;
3706 	    for (q = search_state.best_last_l + 1;  q < bound; ++q)
3707 	      s[q] = e[q] = -1;
3708 	  }
3709 	search_state.ret_val = search_state.best_lparen[0];
3710 	test_state = rx_test_ok;
3711 	goto test_returns_to_search;
3712       }
3713     else
3714       {
3715 	test_state = rx_test_fail;
3716 	goto test_returns_to_search;
3717       }
3718 
3719   test_return_continuation:
3720     search_state.test_match_resume_pt = test_pc;
3721     test_state = rx_test_continuation;
3722     goto test_returns_to_search;
3723   }
3724 }
3725 
3726 
3727 
3728 #endif /* RX_WANT_RX_DEFS */
3729 
3730 
3731 
3732 #else /* RX_WANT_SE_DEFS */
3733   /* Integers are used to represent side effects.
3734    *
3735    * Simple side effects are given negative integer names by these enums.
3736    *
3737    * Non-negative names are reserved for complex effects.
3738    *
3739    * Complex effects are those that take arguments.  For example,
3740    * a register assignment associated with a group is complex because
3741    * it requires an argument to tell which group is being matched.
3742    *
3743    * The integer name of a complex effect is an index into rxb->se_params.
3744    */
3745 
3746   RX_DEF_SE(1, re_se_try, = -1)		/* Epsilon from start state */
3747 
3748   RX_DEF_SE(0, re_se_pushback, = re_se_try - 1)
3749   RX_DEF_SE(0, re_se_push0, = re_se_pushback -1)
3750   RX_DEF_SE(0, re_se_pushpos, = re_se_push0 - 1)
3751   RX_DEF_SE(0, re_se_chkpos, = re_se_pushpos -1)
3752   RX_DEF_SE(0, re_se_poppos, = re_se_chkpos - 1)
3753 
3754   RX_DEF_SE(1, re_se_at_dot, = re_se_poppos - 1)	/* Emacs only */
3755   RX_DEF_SE(0, re_se_syntax, = re_se_at_dot - 1) /* Emacs only */
3756   RX_DEF_SE(0, re_se_not_syntax, = re_se_syntax - 1) /* Emacs only */
3757 
3758   RX_DEF_SE(1, re_se_begbuf, = re_se_not_syntax - 1) /* match beginning of buffer */
3759   RX_DEF_SE(1, re_se_hat, = re_se_begbuf - 1) /* match beginning of line */
3760 
3761   RX_DEF_SE(1, re_se_wordbeg, = re_se_hat - 1)
3762   RX_DEF_SE(1, re_se_wordbound, = re_se_wordbeg - 1)
3763   RX_DEF_SE(1, re_se_notwordbound, = re_se_wordbound - 1)
3764 
3765   RX_DEF_SE(1, re_se_wordend, = re_se_notwordbound - 1)
3766   RX_DEF_SE(1, re_se_endbuf, = re_se_wordend - 1)
3767 
3768   /* This fails except at the end of a line.
3769    * It deserves to go here since it is typicly one of the last steps
3770    * in a match.
3771    */
3772   RX_DEF_SE(1, re_se_dollar, = re_se_endbuf - 1)
3773 
3774   /* Simple effects: */
3775   RX_DEF_SE(1, re_se_fail, = re_se_dollar - 1)
3776 
3777   /* Complex effects.  These are used in the 'se' field of
3778    * a struct re_se_params.  Indexes into the se array
3779    * are stored as instructions on nfa edges.
3780    */
3781   RX_DEF_CPLX_SE(1, re_se_win, = 0)
3782   RX_DEF_CPLX_SE(1, re_se_lparen, = re_se_win + 1)
3783   RX_DEF_CPLX_SE(1, re_se_rparen, = re_se_lparen + 1)
3784   RX_DEF_CPLX_SE(0, re_se_backref, = re_se_rparen + 1)
3785   RX_DEF_CPLX_SE(0, re_se_iter, = re_se_backref + 1)
3786   RX_DEF_CPLX_SE(0, re_se_end_iter, = re_se_iter + 1)
3787   RX_DEF_CPLX_SE(0, re_se_tv, = re_se_end_iter + 1)
3788 
3789 #endif
3790 
3791 #endif
3792