xref: /freebsd/contrib/flex/src/nfa.c (revision 7e382390)
1*7e382390SJung-uk Kim /* nfa - NFA construction routines */
2*7e382390SJung-uk Kim 
3*7e382390SJung-uk Kim /*  Copyright (c) 1990 The Regents of the University of California. */
4*7e382390SJung-uk Kim /*  All rights reserved. */
5*7e382390SJung-uk Kim 
6*7e382390SJung-uk Kim /*  This code is derived from software contributed to Berkeley by */
7*7e382390SJung-uk Kim /*  Vern Paxson. */
8*7e382390SJung-uk Kim 
9*7e382390SJung-uk Kim /*  The United States Government has rights in this work pursuant */
10*7e382390SJung-uk Kim /*  to contract no. DE-AC03-76SF00098 between the United States */
11*7e382390SJung-uk Kim /*  Department of Energy and the University of California. */
12*7e382390SJung-uk Kim 
13*7e382390SJung-uk Kim /*  This file is part of flex. */
14*7e382390SJung-uk Kim 
15*7e382390SJung-uk Kim /*  Redistribution and use in source and binary forms, with or without */
16*7e382390SJung-uk Kim /*  modification, are permitted provided that the following conditions */
17*7e382390SJung-uk Kim /*  are met: */
18*7e382390SJung-uk Kim 
19*7e382390SJung-uk Kim /*  1. Redistributions of source code must retain the above copyright */
20*7e382390SJung-uk Kim /*     notice, this list of conditions and the following disclaimer. */
21*7e382390SJung-uk Kim /*  2. Redistributions in binary form must reproduce the above copyright */
22*7e382390SJung-uk Kim /*     notice, this list of conditions and the following disclaimer in the */
23*7e382390SJung-uk Kim /*     documentation and/or other materials provided with the distribution. */
24*7e382390SJung-uk Kim 
25*7e382390SJung-uk Kim /*  Neither the name of the University nor the names of its contributors */
26*7e382390SJung-uk Kim /*  may be used to endorse or promote products derived from this software */
27*7e382390SJung-uk Kim /*  without specific prior written permission. */
28*7e382390SJung-uk Kim 
29*7e382390SJung-uk Kim /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30*7e382390SJung-uk Kim /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31*7e382390SJung-uk Kim /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
32*7e382390SJung-uk Kim /*  PURPOSE. */
33*7e382390SJung-uk Kim 
34*7e382390SJung-uk Kim #include "flexdef.h"
35*7e382390SJung-uk Kim 
36*7e382390SJung-uk Kim 
37*7e382390SJung-uk Kim /* declare functions that have forward references */
38*7e382390SJung-uk Kim 
39*7e382390SJung-uk Kim int	dupmachine(int);
40*7e382390SJung-uk Kim void	mkxtion(int, int);
41*7e382390SJung-uk Kim 
42*7e382390SJung-uk Kim 
43*7e382390SJung-uk Kim /* add_accept - add an accepting state to a machine
44*7e382390SJung-uk Kim  *
45*7e382390SJung-uk Kim  * accepting_number becomes mach's accepting number.
46*7e382390SJung-uk Kim  */
47*7e382390SJung-uk Kim 
add_accept(int mach,int accepting_number)48*7e382390SJung-uk Kim void    add_accept (int mach, int accepting_number)
49*7e382390SJung-uk Kim {
50*7e382390SJung-uk Kim 	/* Hang the accepting number off an epsilon state.  if it is associated
51*7e382390SJung-uk Kim 	 * with a state that has a non-epsilon out-transition, then the state
52*7e382390SJung-uk Kim 	 * will accept BEFORE it makes that transition, i.e., one character
53*7e382390SJung-uk Kim 	 * too soon.
54*7e382390SJung-uk Kim 	 */
55*7e382390SJung-uk Kim 
56*7e382390SJung-uk Kim 	if (transchar[finalst[mach]] == SYM_EPSILON)
57*7e382390SJung-uk Kim 		accptnum[finalst[mach]] = accepting_number;
58*7e382390SJung-uk Kim 
59*7e382390SJung-uk Kim 	else {
60*7e382390SJung-uk Kim 		int     astate = mkstate (SYM_EPSILON);
61*7e382390SJung-uk Kim 
62*7e382390SJung-uk Kim 		accptnum[astate] = accepting_number;
63*7e382390SJung-uk Kim 		(void) link_machines (mach, astate);
64*7e382390SJung-uk Kim 	}
65*7e382390SJung-uk Kim }
66*7e382390SJung-uk Kim 
67*7e382390SJung-uk Kim 
68*7e382390SJung-uk Kim /* copysingl - make a given number of copies of a singleton machine
69*7e382390SJung-uk Kim  *
70*7e382390SJung-uk Kim  * synopsis
71*7e382390SJung-uk Kim  *
72*7e382390SJung-uk Kim  *   newsng = copysingl( singl, num );
73*7e382390SJung-uk Kim  *
74*7e382390SJung-uk Kim  *     newsng - a new singleton composed of num copies of singl
75*7e382390SJung-uk Kim  *     singl  - a singleton machine
76*7e382390SJung-uk Kim  *     num    - the number of copies of singl to be present in newsng
77*7e382390SJung-uk Kim  */
78*7e382390SJung-uk Kim 
copysingl(int singl,int num)79*7e382390SJung-uk Kim int     copysingl (int singl, int num)
80*7e382390SJung-uk Kim {
81*7e382390SJung-uk Kim 	int     copy, i;
82*7e382390SJung-uk Kim 
83*7e382390SJung-uk Kim 	copy = mkstate (SYM_EPSILON);
84*7e382390SJung-uk Kim 
85*7e382390SJung-uk Kim 	for (i = 1; i <= num; ++i)
86*7e382390SJung-uk Kim 		copy = link_machines (copy, dupmachine (singl));
87*7e382390SJung-uk Kim 
88*7e382390SJung-uk Kim 	return copy;
89*7e382390SJung-uk Kim }
90*7e382390SJung-uk Kim 
91*7e382390SJung-uk Kim 
92*7e382390SJung-uk Kim /* dumpnfa - debugging routine to write out an nfa */
93*7e382390SJung-uk Kim 
dumpnfa(int state1)94*7e382390SJung-uk Kim void    dumpnfa (int state1)
95*7e382390SJung-uk Kim {
96*7e382390SJung-uk Kim 	int     sym, tsp1, tsp2, anum, ns;
97*7e382390SJung-uk Kim 
98*7e382390SJung-uk Kim 	fprintf (stderr,
99*7e382390SJung-uk Kim 		 _
100*7e382390SJung-uk Kim 		 ("\n\n********** beginning dump of nfa with start state %d\n"),
101*7e382390SJung-uk Kim 		 state1);
102*7e382390SJung-uk Kim 
103*7e382390SJung-uk Kim 	/* We probably should loop starting at firstst[state1] and going to
104*7e382390SJung-uk Kim 	 * lastst[state1], but they're not maintained properly when we "or"
105*7e382390SJung-uk Kim 	 * all of the rules together.  So we use our knowledge that the machine
106*7e382390SJung-uk Kim 	 * starts at state 1 and ends at lastnfa.
107*7e382390SJung-uk Kim 	 */
108*7e382390SJung-uk Kim 
109*7e382390SJung-uk Kim 	/* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */
110*7e382390SJung-uk Kim 	for (ns = 1; ns <= lastnfa; ++ns) {
111*7e382390SJung-uk Kim 		fprintf (stderr, _("state # %4d\t"), ns);
112*7e382390SJung-uk Kim 
113*7e382390SJung-uk Kim 		sym = transchar[ns];
114*7e382390SJung-uk Kim 		tsp1 = trans1[ns];
115*7e382390SJung-uk Kim 		tsp2 = trans2[ns];
116*7e382390SJung-uk Kim 		anum = accptnum[ns];
117*7e382390SJung-uk Kim 
118*7e382390SJung-uk Kim 		fprintf (stderr, "%3d:  %4d, %4d", sym, tsp1, tsp2);
119*7e382390SJung-uk Kim 
120*7e382390SJung-uk Kim 		if (anum != NIL)
121*7e382390SJung-uk Kim 			fprintf (stderr, "  [%d]", anum);
122*7e382390SJung-uk Kim 
123*7e382390SJung-uk Kim 		fprintf (stderr, "\n");
124*7e382390SJung-uk Kim 	}
125*7e382390SJung-uk Kim 
126*7e382390SJung-uk Kim 	fprintf (stderr, _("********** end of dump\n"));
127*7e382390SJung-uk Kim }
128*7e382390SJung-uk Kim 
129*7e382390SJung-uk Kim 
130*7e382390SJung-uk Kim /* dupmachine - make a duplicate of a given machine
131*7e382390SJung-uk Kim  *
132*7e382390SJung-uk Kim  * synopsis
133*7e382390SJung-uk Kim  *
134*7e382390SJung-uk Kim  *   copy = dupmachine( mach );
135*7e382390SJung-uk Kim  *
136*7e382390SJung-uk Kim  *     copy - holds duplicate of mach
137*7e382390SJung-uk Kim  *     mach - machine to be duplicated
138*7e382390SJung-uk Kim  *
139*7e382390SJung-uk Kim  * note that the copy of mach is NOT an exact duplicate; rather, all the
140*7e382390SJung-uk Kim  * transition states values are adjusted so that the copy is self-contained,
141*7e382390SJung-uk Kim  * as the original should have been.
142*7e382390SJung-uk Kim  *
143*7e382390SJung-uk Kim  * also note that the original MUST be contiguous, with its low and high
144*7e382390SJung-uk Kim  * states accessible by the arrays firstst and lastst
145*7e382390SJung-uk Kim  */
146*7e382390SJung-uk Kim 
dupmachine(int mach)147*7e382390SJung-uk Kim int     dupmachine (int mach)
148*7e382390SJung-uk Kim {
149*7e382390SJung-uk Kim 	int     i, init, state_offset;
150*7e382390SJung-uk Kim 	int     state = 0;
151*7e382390SJung-uk Kim 	int     last = lastst[mach];
152*7e382390SJung-uk Kim 
153*7e382390SJung-uk Kim 	for (i = firstst[mach]; i <= last; ++i) {
154*7e382390SJung-uk Kim 		state = mkstate (transchar[i]);
155*7e382390SJung-uk Kim 
156*7e382390SJung-uk Kim 		if (trans1[i] != NO_TRANSITION) {
157*7e382390SJung-uk Kim 			mkxtion (finalst[state], trans1[i] + state - i);
158*7e382390SJung-uk Kim 
159*7e382390SJung-uk Kim 			if (transchar[i] == SYM_EPSILON &&
160*7e382390SJung-uk Kim 			    trans2[i] != NO_TRANSITION)
161*7e382390SJung-uk Kim 					mkxtion (finalst[state],
162*7e382390SJung-uk Kim 						 trans2[i] + state - i);
163*7e382390SJung-uk Kim 		}
164*7e382390SJung-uk Kim 
165*7e382390SJung-uk Kim 		accptnum[state] = accptnum[i];
166*7e382390SJung-uk Kim 	}
167*7e382390SJung-uk Kim 
168*7e382390SJung-uk Kim 	if (state == 0)
169*7e382390SJung-uk Kim 		flexfatal (_("empty machine in dupmachine()"));
170*7e382390SJung-uk Kim 
171*7e382390SJung-uk Kim 	state_offset = state - i + 1;
172*7e382390SJung-uk Kim 
173*7e382390SJung-uk Kim 	init = mach + state_offset;
174*7e382390SJung-uk Kim 	firstst[init] = firstst[mach] + state_offset;
175*7e382390SJung-uk Kim 	finalst[init] = finalst[mach] + state_offset;
176*7e382390SJung-uk Kim 	lastst[init] = lastst[mach] + state_offset;
177*7e382390SJung-uk Kim 
178*7e382390SJung-uk Kim 	return init;
179*7e382390SJung-uk Kim }
180*7e382390SJung-uk Kim 
181*7e382390SJung-uk Kim 
182*7e382390SJung-uk Kim /* finish_rule - finish up the processing for a rule
183*7e382390SJung-uk Kim  *
184*7e382390SJung-uk Kim  * An accepting number is added to the given machine.  If variable_trail_rule
185*7e382390SJung-uk Kim  * is true then the rule has trailing context and both the head and trail
186*7e382390SJung-uk Kim  * are variable size.  Otherwise if headcnt or trailcnt is non-zero then
187*7e382390SJung-uk Kim  * the machine recognizes a pattern with trailing context and headcnt is
188*7e382390SJung-uk Kim  * the number of characters in the matched part of the pattern, or zero
189*7e382390SJung-uk Kim  * if the matched part has variable length.  trailcnt is the number of
190*7e382390SJung-uk Kim  * trailing context characters in the pattern, or zero if the trailing
191*7e382390SJung-uk Kim  * context has variable length.
192*7e382390SJung-uk Kim  */
193*7e382390SJung-uk Kim 
finish_rule(int mach,int variable_trail_rule,int headcnt,int trailcnt,int pcont_act)194*7e382390SJung-uk Kim void    finish_rule (int mach, int variable_trail_rule, int headcnt, int trailcnt,
195*7e382390SJung-uk Kim 		     int pcont_act)
196*7e382390SJung-uk Kim {
197*7e382390SJung-uk Kim 	char    action_text[MAXLINE];
198*7e382390SJung-uk Kim 
199*7e382390SJung-uk Kim 	add_accept (mach, num_rules);
200*7e382390SJung-uk Kim 
201*7e382390SJung-uk Kim 	/* We did this in new_rule(), but it often gets the wrong
202*7e382390SJung-uk Kim 	 * number because we do it before we start parsing the current rule.
203*7e382390SJung-uk Kim 	 */
204*7e382390SJung-uk Kim 	rule_linenum[num_rules] = linenum;
205*7e382390SJung-uk Kim 
206*7e382390SJung-uk Kim 	/* If this is a continued action, then the line-number has already
207*7e382390SJung-uk Kim 	 * been updated, giving us the wrong number.
208*7e382390SJung-uk Kim 	 */
209*7e382390SJung-uk Kim 	if (continued_action)
210*7e382390SJung-uk Kim 		--rule_linenum[num_rules];
211*7e382390SJung-uk Kim 
212*7e382390SJung-uk Kim 
213*7e382390SJung-uk Kim 	/* If the previous rule was continued action, then we inherit the
214*7e382390SJung-uk Kim 	 * previous newline flag, possibly overriding the current one.
215*7e382390SJung-uk Kim 	 */
216*7e382390SJung-uk Kim 	if (pcont_act && rule_has_nl[num_rules - 1])
217*7e382390SJung-uk Kim 		rule_has_nl[num_rules] = true;
218*7e382390SJung-uk Kim 
219*7e382390SJung-uk Kim 	snprintf (action_text, sizeof(action_text), "case %d:\n", num_rules);
220*7e382390SJung-uk Kim 	add_action (action_text);
221*7e382390SJung-uk Kim 	if (rule_has_nl[num_rules]) {
222*7e382390SJung-uk Kim 		snprintf (action_text, sizeof(action_text), "/* rule %d can match eol */\n",
223*7e382390SJung-uk Kim 			 num_rules);
224*7e382390SJung-uk Kim 		add_action (action_text);
225*7e382390SJung-uk Kim 	}
226*7e382390SJung-uk Kim 
227*7e382390SJung-uk Kim 
228*7e382390SJung-uk Kim 	if (variable_trail_rule) {
229*7e382390SJung-uk Kim 		rule_type[num_rules] = RULE_VARIABLE;
230*7e382390SJung-uk Kim 
231*7e382390SJung-uk Kim 		if (performance_report > 0)
232*7e382390SJung-uk Kim 			fprintf (stderr,
233*7e382390SJung-uk Kim 				 _
234*7e382390SJung-uk Kim 				 ("Variable trailing context rule at line %d\n"),
235*7e382390SJung-uk Kim 				 rule_linenum[num_rules]);
236*7e382390SJung-uk Kim 
237*7e382390SJung-uk Kim 		variable_trailing_context_rules = true;
238*7e382390SJung-uk Kim 	}
239*7e382390SJung-uk Kim 
240*7e382390SJung-uk Kim 	else {
241*7e382390SJung-uk Kim 		rule_type[num_rules] = RULE_NORMAL;
242*7e382390SJung-uk Kim 
243*7e382390SJung-uk Kim 		if (headcnt > 0 || trailcnt > 0) {
244*7e382390SJung-uk Kim 			/* Do trailing context magic to not match the trailing
245*7e382390SJung-uk Kim 			 * characters.
246*7e382390SJung-uk Kim 			 */
247*7e382390SJung-uk Kim 			char   *scanner_cp = "YY_G(yy_c_buf_p) = yy_cp";
248*7e382390SJung-uk Kim 			char   *scanner_bp = "yy_bp";
249*7e382390SJung-uk Kim 
250*7e382390SJung-uk Kim 			add_action
251*7e382390SJung-uk Kim 				("*yy_cp = YY_G(yy_hold_char); /* undo effects of setting up yytext */\n");
252*7e382390SJung-uk Kim 
253*7e382390SJung-uk Kim 			if (headcnt > 0) {
254*7e382390SJung-uk Kim 				if (rule_has_nl[num_rules]) {
255*7e382390SJung-uk Kim 					snprintf (action_text, sizeof(action_text),
256*7e382390SJung-uk Kim 						"YY_LINENO_REWIND_TO(%s + %d);\n", scanner_bp, headcnt);
257*7e382390SJung-uk Kim 					add_action (action_text);
258*7e382390SJung-uk Kim 				}
259*7e382390SJung-uk Kim 				snprintf (action_text, sizeof(action_text), "%s = %s + %d;\n",
260*7e382390SJung-uk Kim 					 scanner_cp, scanner_bp, headcnt);
261*7e382390SJung-uk Kim 				add_action (action_text);
262*7e382390SJung-uk Kim 			}
263*7e382390SJung-uk Kim 
264*7e382390SJung-uk Kim 			else {
265*7e382390SJung-uk Kim 				if (rule_has_nl[num_rules]) {
266*7e382390SJung-uk Kim 					snprintf (action_text, sizeof(action_text),
267*7e382390SJung-uk Kim 						 "YY_LINENO_REWIND_TO(yy_cp - %d);\n", trailcnt);
268*7e382390SJung-uk Kim 					add_action (action_text);
269*7e382390SJung-uk Kim 				}
270*7e382390SJung-uk Kim 
271*7e382390SJung-uk Kim 				snprintf (action_text, sizeof(action_text), "%s -= %d;\n",
272*7e382390SJung-uk Kim 					 scanner_cp, trailcnt);
273*7e382390SJung-uk Kim 				add_action (action_text);
274*7e382390SJung-uk Kim 			}
275*7e382390SJung-uk Kim 
276*7e382390SJung-uk Kim 			add_action
277*7e382390SJung-uk Kim 				("YY_DO_BEFORE_ACTION; /* set up yytext again */\n");
278*7e382390SJung-uk Kim 		}
279*7e382390SJung-uk Kim 	}
280*7e382390SJung-uk Kim 
281*7e382390SJung-uk Kim 	/* Okay, in the action code at this point yytext and yyleng have
282*7e382390SJung-uk Kim 	 * their proper final values for this rule, so here's the point
283*7e382390SJung-uk Kim 	 * to do any user action.  But don't do it for continued actions,
284*7e382390SJung-uk Kim 	 * as that'll result in multiple YY_RULE_SETUP's.
285*7e382390SJung-uk Kim 	 */
286*7e382390SJung-uk Kim 	if (!continued_action)
287*7e382390SJung-uk Kim 		add_action ("YY_RULE_SETUP\n");
288*7e382390SJung-uk Kim 
289*7e382390SJung-uk Kim 	line_directive_out(NULL, 1);
290*7e382390SJung-uk Kim         add_action("[[");
291*7e382390SJung-uk Kim }
292*7e382390SJung-uk Kim 
293*7e382390SJung-uk Kim 
294*7e382390SJung-uk Kim /* link_machines - connect two machines together
295*7e382390SJung-uk Kim  *
296*7e382390SJung-uk Kim  * synopsis
297*7e382390SJung-uk Kim  *
298*7e382390SJung-uk Kim  *   new = link_machines( first, last );
299*7e382390SJung-uk Kim  *
300*7e382390SJung-uk Kim  *     new    - a machine constructed by connecting first to last
301*7e382390SJung-uk Kim  *     first  - the machine whose successor is to be last
302*7e382390SJung-uk Kim  *     last   - the machine whose predecessor is to be first
303*7e382390SJung-uk Kim  *
304*7e382390SJung-uk Kim  * note: this routine concatenates the machine first with the machine
305*7e382390SJung-uk Kim  *  last to produce a machine new which will pattern-match first first
306*7e382390SJung-uk Kim  *  and then last, and will fail if either of the sub-patterns fails.
307*7e382390SJung-uk Kim  *  FIRST is set to new by the operation.  last is unmolested.
308*7e382390SJung-uk Kim  */
309*7e382390SJung-uk Kim 
link_machines(int first,int last)310*7e382390SJung-uk Kim int     link_machines (int first, int last)
311*7e382390SJung-uk Kim {
312*7e382390SJung-uk Kim 	if (first == NIL)
313*7e382390SJung-uk Kim 		return last;
314*7e382390SJung-uk Kim 
315*7e382390SJung-uk Kim 	else if (last == NIL)
316*7e382390SJung-uk Kim 		return first;
317*7e382390SJung-uk Kim 
318*7e382390SJung-uk Kim 	else {
319*7e382390SJung-uk Kim 		mkxtion (finalst[first], last);
320*7e382390SJung-uk Kim 		finalst[first] = finalst[last];
321*7e382390SJung-uk Kim 		lastst[first] = MAX (lastst[first], lastst[last]);
322*7e382390SJung-uk Kim 		firstst[first] = MIN (firstst[first], firstst[last]);
323*7e382390SJung-uk Kim 
324*7e382390SJung-uk Kim 		return first;
325*7e382390SJung-uk Kim 	}
326*7e382390SJung-uk Kim }
327*7e382390SJung-uk Kim 
328*7e382390SJung-uk Kim 
329*7e382390SJung-uk Kim /* mark_beginning_as_normal - mark each "beginning" state in a machine
330*7e382390SJung-uk Kim  *                            as being a "normal" (i.e., not trailing context-
331*7e382390SJung-uk Kim  *                            associated) states
332*7e382390SJung-uk Kim  *
333*7e382390SJung-uk Kim  * The "beginning" states are the epsilon closure of the first state
334*7e382390SJung-uk Kim  */
335*7e382390SJung-uk Kim 
mark_beginning_as_normal(int mach)336*7e382390SJung-uk Kim void    mark_beginning_as_normal (int mach)
337*7e382390SJung-uk Kim {
338*7e382390SJung-uk Kim 	switch (state_type[mach]) {
339*7e382390SJung-uk Kim 	case STATE_NORMAL:
340*7e382390SJung-uk Kim 		/* Oh, we've already visited here. */
341*7e382390SJung-uk Kim 		return;
342*7e382390SJung-uk Kim 
343*7e382390SJung-uk Kim 	case STATE_TRAILING_CONTEXT:
344*7e382390SJung-uk Kim 		state_type[mach] = STATE_NORMAL;
345*7e382390SJung-uk Kim 
346*7e382390SJung-uk Kim 		if (transchar[mach] == SYM_EPSILON) {
347*7e382390SJung-uk Kim 			if (trans1[mach] != NO_TRANSITION)
348*7e382390SJung-uk Kim 				mark_beginning_as_normal (trans1[mach]);
349*7e382390SJung-uk Kim 
350*7e382390SJung-uk Kim 			if (trans2[mach] != NO_TRANSITION)
351*7e382390SJung-uk Kim 				mark_beginning_as_normal (trans2[mach]);
352*7e382390SJung-uk Kim 		}
353*7e382390SJung-uk Kim 		break;
354*7e382390SJung-uk Kim 
355*7e382390SJung-uk Kim 	default:
356*7e382390SJung-uk Kim 		flexerror (_
357*7e382390SJung-uk Kim 			   ("bad state type in mark_beginning_as_normal()"));
358*7e382390SJung-uk Kim 		break;
359*7e382390SJung-uk Kim 	}
360*7e382390SJung-uk Kim }
361*7e382390SJung-uk Kim 
362*7e382390SJung-uk Kim 
363*7e382390SJung-uk Kim /* mkbranch - make a machine that branches to two machines
364*7e382390SJung-uk Kim  *
365*7e382390SJung-uk Kim  * synopsis
366*7e382390SJung-uk Kim  *
367*7e382390SJung-uk Kim  *   branch = mkbranch( first, second );
368*7e382390SJung-uk Kim  *
369*7e382390SJung-uk Kim  *     branch - a machine which matches either first's pattern or second's
370*7e382390SJung-uk Kim  *     first, second - machines whose patterns are to be or'ed (the | operator)
371*7e382390SJung-uk Kim  *
372*7e382390SJung-uk Kim  * Note that first and second are NEITHER destroyed by the operation.  Also,
373*7e382390SJung-uk Kim  * the resulting machine CANNOT be used with any other "mk" operation except
374*7e382390SJung-uk Kim  * more mkbranch's.  Compare with mkor()
375*7e382390SJung-uk Kim  */
376*7e382390SJung-uk Kim 
mkbranch(int first,int second)377*7e382390SJung-uk Kim int     mkbranch (int first, int second)
378*7e382390SJung-uk Kim {
379*7e382390SJung-uk Kim 	int     eps;
380*7e382390SJung-uk Kim 
381*7e382390SJung-uk Kim 	if (first == NO_TRANSITION)
382*7e382390SJung-uk Kim 		return second;
383*7e382390SJung-uk Kim 
384*7e382390SJung-uk Kim 	else if (second == NO_TRANSITION)
385*7e382390SJung-uk Kim 		return first;
386*7e382390SJung-uk Kim 
387*7e382390SJung-uk Kim 	eps = mkstate (SYM_EPSILON);
388*7e382390SJung-uk Kim 
389*7e382390SJung-uk Kim 	mkxtion (eps, first);
390*7e382390SJung-uk Kim 	mkxtion (eps, second);
391*7e382390SJung-uk Kim 
392*7e382390SJung-uk Kim 	return eps;
393*7e382390SJung-uk Kim }
394*7e382390SJung-uk Kim 
395*7e382390SJung-uk Kim 
396*7e382390SJung-uk Kim /* mkclos - convert a machine into a closure
397*7e382390SJung-uk Kim  *
398*7e382390SJung-uk Kim  * synopsis
399*7e382390SJung-uk Kim  *   new = mkclos( state );
400*7e382390SJung-uk Kim  *
401*7e382390SJung-uk Kim  * new - a new state which matches the closure of "state"
402*7e382390SJung-uk Kim  */
403*7e382390SJung-uk Kim 
mkclos(int state)404*7e382390SJung-uk Kim int     mkclos (int state)
405*7e382390SJung-uk Kim {
406*7e382390SJung-uk Kim 	return mkopt (mkposcl (state));
407*7e382390SJung-uk Kim }
408*7e382390SJung-uk Kim 
409*7e382390SJung-uk Kim 
410*7e382390SJung-uk Kim /* mkopt - make a machine optional
411*7e382390SJung-uk Kim  *
412*7e382390SJung-uk Kim  * synopsis
413*7e382390SJung-uk Kim  *
414*7e382390SJung-uk Kim  *   new = mkopt( mach );
415*7e382390SJung-uk Kim  *
416*7e382390SJung-uk Kim  *     new  - a machine which optionally matches whatever mach matched
417*7e382390SJung-uk Kim  *     mach - the machine to make optional
418*7e382390SJung-uk Kim  *
419*7e382390SJung-uk Kim  * notes:
420*7e382390SJung-uk Kim  *     1. mach must be the last machine created
421*7e382390SJung-uk Kim  *     2. mach is destroyed by the call
422*7e382390SJung-uk Kim  */
423*7e382390SJung-uk Kim 
mkopt(int mach)424*7e382390SJung-uk Kim int     mkopt (int mach)
425*7e382390SJung-uk Kim {
426*7e382390SJung-uk Kim 	int     eps;
427*7e382390SJung-uk Kim 
428*7e382390SJung-uk Kim 	if (!SUPER_FREE_EPSILON (finalst[mach])) {
429*7e382390SJung-uk Kim 		eps = mkstate (SYM_EPSILON);
430*7e382390SJung-uk Kim 		mach = link_machines (mach, eps);
431*7e382390SJung-uk Kim 	}
432*7e382390SJung-uk Kim 
433*7e382390SJung-uk Kim 	/* Can't skimp on the following if FREE_EPSILON(mach) is true because
434*7e382390SJung-uk Kim 	 * some state interior to "mach" might point back to the beginning
435*7e382390SJung-uk Kim 	 * for a closure.
436*7e382390SJung-uk Kim 	 */
437*7e382390SJung-uk Kim 	eps = mkstate (SYM_EPSILON);
438*7e382390SJung-uk Kim 	mach = link_machines (eps, mach);
439*7e382390SJung-uk Kim 
440*7e382390SJung-uk Kim 	mkxtion (mach, finalst[mach]);
441*7e382390SJung-uk Kim 
442*7e382390SJung-uk Kim 	return mach;
443*7e382390SJung-uk Kim }
444*7e382390SJung-uk Kim 
445*7e382390SJung-uk Kim 
446*7e382390SJung-uk Kim /* mkor - make a machine that matches either one of two machines
447*7e382390SJung-uk Kim  *
448*7e382390SJung-uk Kim  * synopsis
449*7e382390SJung-uk Kim  *
450*7e382390SJung-uk Kim  *   new = mkor( first, second );
451*7e382390SJung-uk Kim  *
452*7e382390SJung-uk Kim  *     new - a machine which matches either first's pattern or second's
453*7e382390SJung-uk Kim  *     first, second - machines whose patterns are to be or'ed (the | operator)
454*7e382390SJung-uk Kim  *
455*7e382390SJung-uk Kim  * note that first and second are both destroyed by the operation
456*7e382390SJung-uk Kim  * the code is rather convoluted because an attempt is made to minimize
457*7e382390SJung-uk Kim  * the number of epsilon states needed
458*7e382390SJung-uk Kim  */
459*7e382390SJung-uk Kim 
mkor(int first,int second)460*7e382390SJung-uk Kim int     mkor (int first, int second)
461*7e382390SJung-uk Kim {
462*7e382390SJung-uk Kim 	int     eps, orend;
463*7e382390SJung-uk Kim 
464*7e382390SJung-uk Kim 	if (first == NIL)
465*7e382390SJung-uk Kim 		return second;
466*7e382390SJung-uk Kim 
467*7e382390SJung-uk Kim 	else if (second == NIL)
468*7e382390SJung-uk Kim 		return first;
469*7e382390SJung-uk Kim 
470*7e382390SJung-uk Kim 	else {
471*7e382390SJung-uk Kim 		/* See comment in mkopt() about why we can't use the first
472*7e382390SJung-uk Kim 		 * state of "first" or "second" if they satisfy "FREE_EPSILON".
473*7e382390SJung-uk Kim 		 */
474*7e382390SJung-uk Kim 		eps = mkstate (SYM_EPSILON);
475*7e382390SJung-uk Kim 
476*7e382390SJung-uk Kim 		first = link_machines (eps, first);
477*7e382390SJung-uk Kim 
478*7e382390SJung-uk Kim 		mkxtion (first, second);
479*7e382390SJung-uk Kim 
480*7e382390SJung-uk Kim 		if (SUPER_FREE_EPSILON (finalst[first]) &&
481*7e382390SJung-uk Kim 		    accptnum[finalst[first]] == NIL) {
482*7e382390SJung-uk Kim 			orend = finalst[first];
483*7e382390SJung-uk Kim 			mkxtion (finalst[second], orend);
484*7e382390SJung-uk Kim 		}
485*7e382390SJung-uk Kim 
486*7e382390SJung-uk Kim 		else if (SUPER_FREE_EPSILON (finalst[second]) &&
487*7e382390SJung-uk Kim 			 accptnum[finalst[second]] == NIL) {
488*7e382390SJung-uk Kim 			orend = finalst[second];
489*7e382390SJung-uk Kim 			mkxtion (finalst[first], orend);
490*7e382390SJung-uk Kim 		}
491*7e382390SJung-uk Kim 
492*7e382390SJung-uk Kim 		else {
493*7e382390SJung-uk Kim 			eps = mkstate (SYM_EPSILON);
494*7e382390SJung-uk Kim 
495*7e382390SJung-uk Kim 			first = link_machines (first, eps);
496*7e382390SJung-uk Kim 			orend = finalst[first];
497*7e382390SJung-uk Kim 
498*7e382390SJung-uk Kim 			mkxtion (finalst[second], orend);
499*7e382390SJung-uk Kim 		}
500*7e382390SJung-uk Kim 	}
501*7e382390SJung-uk Kim 
502*7e382390SJung-uk Kim 	finalst[first] = orend;
503*7e382390SJung-uk Kim 	return first;
504*7e382390SJung-uk Kim }
505*7e382390SJung-uk Kim 
506*7e382390SJung-uk Kim 
507*7e382390SJung-uk Kim /* mkposcl - convert a machine into a positive closure
508*7e382390SJung-uk Kim  *
509*7e382390SJung-uk Kim  * synopsis
510*7e382390SJung-uk Kim  *   new = mkposcl( state );
511*7e382390SJung-uk Kim  *
512*7e382390SJung-uk Kim  *    new - a machine matching the positive closure of "state"
513*7e382390SJung-uk Kim  */
514*7e382390SJung-uk Kim 
mkposcl(int state)515*7e382390SJung-uk Kim int     mkposcl (int state)
516*7e382390SJung-uk Kim {
517*7e382390SJung-uk Kim 	int     eps;
518*7e382390SJung-uk Kim 
519*7e382390SJung-uk Kim 	if (SUPER_FREE_EPSILON (finalst[state])) {
520*7e382390SJung-uk Kim 		mkxtion (finalst[state], state);
521*7e382390SJung-uk Kim 		return state;
522*7e382390SJung-uk Kim 	}
523*7e382390SJung-uk Kim 
524*7e382390SJung-uk Kim 	else {
525*7e382390SJung-uk Kim 		eps = mkstate (SYM_EPSILON);
526*7e382390SJung-uk Kim 		mkxtion (eps, state);
527*7e382390SJung-uk Kim 		return link_machines (state, eps);
528*7e382390SJung-uk Kim 	}
529*7e382390SJung-uk Kim }
530*7e382390SJung-uk Kim 
531*7e382390SJung-uk Kim 
532*7e382390SJung-uk Kim /* mkrep - make a replicated machine
533*7e382390SJung-uk Kim  *
534*7e382390SJung-uk Kim  * synopsis
535*7e382390SJung-uk Kim  *   new = mkrep( mach, lb, ub );
536*7e382390SJung-uk Kim  *
537*7e382390SJung-uk Kim  *    new - a machine that matches whatever "mach" matched from "lb"
538*7e382390SJung-uk Kim  *          number of times to "ub" number of times
539*7e382390SJung-uk Kim  *
540*7e382390SJung-uk Kim  * note
541*7e382390SJung-uk Kim  *   if "ub" is INFINITE_REPEAT then "new" matches "lb" or more occurrences of "mach"
542*7e382390SJung-uk Kim  */
543*7e382390SJung-uk Kim 
mkrep(int mach,int lb,int ub)544*7e382390SJung-uk Kim int     mkrep (int mach, int lb, int ub)
545*7e382390SJung-uk Kim {
546*7e382390SJung-uk Kim 	int     base_mach, tail, copy, i;
547*7e382390SJung-uk Kim 
548*7e382390SJung-uk Kim 	base_mach = copysingl (mach, lb - 1);
549*7e382390SJung-uk Kim 
550*7e382390SJung-uk Kim 	if (ub == INFINITE_REPEAT) {
551*7e382390SJung-uk Kim 		copy = dupmachine (mach);
552*7e382390SJung-uk Kim 		mach = link_machines (mach,
553*7e382390SJung-uk Kim 				      link_machines (base_mach,
554*7e382390SJung-uk Kim 						     mkclos (copy)));
555*7e382390SJung-uk Kim 	}
556*7e382390SJung-uk Kim 
557*7e382390SJung-uk Kim 	else {
558*7e382390SJung-uk Kim 		tail = mkstate (SYM_EPSILON);
559*7e382390SJung-uk Kim 
560*7e382390SJung-uk Kim 		for (i = lb; i < ub; ++i) {
561*7e382390SJung-uk Kim 			copy = dupmachine (mach);
562*7e382390SJung-uk Kim 			tail = mkopt (link_machines (copy, tail));
563*7e382390SJung-uk Kim 		}
564*7e382390SJung-uk Kim 
565*7e382390SJung-uk Kim 		mach =
566*7e382390SJung-uk Kim 			link_machines (mach,
567*7e382390SJung-uk Kim 				       link_machines (base_mach, tail));
568*7e382390SJung-uk Kim 	}
569*7e382390SJung-uk Kim 
570*7e382390SJung-uk Kim 	return mach;
571*7e382390SJung-uk Kim }
572*7e382390SJung-uk Kim 
573*7e382390SJung-uk Kim 
574*7e382390SJung-uk Kim /* mkstate - create a state with a transition on a given symbol
575*7e382390SJung-uk Kim  *
576*7e382390SJung-uk Kim  * synopsis
577*7e382390SJung-uk Kim  *
578*7e382390SJung-uk Kim  *   state = mkstate( sym );
579*7e382390SJung-uk Kim  *
580*7e382390SJung-uk Kim  *     state - a new state matching sym
581*7e382390SJung-uk Kim  *     sym   - the symbol the new state is to have an out-transition on
582*7e382390SJung-uk Kim  *
583*7e382390SJung-uk Kim  * note that this routine makes new states in ascending order through the
584*7e382390SJung-uk Kim  * state array (and increments LASTNFA accordingly).  The routine DUPMACHINE
585*7e382390SJung-uk Kim  * relies on machines being made in ascending order and that they are
586*7e382390SJung-uk Kim  * CONTIGUOUS.  Change it and you will have to rewrite DUPMACHINE (kludge
587*7e382390SJung-uk Kim  * that it admittedly is)
588*7e382390SJung-uk Kim  */
589*7e382390SJung-uk Kim 
mkstate(int sym)590*7e382390SJung-uk Kim int     mkstate (int sym)
591*7e382390SJung-uk Kim {
592*7e382390SJung-uk Kim 	if (++lastnfa >= current_mns) {
593*7e382390SJung-uk Kim 		if ((current_mns += MNS_INCREMENT) >= maximum_mns)
594*7e382390SJung-uk Kim 			lerr(_
595*7e382390SJung-uk Kim 				("input rules are too complicated (>= %d NFA states)"),
596*7e382390SJung-uk Kim current_mns);
597*7e382390SJung-uk Kim 
598*7e382390SJung-uk Kim 		++num_reallocs;
599*7e382390SJung-uk Kim 
600*7e382390SJung-uk Kim 		firstst = reallocate_integer_array (firstst, current_mns);
601*7e382390SJung-uk Kim 		lastst = reallocate_integer_array (lastst, current_mns);
602*7e382390SJung-uk Kim 		finalst = reallocate_integer_array (finalst, current_mns);
603*7e382390SJung-uk Kim 		transchar =
604*7e382390SJung-uk Kim 			reallocate_integer_array (transchar, current_mns);
605*7e382390SJung-uk Kim 		trans1 = reallocate_integer_array (trans1, current_mns);
606*7e382390SJung-uk Kim 		trans2 = reallocate_integer_array (trans2, current_mns);
607*7e382390SJung-uk Kim 		accptnum =
608*7e382390SJung-uk Kim 			reallocate_integer_array (accptnum, current_mns);
609*7e382390SJung-uk Kim 		assoc_rule =
610*7e382390SJung-uk Kim 			reallocate_integer_array (assoc_rule, current_mns);
611*7e382390SJung-uk Kim 		state_type =
612*7e382390SJung-uk Kim 			reallocate_integer_array (state_type, current_mns);
613*7e382390SJung-uk Kim 	}
614*7e382390SJung-uk Kim 
615*7e382390SJung-uk Kim 	firstst[lastnfa] = lastnfa;
616*7e382390SJung-uk Kim 	finalst[lastnfa] = lastnfa;
617*7e382390SJung-uk Kim 	lastst[lastnfa] = lastnfa;
618*7e382390SJung-uk Kim 	transchar[lastnfa] = sym;
619*7e382390SJung-uk Kim 	trans1[lastnfa] = NO_TRANSITION;
620*7e382390SJung-uk Kim 	trans2[lastnfa] = NO_TRANSITION;
621*7e382390SJung-uk Kim 	accptnum[lastnfa] = NIL;
622*7e382390SJung-uk Kim 	assoc_rule[lastnfa] = num_rules;
623*7e382390SJung-uk Kim 	state_type[lastnfa] = current_state_type;
624*7e382390SJung-uk Kim 
625*7e382390SJung-uk Kim 	/* Fix up equivalence classes base on this transition.  Note that any
626*7e382390SJung-uk Kim 	 * character which has its own transition gets its own equivalence
627*7e382390SJung-uk Kim 	 * class.  Thus only characters which are only in character classes
628*7e382390SJung-uk Kim 	 * have a chance at being in the same equivalence class.  E.g. "a|b"
629*7e382390SJung-uk Kim 	 * puts 'a' and 'b' into two different equivalence classes.  "[ab]"
630*7e382390SJung-uk Kim 	 * puts them in the same equivalence class (barring other differences
631*7e382390SJung-uk Kim 	 * elsewhere in the input).
632*7e382390SJung-uk Kim 	 */
633*7e382390SJung-uk Kim 
634*7e382390SJung-uk Kim 	if (sym < 0) {
635*7e382390SJung-uk Kim 		/* We don't have to update the equivalence classes since
636*7e382390SJung-uk Kim 		 * that was already done when the ccl was created for the
637*7e382390SJung-uk Kim 		 * first time.
638*7e382390SJung-uk Kim 		 */
639*7e382390SJung-uk Kim 	}
640*7e382390SJung-uk Kim 
641*7e382390SJung-uk Kim 	else if (sym == SYM_EPSILON)
642*7e382390SJung-uk Kim 		++numeps;
643*7e382390SJung-uk Kim 
644*7e382390SJung-uk Kim 	else {
645*7e382390SJung-uk Kim 		check_char (sym);
646*7e382390SJung-uk Kim 
647*7e382390SJung-uk Kim 		if (useecs)
648*7e382390SJung-uk Kim 			/* Map NUL's to csize. */
649*7e382390SJung-uk Kim 			mkechar (sym ? sym : csize, nextecm, ecgroup);
650*7e382390SJung-uk Kim 	}
651*7e382390SJung-uk Kim 
652*7e382390SJung-uk Kim 	return lastnfa;
653*7e382390SJung-uk Kim }
654*7e382390SJung-uk Kim 
655*7e382390SJung-uk Kim 
656*7e382390SJung-uk Kim /* mkxtion - make a transition from one state to another
657*7e382390SJung-uk Kim  *
658*7e382390SJung-uk Kim  * synopsis
659*7e382390SJung-uk Kim  *
660*7e382390SJung-uk Kim  *   mkxtion( statefrom, stateto );
661*7e382390SJung-uk Kim  *
662*7e382390SJung-uk Kim  *     statefrom - the state from which the transition is to be made
663*7e382390SJung-uk Kim  *     stateto   - the state to which the transition is to be made
664*7e382390SJung-uk Kim  */
665*7e382390SJung-uk Kim 
mkxtion(int statefrom,int stateto)666*7e382390SJung-uk Kim void    mkxtion (int statefrom, int stateto)
667*7e382390SJung-uk Kim {
668*7e382390SJung-uk Kim 	if (trans1[statefrom] == NO_TRANSITION)
669*7e382390SJung-uk Kim 		trans1[statefrom] = stateto;
670*7e382390SJung-uk Kim 
671*7e382390SJung-uk Kim 	else if ((transchar[statefrom] != SYM_EPSILON) ||
672*7e382390SJung-uk Kim 		 (trans2[statefrom] != NO_TRANSITION))
673*7e382390SJung-uk Kim 		flexfatal (_("found too many transitions in mkxtion()"));
674*7e382390SJung-uk Kim 
675*7e382390SJung-uk Kim 	else {			/* second out-transition for an epsilon state */
676*7e382390SJung-uk Kim 		++eps2;
677*7e382390SJung-uk Kim 		trans2[statefrom] = stateto;
678*7e382390SJung-uk Kim 	}
679*7e382390SJung-uk Kim }
680*7e382390SJung-uk Kim 
681*7e382390SJung-uk Kim /* new_rule - initialize for a new rule */
682*7e382390SJung-uk Kim 
new_rule(void)683*7e382390SJung-uk Kim void    new_rule (void)
684*7e382390SJung-uk Kim {
685*7e382390SJung-uk Kim 	if (++num_rules >= current_max_rules) {
686*7e382390SJung-uk Kim 		++num_reallocs;
687*7e382390SJung-uk Kim 		current_max_rules += MAX_RULES_INCREMENT;
688*7e382390SJung-uk Kim 		rule_type = reallocate_integer_array (rule_type,
689*7e382390SJung-uk Kim 						      current_max_rules);
690*7e382390SJung-uk Kim 		rule_linenum = reallocate_integer_array (rule_linenum,
691*7e382390SJung-uk Kim 							 current_max_rules);
692*7e382390SJung-uk Kim 		rule_useful = reallocate_integer_array (rule_useful,
693*7e382390SJung-uk Kim 							current_max_rules);
694*7e382390SJung-uk Kim 		rule_has_nl = reallocate_bool_array (rule_has_nl,
695*7e382390SJung-uk Kim 						     current_max_rules);
696*7e382390SJung-uk Kim 	}
697*7e382390SJung-uk Kim 
698*7e382390SJung-uk Kim 	if (num_rules > MAX_RULE)
699*7e382390SJung-uk Kim 		lerr (_("too many rules (> %d)!"), MAX_RULE);
700*7e382390SJung-uk Kim 
701*7e382390SJung-uk Kim 	rule_linenum[num_rules] = linenum;
702*7e382390SJung-uk Kim 	rule_useful[num_rules] = false;
703*7e382390SJung-uk Kim 	rule_has_nl[num_rules] = false;
704*7e382390SJung-uk Kim }
705