1*7e382390SJung-uk Kim /* nfa - NFA construction routines */
2*7e382390SJung-uk Kim
3*7e382390SJung-uk Kim /* Copyright (c) 1990 The Regents of the University of California. */
4*7e382390SJung-uk Kim /* All rights reserved. */
5*7e382390SJung-uk Kim
6*7e382390SJung-uk Kim /* This code is derived from software contributed to Berkeley by */
7*7e382390SJung-uk Kim /* Vern Paxson. */
8*7e382390SJung-uk Kim
9*7e382390SJung-uk Kim /* The United States Government has rights in this work pursuant */
10*7e382390SJung-uk Kim /* to contract no. DE-AC03-76SF00098 between the United States */
11*7e382390SJung-uk Kim /* Department of Energy and the University of California. */
12*7e382390SJung-uk Kim
13*7e382390SJung-uk Kim /* This file is part of flex. */
14*7e382390SJung-uk Kim
15*7e382390SJung-uk Kim /* Redistribution and use in source and binary forms, with or without */
16*7e382390SJung-uk Kim /* modification, are permitted provided that the following conditions */
17*7e382390SJung-uk Kim /* are met: */
18*7e382390SJung-uk Kim
19*7e382390SJung-uk Kim /* 1. Redistributions of source code must retain the above copyright */
20*7e382390SJung-uk Kim /* notice, this list of conditions and the following disclaimer. */
21*7e382390SJung-uk Kim /* 2. Redistributions in binary form must reproduce the above copyright */
22*7e382390SJung-uk Kim /* notice, this list of conditions and the following disclaimer in the */
23*7e382390SJung-uk Kim /* documentation and/or other materials provided with the distribution. */
24*7e382390SJung-uk Kim
25*7e382390SJung-uk Kim /* Neither the name of the University nor the names of its contributors */
26*7e382390SJung-uk Kim /* may be used to endorse or promote products derived from this software */
27*7e382390SJung-uk Kim /* without specific prior written permission. */
28*7e382390SJung-uk Kim
29*7e382390SJung-uk Kim /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30*7e382390SJung-uk Kim /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31*7e382390SJung-uk Kim /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
32*7e382390SJung-uk Kim /* PURPOSE. */
33*7e382390SJung-uk Kim
34*7e382390SJung-uk Kim #include "flexdef.h"
35*7e382390SJung-uk Kim
36*7e382390SJung-uk Kim
37*7e382390SJung-uk Kim /* declare functions that have forward references */
38*7e382390SJung-uk Kim
39*7e382390SJung-uk Kim int dupmachine(int);
40*7e382390SJung-uk Kim void mkxtion(int, int);
41*7e382390SJung-uk Kim
42*7e382390SJung-uk Kim
43*7e382390SJung-uk Kim /* add_accept - add an accepting state to a machine
44*7e382390SJung-uk Kim *
45*7e382390SJung-uk Kim * accepting_number becomes mach's accepting number.
46*7e382390SJung-uk Kim */
47*7e382390SJung-uk Kim
add_accept(int mach,int accepting_number)48*7e382390SJung-uk Kim void add_accept (int mach, int accepting_number)
49*7e382390SJung-uk Kim {
50*7e382390SJung-uk Kim /* Hang the accepting number off an epsilon state. if it is associated
51*7e382390SJung-uk Kim * with a state that has a non-epsilon out-transition, then the state
52*7e382390SJung-uk Kim * will accept BEFORE it makes that transition, i.e., one character
53*7e382390SJung-uk Kim * too soon.
54*7e382390SJung-uk Kim */
55*7e382390SJung-uk Kim
56*7e382390SJung-uk Kim if (transchar[finalst[mach]] == SYM_EPSILON)
57*7e382390SJung-uk Kim accptnum[finalst[mach]] = accepting_number;
58*7e382390SJung-uk Kim
59*7e382390SJung-uk Kim else {
60*7e382390SJung-uk Kim int astate = mkstate (SYM_EPSILON);
61*7e382390SJung-uk Kim
62*7e382390SJung-uk Kim accptnum[astate] = accepting_number;
63*7e382390SJung-uk Kim (void) link_machines (mach, astate);
64*7e382390SJung-uk Kim }
65*7e382390SJung-uk Kim }
66*7e382390SJung-uk Kim
67*7e382390SJung-uk Kim
68*7e382390SJung-uk Kim /* copysingl - make a given number of copies of a singleton machine
69*7e382390SJung-uk Kim *
70*7e382390SJung-uk Kim * synopsis
71*7e382390SJung-uk Kim *
72*7e382390SJung-uk Kim * newsng = copysingl( singl, num );
73*7e382390SJung-uk Kim *
74*7e382390SJung-uk Kim * newsng - a new singleton composed of num copies of singl
75*7e382390SJung-uk Kim * singl - a singleton machine
76*7e382390SJung-uk Kim * num - the number of copies of singl to be present in newsng
77*7e382390SJung-uk Kim */
78*7e382390SJung-uk Kim
copysingl(int singl,int num)79*7e382390SJung-uk Kim int copysingl (int singl, int num)
80*7e382390SJung-uk Kim {
81*7e382390SJung-uk Kim int copy, i;
82*7e382390SJung-uk Kim
83*7e382390SJung-uk Kim copy = mkstate (SYM_EPSILON);
84*7e382390SJung-uk Kim
85*7e382390SJung-uk Kim for (i = 1; i <= num; ++i)
86*7e382390SJung-uk Kim copy = link_machines (copy, dupmachine (singl));
87*7e382390SJung-uk Kim
88*7e382390SJung-uk Kim return copy;
89*7e382390SJung-uk Kim }
90*7e382390SJung-uk Kim
91*7e382390SJung-uk Kim
92*7e382390SJung-uk Kim /* dumpnfa - debugging routine to write out an nfa */
93*7e382390SJung-uk Kim
dumpnfa(int state1)94*7e382390SJung-uk Kim void dumpnfa (int state1)
95*7e382390SJung-uk Kim {
96*7e382390SJung-uk Kim int sym, tsp1, tsp2, anum, ns;
97*7e382390SJung-uk Kim
98*7e382390SJung-uk Kim fprintf (stderr,
99*7e382390SJung-uk Kim _
100*7e382390SJung-uk Kim ("\n\n********** beginning dump of nfa with start state %d\n"),
101*7e382390SJung-uk Kim state1);
102*7e382390SJung-uk Kim
103*7e382390SJung-uk Kim /* We probably should loop starting at firstst[state1] and going to
104*7e382390SJung-uk Kim * lastst[state1], but they're not maintained properly when we "or"
105*7e382390SJung-uk Kim * all of the rules together. So we use our knowledge that the machine
106*7e382390SJung-uk Kim * starts at state 1 and ends at lastnfa.
107*7e382390SJung-uk Kim */
108*7e382390SJung-uk Kim
109*7e382390SJung-uk Kim /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */
110*7e382390SJung-uk Kim for (ns = 1; ns <= lastnfa; ++ns) {
111*7e382390SJung-uk Kim fprintf (stderr, _("state # %4d\t"), ns);
112*7e382390SJung-uk Kim
113*7e382390SJung-uk Kim sym = transchar[ns];
114*7e382390SJung-uk Kim tsp1 = trans1[ns];
115*7e382390SJung-uk Kim tsp2 = trans2[ns];
116*7e382390SJung-uk Kim anum = accptnum[ns];
117*7e382390SJung-uk Kim
118*7e382390SJung-uk Kim fprintf (stderr, "%3d: %4d, %4d", sym, tsp1, tsp2);
119*7e382390SJung-uk Kim
120*7e382390SJung-uk Kim if (anum != NIL)
121*7e382390SJung-uk Kim fprintf (stderr, " [%d]", anum);
122*7e382390SJung-uk Kim
123*7e382390SJung-uk Kim fprintf (stderr, "\n");
124*7e382390SJung-uk Kim }
125*7e382390SJung-uk Kim
126*7e382390SJung-uk Kim fprintf (stderr, _("********** end of dump\n"));
127*7e382390SJung-uk Kim }
128*7e382390SJung-uk Kim
129*7e382390SJung-uk Kim
130*7e382390SJung-uk Kim /* dupmachine - make a duplicate of a given machine
131*7e382390SJung-uk Kim *
132*7e382390SJung-uk Kim * synopsis
133*7e382390SJung-uk Kim *
134*7e382390SJung-uk Kim * copy = dupmachine( mach );
135*7e382390SJung-uk Kim *
136*7e382390SJung-uk Kim * copy - holds duplicate of mach
137*7e382390SJung-uk Kim * mach - machine to be duplicated
138*7e382390SJung-uk Kim *
139*7e382390SJung-uk Kim * note that the copy of mach is NOT an exact duplicate; rather, all the
140*7e382390SJung-uk Kim * transition states values are adjusted so that the copy is self-contained,
141*7e382390SJung-uk Kim * as the original should have been.
142*7e382390SJung-uk Kim *
143*7e382390SJung-uk Kim * also note that the original MUST be contiguous, with its low and high
144*7e382390SJung-uk Kim * states accessible by the arrays firstst and lastst
145*7e382390SJung-uk Kim */
146*7e382390SJung-uk Kim
dupmachine(int mach)147*7e382390SJung-uk Kim int dupmachine (int mach)
148*7e382390SJung-uk Kim {
149*7e382390SJung-uk Kim int i, init, state_offset;
150*7e382390SJung-uk Kim int state = 0;
151*7e382390SJung-uk Kim int last = lastst[mach];
152*7e382390SJung-uk Kim
153*7e382390SJung-uk Kim for (i = firstst[mach]; i <= last; ++i) {
154*7e382390SJung-uk Kim state = mkstate (transchar[i]);
155*7e382390SJung-uk Kim
156*7e382390SJung-uk Kim if (trans1[i] != NO_TRANSITION) {
157*7e382390SJung-uk Kim mkxtion (finalst[state], trans1[i] + state - i);
158*7e382390SJung-uk Kim
159*7e382390SJung-uk Kim if (transchar[i] == SYM_EPSILON &&
160*7e382390SJung-uk Kim trans2[i] != NO_TRANSITION)
161*7e382390SJung-uk Kim mkxtion (finalst[state],
162*7e382390SJung-uk Kim trans2[i] + state - i);
163*7e382390SJung-uk Kim }
164*7e382390SJung-uk Kim
165*7e382390SJung-uk Kim accptnum[state] = accptnum[i];
166*7e382390SJung-uk Kim }
167*7e382390SJung-uk Kim
168*7e382390SJung-uk Kim if (state == 0)
169*7e382390SJung-uk Kim flexfatal (_("empty machine in dupmachine()"));
170*7e382390SJung-uk Kim
171*7e382390SJung-uk Kim state_offset = state - i + 1;
172*7e382390SJung-uk Kim
173*7e382390SJung-uk Kim init = mach + state_offset;
174*7e382390SJung-uk Kim firstst[init] = firstst[mach] + state_offset;
175*7e382390SJung-uk Kim finalst[init] = finalst[mach] + state_offset;
176*7e382390SJung-uk Kim lastst[init] = lastst[mach] + state_offset;
177*7e382390SJung-uk Kim
178*7e382390SJung-uk Kim return init;
179*7e382390SJung-uk Kim }
180*7e382390SJung-uk Kim
181*7e382390SJung-uk Kim
182*7e382390SJung-uk Kim /* finish_rule - finish up the processing for a rule
183*7e382390SJung-uk Kim *
184*7e382390SJung-uk Kim * An accepting number is added to the given machine. If variable_trail_rule
185*7e382390SJung-uk Kim * is true then the rule has trailing context and both the head and trail
186*7e382390SJung-uk Kim * are variable size. Otherwise if headcnt or trailcnt is non-zero then
187*7e382390SJung-uk Kim * the machine recognizes a pattern with trailing context and headcnt is
188*7e382390SJung-uk Kim * the number of characters in the matched part of the pattern, or zero
189*7e382390SJung-uk Kim * if the matched part has variable length. trailcnt is the number of
190*7e382390SJung-uk Kim * trailing context characters in the pattern, or zero if the trailing
191*7e382390SJung-uk Kim * context has variable length.
192*7e382390SJung-uk Kim */
193*7e382390SJung-uk Kim
finish_rule(int mach,int variable_trail_rule,int headcnt,int trailcnt,int pcont_act)194*7e382390SJung-uk Kim void finish_rule (int mach, int variable_trail_rule, int headcnt, int trailcnt,
195*7e382390SJung-uk Kim int pcont_act)
196*7e382390SJung-uk Kim {
197*7e382390SJung-uk Kim char action_text[MAXLINE];
198*7e382390SJung-uk Kim
199*7e382390SJung-uk Kim add_accept (mach, num_rules);
200*7e382390SJung-uk Kim
201*7e382390SJung-uk Kim /* We did this in new_rule(), but it often gets the wrong
202*7e382390SJung-uk Kim * number because we do it before we start parsing the current rule.
203*7e382390SJung-uk Kim */
204*7e382390SJung-uk Kim rule_linenum[num_rules] = linenum;
205*7e382390SJung-uk Kim
206*7e382390SJung-uk Kim /* If this is a continued action, then the line-number has already
207*7e382390SJung-uk Kim * been updated, giving us the wrong number.
208*7e382390SJung-uk Kim */
209*7e382390SJung-uk Kim if (continued_action)
210*7e382390SJung-uk Kim --rule_linenum[num_rules];
211*7e382390SJung-uk Kim
212*7e382390SJung-uk Kim
213*7e382390SJung-uk Kim /* If the previous rule was continued action, then we inherit the
214*7e382390SJung-uk Kim * previous newline flag, possibly overriding the current one.
215*7e382390SJung-uk Kim */
216*7e382390SJung-uk Kim if (pcont_act && rule_has_nl[num_rules - 1])
217*7e382390SJung-uk Kim rule_has_nl[num_rules] = true;
218*7e382390SJung-uk Kim
219*7e382390SJung-uk Kim snprintf (action_text, sizeof(action_text), "case %d:\n", num_rules);
220*7e382390SJung-uk Kim add_action (action_text);
221*7e382390SJung-uk Kim if (rule_has_nl[num_rules]) {
222*7e382390SJung-uk Kim snprintf (action_text, sizeof(action_text), "/* rule %d can match eol */\n",
223*7e382390SJung-uk Kim num_rules);
224*7e382390SJung-uk Kim add_action (action_text);
225*7e382390SJung-uk Kim }
226*7e382390SJung-uk Kim
227*7e382390SJung-uk Kim
228*7e382390SJung-uk Kim if (variable_trail_rule) {
229*7e382390SJung-uk Kim rule_type[num_rules] = RULE_VARIABLE;
230*7e382390SJung-uk Kim
231*7e382390SJung-uk Kim if (performance_report > 0)
232*7e382390SJung-uk Kim fprintf (stderr,
233*7e382390SJung-uk Kim _
234*7e382390SJung-uk Kim ("Variable trailing context rule at line %d\n"),
235*7e382390SJung-uk Kim rule_linenum[num_rules]);
236*7e382390SJung-uk Kim
237*7e382390SJung-uk Kim variable_trailing_context_rules = true;
238*7e382390SJung-uk Kim }
239*7e382390SJung-uk Kim
240*7e382390SJung-uk Kim else {
241*7e382390SJung-uk Kim rule_type[num_rules] = RULE_NORMAL;
242*7e382390SJung-uk Kim
243*7e382390SJung-uk Kim if (headcnt > 0 || trailcnt > 0) {
244*7e382390SJung-uk Kim /* Do trailing context magic to not match the trailing
245*7e382390SJung-uk Kim * characters.
246*7e382390SJung-uk Kim */
247*7e382390SJung-uk Kim char *scanner_cp = "YY_G(yy_c_buf_p) = yy_cp";
248*7e382390SJung-uk Kim char *scanner_bp = "yy_bp";
249*7e382390SJung-uk Kim
250*7e382390SJung-uk Kim add_action
251*7e382390SJung-uk Kim ("*yy_cp = YY_G(yy_hold_char); /* undo effects of setting up yytext */\n");
252*7e382390SJung-uk Kim
253*7e382390SJung-uk Kim if (headcnt > 0) {
254*7e382390SJung-uk Kim if (rule_has_nl[num_rules]) {
255*7e382390SJung-uk Kim snprintf (action_text, sizeof(action_text),
256*7e382390SJung-uk Kim "YY_LINENO_REWIND_TO(%s + %d);\n", scanner_bp, headcnt);
257*7e382390SJung-uk Kim add_action (action_text);
258*7e382390SJung-uk Kim }
259*7e382390SJung-uk Kim snprintf (action_text, sizeof(action_text), "%s = %s + %d;\n",
260*7e382390SJung-uk Kim scanner_cp, scanner_bp, headcnt);
261*7e382390SJung-uk Kim add_action (action_text);
262*7e382390SJung-uk Kim }
263*7e382390SJung-uk Kim
264*7e382390SJung-uk Kim else {
265*7e382390SJung-uk Kim if (rule_has_nl[num_rules]) {
266*7e382390SJung-uk Kim snprintf (action_text, sizeof(action_text),
267*7e382390SJung-uk Kim "YY_LINENO_REWIND_TO(yy_cp - %d);\n", trailcnt);
268*7e382390SJung-uk Kim add_action (action_text);
269*7e382390SJung-uk Kim }
270*7e382390SJung-uk Kim
271*7e382390SJung-uk Kim snprintf (action_text, sizeof(action_text), "%s -= %d;\n",
272*7e382390SJung-uk Kim scanner_cp, trailcnt);
273*7e382390SJung-uk Kim add_action (action_text);
274*7e382390SJung-uk Kim }
275*7e382390SJung-uk Kim
276*7e382390SJung-uk Kim add_action
277*7e382390SJung-uk Kim ("YY_DO_BEFORE_ACTION; /* set up yytext again */\n");
278*7e382390SJung-uk Kim }
279*7e382390SJung-uk Kim }
280*7e382390SJung-uk Kim
281*7e382390SJung-uk Kim /* Okay, in the action code at this point yytext and yyleng have
282*7e382390SJung-uk Kim * their proper final values for this rule, so here's the point
283*7e382390SJung-uk Kim * to do any user action. But don't do it for continued actions,
284*7e382390SJung-uk Kim * as that'll result in multiple YY_RULE_SETUP's.
285*7e382390SJung-uk Kim */
286*7e382390SJung-uk Kim if (!continued_action)
287*7e382390SJung-uk Kim add_action ("YY_RULE_SETUP\n");
288*7e382390SJung-uk Kim
289*7e382390SJung-uk Kim line_directive_out(NULL, 1);
290*7e382390SJung-uk Kim add_action("[[");
291*7e382390SJung-uk Kim }
292*7e382390SJung-uk Kim
293*7e382390SJung-uk Kim
294*7e382390SJung-uk Kim /* link_machines - connect two machines together
295*7e382390SJung-uk Kim *
296*7e382390SJung-uk Kim * synopsis
297*7e382390SJung-uk Kim *
298*7e382390SJung-uk Kim * new = link_machines( first, last );
299*7e382390SJung-uk Kim *
300*7e382390SJung-uk Kim * new - a machine constructed by connecting first to last
301*7e382390SJung-uk Kim * first - the machine whose successor is to be last
302*7e382390SJung-uk Kim * last - the machine whose predecessor is to be first
303*7e382390SJung-uk Kim *
304*7e382390SJung-uk Kim * note: this routine concatenates the machine first with the machine
305*7e382390SJung-uk Kim * last to produce a machine new which will pattern-match first first
306*7e382390SJung-uk Kim * and then last, and will fail if either of the sub-patterns fails.
307*7e382390SJung-uk Kim * FIRST is set to new by the operation. last is unmolested.
308*7e382390SJung-uk Kim */
309*7e382390SJung-uk Kim
link_machines(int first,int last)310*7e382390SJung-uk Kim int link_machines (int first, int last)
311*7e382390SJung-uk Kim {
312*7e382390SJung-uk Kim if (first == NIL)
313*7e382390SJung-uk Kim return last;
314*7e382390SJung-uk Kim
315*7e382390SJung-uk Kim else if (last == NIL)
316*7e382390SJung-uk Kim return first;
317*7e382390SJung-uk Kim
318*7e382390SJung-uk Kim else {
319*7e382390SJung-uk Kim mkxtion (finalst[first], last);
320*7e382390SJung-uk Kim finalst[first] = finalst[last];
321*7e382390SJung-uk Kim lastst[first] = MAX (lastst[first], lastst[last]);
322*7e382390SJung-uk Kim firstst[first] = MIN (firstst[first], firstst[last]);
323*7e382390SJung-uk Kim
324*7e382390SJung-uk Kim return first;
325*7e382390SJung-uk Kim }
326*7e382390SJung-uk Kim }
327*7e382390SJung-uk Kim
328*7e382390SJung-uk Kim
329*7e382390SJung-uk Kim /* mark_beginning_as_normal - mark each "beginning" state in a machine
330*7e382390SJung-uk Kim * as being a "normal" (i.e., not trailing context-
331*7e382390SJung-uk Kim * associated) states
332*7e382390SJung-uk Kim *
333*7e382390SJung-uk Kim * The "beginning" states are the epsilon closure of the first state
334*7e382390SJung-uk Kim */
335*7e382390SJung-uk Kim
mark_beginning_as_normal(int mach)336*7e382390SJung-uk Kim void mark_beginning_as_normal (int mach)
337*7e382390SJung-uk Kim {
338*7e382390SJung-uk Kim switch (state_type[mach]) {
339*7e382390SJung-uk Kim case STATE_NORMAL:
340*7e382390SJung-uk Kim /* Oh, we've already visited here. */
341*7e382390SJung-uk Kim return;
342*7e382390SJung-uk Kim
343*7e382390SJung-uk Kim case STATE_TRAILING_CONTEXT:
344*7e382390SJung-uk Kim state_type[mach] = STATE_NORMAL;
345*7e382390SJung-uk Kim
346*7e382390SJung-uk Kim if (transchar[mach] == SYM_EPSILON) {
347*7e382390SJung-uk Kim if (trans1[mach] != NO_TRANSITION)
348*7e382390SJung-uk Kim mark_beginning_as_normal (trans1[mach]);
349*7e382390SJung-uk Kim
350*7e382390SJung-uk Kim if (trans2[mach] != NO_TRANSITION)
351*7e382390SJung-uk Kim mark_beginning_as_normal (trans2[mach]);
352*7e382390SJung-uk Kim }
353*7e382390SJung-uk Kim break;
354*7e382390SJung-uk Kim
355*7e382390SJung-uk Kim default:
356*7e382390SJung-uk Kim flexerror (_
357*7e382390SJung-uk Kim ("bad state type in mark_beginning_as_normal()"));
358*7e382390SJung-uk Kim break;
359*7e382390SJung-uk Kim }
360*7e382390SJung-uk Kim }
361*7e382390SJung-uk Kim
362*7e382390SJung-uk Kim
363*7e382390SJung-uk Kim /* mkbranch - make a machine that branches to two machines
364*7e382390SJung-uk Kim *
365*7e382390SJung-uk Kim * synopsis
366*7e382390SJung-uk Kim *
367*7e382390SJung-uk Kim * branch = mkbranch( first, second );
368*7e382390SJung-uk Kim *
369*7e382390SJung-uk Kim * branch - a machine which matches either first's pattern or second's
370*7e382390SJung-uk Kim * first, second - machines whose patterns are to be or'ed (the | operator)
371*7e382390SJung-uk Kim *
372*7e382390SJung-uk Kim * Note that first and second are NEITHER destroyed by the operation. Also,
373*7e382390SJung-uk Kim * the resulting machine CANNOT be used with any other "mk" operation except
374*7e382390SJung-uk Kim * more mkbranch's. Compare with mkor()
375*7e382390SJung-uk Kim */
376*7e382390SJung-uk Kim
mkbranch(int first,int second)377*7e382390SJung-uk Kim int mkbranch (int first, int second)
378*7e382390SJung-uk Kim {
379*7e382390SJung-uk Kim int eps;
380*7e382390SJung-uk Kim
381*7e382390SJung-uk Kim if (first == NO_TRANSITION)
382*7e382390SJung-uk Kim return second;
383*7e382390SJung-uk Kim
384*7e382390SJung-uk Kim else if (second == NO_TRANSITION)
385*7e382390SJung-uk Kim return first;
386*7e382390SJung-uk Kim
387*7e382390SJung-uk Kim eps = mkstate (SYM_EPSILON);
388*7e382390SJung-uk Kim
389*7e382390SJung-uk Kim mkxtion (eps, first);
390*7e382390SJung-uk Kim mkxtion (eps, second);
391*7e382390SJung-uk Kim
392*7e382390SJung-uk Kim return eps;
393*7e382390SJung-uk Kim }
394*7e382390SJung-uk Kim
395*7e382390SJung-uk Kim
396*7e382390SJung-uk Kim /* mkclos - convert a machine into a closure
397*7e382390SJung-uk Kim *
398*7e382390SJung-uk Kim * synopsis
399*7e382390SJung-uk Kim * new = mkclos( state );
400*7e382390SJung-uk Kim *
401*7e382390SJung-uk Kim * new - a new state which matches the closure of "state"
402*7e382390SJung-uk Kim */
403*7e382390SJung-uk Kim
mkclos(int state)404*7e382390SJung-uk Kim int mkclos (int state)
405*7e382390SJung-uk Kim {
406*7e382390SJung-uk Kim return mkopt (mkposcl (state));
407*7e382390SJung-uk Kim }
408*7e382390SJung-uk Kim
409*7e382390SJung-uk Kim
410*7e382390SJung-uk Kim /* mkopt - make a machine optional
411*7e382390SJung-uk Kim *
412*7e382390SJung-uk Kim * synopsis
413*7e382390SJung-uk Kim *
414*7e382390SJung-uk Kim * new = mkopt( mach );
415*7e382390SJung-uk Kim *
416*7e382390SJung-uk Kim * new - a machine which optionally matches whatever mach matched
417*7e382390SJung-uk Kim * mach - the machine to make optional
418*7e382390SJung-uk Kim *
419*7e382390SJung-uk Kim * notes:
420*7e382390SJung-uk Kim * 1. mach must be the last machine created
421*7e382390SJung-uk Kim * 2. mach is destroyed by the call
422*7e382390SJung-uk Kim */
423*7e382390SJung-uk Kim
mkopt(int mach)424*7e382390SJung-uk Kim int mkopt (int mach)
425*7e382390SJung-uk Kim {
426*7e382390SJung-uk Kim int eps;
427*7e382390SJung-uk Kim
428*7e382390SJung-uk Kim if (!SUPER_FREE_EPSILON (finalst[mach])) {
429*7e382390SJung-uk Kim eps = mkstate (SYM_EPSILON);
430*7e382390SJung-uk Kim mach = link_machines (mach, eps);
431*7e382390SJung-uk Kim }
432*7e382390SJung-uk Kim
433*7e382390SJung-uk Kim /* Can't skimp on the following if FREE_EPSILON(mach) is true because
434*7e382390SJung-uk Kim * some state interior to "mach" might point back to the beginning
435*7e382390SJung-uk Kim * for a closure.
436*7e382390SJung-uk Kim */
437*7e382390SJung-uk Kim eps = mkstate (SYM_EPSILON);
438*7e382390SJung-uk Kim mach = link_machines (eps, mach);
439*7e382390SJung-uk Kim
440*7e382390SJung-uk Kim mkxtion (mach, finalst[mach]);
441*7e382390SJung-uk Kim
442*7e382390SJung-uk Kim return mach;
443*7e382390SJung-uk Kim }
444*7e382390SJung-uk Kim
445*7e382390SJung-uk Kim
446*7e382390SJung-uk Kim /* mkor - make a machine that matches either one of two machines
447*7e382390SJung-uk Kim *
448*7e382390SJung-uk Kim * synopsis
449*7e382390SJung-uk Kim *
450*7e382390SJung-uk Kim * new = mkor( first, second );
451*7e382390SJung-uk Kim *
452*7e382390SJung-uk Kim * new - a machine which matches either first's pattern or second's
453*7e382390SJung-uk Kim * first, second - machines whose patterns are to be or'ed (the | operator)
454*7e382390SJung-uk Kim *
455*7e382390SJung-uk Kim * note that first and second are both destroyed by the operation
456*7e382390SJung-uk Kim * the code is rather convoluted because an attempt is made to minimize
457*7e382390SJung-uk Kim * the number of epsilon states needed
458*7e382390SJung-uk Kim */
459*7e382390SJung-uk Kim
mkor(int first,int second)460*7e382390SJung-uk Kim int mkor (int first, int second)
461*7e382390SJung-uk Kim {
462*7e382390SJung-uk Kim int eps, orend;
463*7e382390SJung-uk Kim
464*7e382390SJung-uk Kim if (first == NIL)
465*7e382390SJung-uk Kim return second;
466*7e382390SJung-uk Kim
467*7e382390SJung-uk Kim else if (second == NIL)
468*7e382390SJung-uk Kim return first;
469*7e382390SJung-uk Kim
470*7e382390SJung-uk Kim else {
471*7e382390SJung-uk Kim /* See comment in mkopt() about why we can't use the first
472*7e382390SJung-uk Kim * state of "first" or "second" if they satisfy "FREE_EPSILON".
473*7e382390SJung-uk Kim */
474*7e382390SJung-uk Kim eps = mkstate (SYM_EPSILON);
475*7e382390SJung-uk Kim
476*7e382390SJung-uk Kim first = link_machines (eps, first);
477*7e382390SJung-uk Kim
478*7e382390SJung-uk Kim mkxtion (first, second);
479*7e382390SJung-uk Kim
480*7e382390SJung-uk Kim if (SUPER_FREE_EPSILON (finalst[first]) &&
481*7e382390SJung-uk Kim accptnum[finalst[first]] == NIL) {
482*7e382390SJung-uk Kim orend = finalst[first];
483*7e382390SJung-uk Kim mkxtion (finalst[second], orend);
484*7e382390SJung-uk Kim }
485*7e382390SJung-uk Kim
486*7e382390SJung-uk Kim else if (SUPER_FREE_EPSILON (finalst[second]) &&
487*7e382390SJung-uk Kim accptnum[finalst[second]] == NIL) {
488*7e382390SJung-uk Kim orend = finalst[second];
489*7e382390SJung-uk Kim mkxtion (finalst[first], orend);
490*7e382390SJung-uk Kim }
491*7e382390SJung-uk Kim
492*7e382390SJung-uk Kim else {
493*7e382390SJung-uk Kim eps = mkstate (SYM_EPSILON);
494*7e382390SJung-uk Kim
495*7e382390SJung-uk Kim first = link_machines (first, eps);
496*7e382390SJung-uk Kim orend = finalst[first];
497*7e382390SJung-uk Kim
498*7e382390SJung-uk Kim mkxtion (finalst[second], orend);
499*7e382390SJung-uk Kim }
500*7e382390SJung-uk Kim }
501*7e382390SJung-uk Kim
502*7e382390SJung-uk Kim finalst[first] = orend;
503*7e382390SJung-uk Kim return first;
504*7e382390SJung-uk Kim }
505*7e382390SJung-uk Kim
506*7e382390SJung-uk Kim
507*7e382390SJung-uk Kim /* mkposcl - convert a machine into a positive closure
508*7e382390SJung-uk Kim *
509*7e382390SJung-uk Kim * synopsis
510*7e382390SJung-uk Kim * new = mkposcl( state );
511*7e382390SJung-uk Kim *
512*7e382390SJung-uk Kim * new - a machine matching the positive closure of "state"
513*7e382390SJung-uk Kim */
514*7e382390SJung-uk Kim
mkposcl(int state)515*7e382390SJung-uk Kim int mkposcl (int state)
516*7e382390SJung-uk Kim {
517*7e382390SJung-uk Kim int eps;
518*7e382390SJung-uk Kim
519*7e382390SJung-uk Kim if (SUPER_FREE_EPSILON (finalst[state])) {
520*7e382390SJung-uk Kim mkxtion (finalst[state], state);
521*7e382390SJung-uk Kim return state;
522*7e382390SJung-uk Kim }
523*7e382390SJung-uk Kim
524*7e382390SJung-uk Kim else {
525*7e382390SJung-uk Kim eps = mkstate (SYM_EPSILON);
526*7e382390SJung-uk Kim mkxtion (eps, state);
527*7e382390SJung-uk Kim return link_machines (state, eps);
528*7e382390SJung-uk Kim }
529*7e382390SJung-uk Kim }
530*7e382390SJung-uk Kim
531*7e382390SJung-uk Kim
532*7e382390SJung-uk Kim /* mkrep - make a replicated machine
533*7e382390SJung-uk Kim *
534*7e382390SJung-uk Kim * synopsis
535*7e382390SJung-uk Kim * new = mkrep( mach, lb, ub );
536*7e382390SJung-uk Kim *
537*7e382390SJung-uk Kim * new - a machine that matches whatever "mach" matched from "lb"
538*7e382390SJung-uk Kim * number of times to "ub" number of times
539*7e382390SJung-uk Kim *
540*7e382390SJung-uk Kim * note
541*7e382390SJung-uk Kim * if "ub" is INFINITE_REPEAT then "new" matches "lb" or more occurrences of "mach"
542*7e382390SJung-uk Kim */
543*7e382390SJung-uk Kim
mkrep(int mach,int lb,int ub)544*7e382390SJung-uk Kim int mkrep (int mach, int lb, int ub)
545*7e382390SJung-uk Kim {
546*7e382390SJung-uk Kim int base_mach, tail, copy, i;
547*7e382390SJung-uk Kim
548*7e382390SJung-uk Kim base_mach = copysingl (mach, lb - 1);
549*7e382390SJung-uk Kim
550*7e382390SJung-uk Kim if (ub == INFINITE_REPEAT) {
551*7e382390SJung-uk Kim copy = dupmachine (mach);
552*7e382390SJung-uk Kim mach = link_machines (mach,
553*7e382390SJung-uk Kim link_machines (base_mach,
554*7e382390SJung-uk Kim mkclos (copy)));
555*7e382390SJung-uk Kim }
556*7e382390SJung-uk Kim
557*7e382390SJung-uk Kim else {
558*7e382390SJung-uk Kim tail = mkstate (SYM_EPSILON);
559*7e382390SJung-uk Kim
560*7e382390SJung-uk Kim for (i = lb; i < ub; ++i) {
561*7e382390SJung-uk Kim copy = dupmachine (mach);
562*7e382390SJung-uk Kim tail = mkopt (link_machines (copy, tail));
563*7e382390SJung-uk Kim }
564*7e382390SJung-uk Kim
565*7e382390SJung-uk Kim mach =
566*7e382390SJung-uk Kim link_machines (mach,
567*7e382390SJung-uk Kim link_machines (base_mach, tail));
568*7e382390SJung-uk Kim }
569*7e382390SJung-uk Kim
570*7e382390SJung-uk Kim return mach;
571*7e382390SJung-uk Kim }
572*7e382390SJung-uk Kim
573*7e382390SJung-uk Kim
574*7e382390SJung-uk Kim /* mkstate - create a state with a transition on a given symbol
575*7e382390SJung-uk Kim *
576*7e382390SJung-uk Kim * synopsis
577*7e382390SJung-uk Kim *
578*7e382390SJung-uk Kim * state = mkstate( sym );
579*7e382390SJung-uk Kim *
580*7e382390SJung-uk Kim * state - a new state matching sym
581*7e382390SJung-uk Kim * sym - the symbol the new state is to have an out-transition on
582*7e382390SJung-uk Kim *
583*7e382390SJung-uk Kim * note that this routine makes new states in ascending order through the
584*7e382390SJung-uk Kim * state array (and increments LASTNFA accordingly). The routine DUPMACHINE
585*7e382390SJung-uk Kim * relies on machines being made in ascending order and that they are
586*7e382390SJung-uk Kim * CONTIGUOUS. Change it and you will have to rewrite DUPMACHINE (kludge
587*7e382390SJung-uk Kim * that it admittedly is)
588*7e382390SJung-uk Kim */
589*7e382390SJung-uk Kim
mkstate(int sym)590*7e382390SJung-uk Kim int mkstate (int sym)
591*7e382390SJung-uk Kim {
592*7e382390SJung-uk Kim if (++lastnfa >= current_mns) {
593*7e382390SJung-uk Kim if ((current_mns += MNS_INCREMENT) >= maximum_mns)
594*7e382390SJung-uk Kim lerr(_
595*7e382390SJung-uk Kim ("input rules are too complicated (>= %d NFA states)"),
596*7e382390SJung-uk Kim current_mns);
597*7e382390SJung-uk Kim
598*7e382390SJung-uk Kim ++num_reallocs;
599*7e382390SJung-uk Kim
600*7e382390SJung-uk Kim firstst = reallocate_integer_array (firstst, current_mns);
601*7e382390SJung-uk Kim lastst = reallocate_integer_array (lastst, current_mns);
602*7e382390SJung-uk Kim finalst = reallocate_integer_array (finalst, current_mns);
603*7e382390SJung-uk Kim transchar =
604*7e382390SJung-uk Kim reallocate_integer_array (transchar, current_mns);
605*7e382390SJung-uk Kim trans1 = reallocate_integer_array (trans1, current_mns);
606*7e382390SJung-uk Kim trans2 = reallocate_integer_array (trans2, current_mns);
607*7e382390SJung-uk Kim accptnum =
608*7e382390SJung-uk Kim reallocate_integer_array (accptnum, current_mns);
609*7e382390SJung-uk Kim assoc_rule =
610*7e382390SJung-uk Kim reallocate_integer_array (assoc_rule, current_mns);
611*7e382390SJung-uk Kim state_type =
612*7e382390SJung-uk Kim reallocate_integer_array (state_type, current_mns);
613*7e382390SJung-uk Kim }
614*7e382390SJung-uk Kim
615*7e382390SJung-uk Kim firstst[lastnfa] = lastnfa;
616*7e382390SJung-uk Kim finalst[lastnfa] = lastnfa;
617*7e382390SJung-uk Kim lastst[lastnfa] = lastnfa;
618*7e382390SJung-uk Kim transchar[lastnfa] = sym;
619*7e382390SJung-uk Kim trans1[lastnfa] = NO_TRANSITION;
620*7e382390SJung-uk Kim trans2[lastnfa] = NO_TRANSITION;
621*7e382390SJung-uk Kim accptnum[lastnfa] = NIL;
622*7e382390SJung-uk Kim assoc_rule[lastnfa] = num_rules;
623*7e382390SJung-uk Kim state_type[lastnfa] = current_state_type;
624*7e382390SJung-uk Kim
625*7e382390SJung-uk Kim /* Fix up equivalence classes base on this transition. Note that any
626*7e382390SJung-uk Kim * character which has its own transition gets its own equivalence
627*7e382390SJung-uk Kim * class. Thus only characters which are only in character classes
628*7e382390SJung-uk Kim * have a chance at being in the same equivalence class. E.g. "a|b"
629*7e382390SJung-uk Kim * puts 'a' and 'b' into two different equivalence classes. "[ab]"
630*7e382390SJung-uk Kim * puts them in the same equivalence class (barring other differences
631*7e382390SJung-uk Kim * elsewhere in the input).
632*7e382390SJung-uk Kim */
633*7e382390SJung-uk Kim
634*7e382390SJung-uk Kim if (sym < 0) {
635*7e382390SJung-uk Kim /* We don't have to update the equivalence classes since
636*7e382390SJung-uk Kim * that was already done when the ccl was created for the
637*7e382390SJung-uk Kim * first time.
638*7e382390SJung-uk Kim */
639*7e382390SJung-uk Kim }
640*7e382390SJung-uk Kim
641*7e382390SJung-uk Kim else if (sym == SYM_EPSILON)
642*7e382390SJung-uk Kim ++numeps;
643*7e382390SJung-uk Kim
644*7e382390SJung-uk Kim else {
645*7e382390SJung-uk Kim check_char (sym);
646*7e382390SJung-uk Kim
647*7e382390SJung-uk Kim if (useecs)
648*7e382390SJung-uk Kim /* Map NUL's to csize. */
649*7e382390SJung-uk Kim mkechar (sym ? sym : csize, nextecm, ecgroup);
650*7e382390SJung-uk Kim }
651*7e382390SJung-uk Kim
652*7e382390SJung-uk Kim return lastnfa;
653*7e382390SJung-uk Kim }
654*7e382390SJung-uk Kim
655*7e382390SJung-uk Kim
656*7e382390SJung-uk Kim /* mkxtion - make a transition from one state to another
657*7e382390SJung-uk Kim *
658*7e382390SJung-uk Kim * synopsis
659*7e382390SJung-uk Kim *
660*7e382390SJung-uk Kim * mkxtion( statefrom, stateto );
661*7e382390SJung-uk Kim *
662*7e382390SJung-uk Kim * statefrom - the state from which the transition is to be made
663*7e382390SJung-uk Kim * stateto - the state to which the transition is to be made
664*7e382390SJung-uk Kim */
665*7e382390SJung-uk Kim
mkxtion(int statefrom,int stateto)666*7e382390SJung-uk Kim void mkxtion (int statefrom, int stateto)
667*7e382390SJung-uk Kim {
668*7e382390SJung-uk Kim if (trans1[statefrom] == NO_TRANSITION)
669*7e382390SJung-uk Kim trans1[statefrom] = stateto;
670*7e382390SJung-uk Kim
671*7e382390SJung-uk Kim else if ((transchar[statefrom] != SYM_EPSILON) ||
672*7e382390SJung-uk Kim (trans2[statefrom] != NO_TRANSITION))
673*7e382390SJung-uk Kim flexfatal (_("found too many transitions in mkxtion()"));
674*7e382390SJung-uk Kim
675*7e382390SJung-uk Kim else { /* second out-transition for an epsilon state */
676*7e382390SJung-uk Kim ++eps2;
677*7e382390SJung-uk Kim trans2[statefrom] = stateto;
678*7e382390SJung-uk Kim }
679*7e382390SJung-uk Kim }
680*7e382390SJung-uk Kim
681*7e382390SJung-uk Kim /* new_rule - initialize for a new rule */
682*7e382390SJung-uk Kim
new_rule(void)683*7e382390SJung-uk Kim void new_rule (void)
684*7e382390SJung-uk Kim {
685*7e382390SJung-uk Kim if (++num_rules >= current_max_rules) {
686*7e382390SJung-uk Kim ++num_reallocs;
687*7e382390SJung-uk Kim current_max_rules += MAX_RULES_INCREMENT;
688*7e382390SJung-uk Kim rule_type = reallocate_integer_array (rule_type,
689*7e382390SJung-uk Kim current_max_rules);
690*7e382390SJung-uk Kim rule_linenum = reallocate_integer_array (rule_linenum,
691*7e382390SJung-uk Kim current_max_rules);
692*7e382390SJung-uk Kim rule_useful = reallocate_integer_array (rule_useful,
693*7e382390SJung-uk Kim current_max_rules);
694*7e382390SJung-uk Kim rule_has_nl = reallocate_bool_array (rule_has_nl,
695*7e382390SJung-uk Kim current_max_rules);
696*7e382390SJung-uk Kim }
697*7e382390SJung-uk Kim
698*7e382390SJung-uk Kim if (num_rules > MAX_RULE)
699*7e382390SJung-uk Kim lerr (_("too many rules (> %d)!"), MAX_RULE);
700*7e382390SJung-uk Kim
701*7e382390SJung-uk Kim rule_linenum[num_rules] = linenum;
702*7e382390SJung-uk Kim rule_useful[num_rules] = false;
703*7e382390SJung-uk Kim rule_has_nl[num_rules] = false;
704*7e382390SJung-uk Kim }
705