1 static const char rcsid[] = "$Id: s_regex.c,v 1.10 2004/04/30 17:00:58 will Exp $";
2 /*
3  * regex - Regular expression pattern matching
4  *         and replacement
5  *
6  *
7  * By:  Ozan S. Yigit (oz)
8  *      Dept. of Computer Science
9  *      York University
10  *
11  *
12  * These routines are the PUBLIC DOMAIN equivalents
13  * of regex routines as found in 4.nBSD UN*X, with minor
14  * extensions.
15  *
16  * These routines are derived from various implementations
17  * found in software tools books, and Conroy's grep. They
18  * are NOT derived from licensed/restricted software.
19  * For more interesting/academic/complicated implementations,
20  * see Henry Spencer's regexp routines, or GNU Emacs pattern
21  * matching module.
22  *
23  * ** NAMES CHANGED TO BE PREFIXED WITH s_, TO AVOID CONFLICTS **
24  * ** -Will Deich, Feb 1997 **
25  *
26  * Routines:
27  *      s_re_comp:      compile a regular expression into
28  *                      a DFA.
29  *
30  *			char *s_re_comp(s)
31  *			char *s;
32  *
33  *      s_re_exec:      execute the DFA to match a pattern.
34  *
35  *			int s_re_exec(s)
36  *			char *s;
37  *
38  *	s_re_modw	change s_re_exec's understanding of what
39  *			a "word" looks like (for \< and \>)
40  *			by adding into the hidden word-character
41  *			table.
42  *
43  *			void s_re_modw(s)
44  *			char *s;
45  *
46  *      s_re_subs:	substitute the matched portions in
47  *              	a new string.
48  *
49  *			int s_re_subs(src, dst)
50  *			char *src;
51  *			char *dst;
52  *
53  *	s_re_fail:	failure routine for s_re_exec.
54  *
55  *			void s_re_fail(msg, op)
56  *			char *msg;
57  *			char op;
58  *
59  * Regular Expressions:
60  *
61  *      [1]     char    matches itself, unless it is a special
62  *                      character (metachar): . \ [ ] * + ^ $
63  *
64  *      [2]     .       matches any character.
65  *
66  *      [3]     \       matches the character following it, except
67  *			when followed by a left or right round bracket,
68  *			a digit 1 to 9 or a left or right angle bracket.
69  *			(see [7], [8] and [9])
70  *			It is used as an escape character for all
71  *			other meta-characters, and itself. When used
72  *			in a set ([4]), it is treated as an ordinary
73  *			character.
74  *
75  *      [4]     [set]   matches one of the characters in the set.
76  *                      If the first character in the set is "^",
77  *                      it matches a character NOT in the set. A
78  *                      shorthand S-E is used to specify a set of
79  *                      characters S upto E, inclusive. The special
80  *                      characters "]" and "-" have no special
81  *                      meaning if they appear as the first chars
82  *                      in the set.
83  *                      examples:        match:
84  *
85  *                              [a-z]    any lowercase alpha
86  *
87  *                              [^]-]    any char except ] and -
88  *
89  *                              [^A-Z]   any char except uppercase
90  *                                       alpha
91  *
92  *                              [a-zA-Z] any alpha
93  *
94  *      [5]     *       any regular expression form [1] to [4], followed by
95  *                      closure char (*) matches zero or more matches of
96  *                      that form.
97  *
98  *      [6]     +       same as [5], except it matches one or more.
99  *
100  *      [7]             a regular expression in the form [1] to [10], enclosed
101  *                      as \(form\) matches what form matches. The enclosure
102  *                      creates a set of tags, used for [8] and for
103  *                      pattern substution. The tagged forms are numbered
104  *			starting from 1.
105  *
106  *      [8]             a \ followed by a digit 1 to 9 matches whatever a
107  *                      previously tagged regular expression ([7]) matched.
108  *
109  *	[9]	\<	a regular expression starting with a \< construct
110  *		\>	and/or ending with a \> construct, restricts the
111  *			pattern matching to the beginning of a word, and/or
112  *			the end of a word. A word is defined to be a character
113  *			string beginning and/or ending with the characters
114  *			A-Z a-z 0-9 and _. It must also be preceded and/or
115  *			followed by any character outside those mentioned.
116  *
117  *      [10]            a composite regular expression xy where x and y
118  *                      are in the form [1] to [10] matches the longest
119  *                      match of x followed by a match for y.
120  *
121  *      [11]	^	a regular expression starting with a ^ character
122  *		$	and/or ending with a $ character, restricts the
123  *                      pattern matching to the beginning of the line,
124  *                      or the end of line. [anchors] Elsewhere in the
125  *			pattern, ^ and $ are treated as ordinary characters.
126  *
127  *
128  * Acknowledgements:
129  *
130  *	HCR's Hugh Redelmeier has been most helpful in various
131  *	stages of development. He convinced me to include BOW
132  *	and EOW constructs, originally invented by Rob Pike at
133  *	the University of Toronto.
134  *
135  * References:
136  *              Software tools			Kernighan & Plauger
137  *              Software tools in Pascal        Kernighan & Plauger
138  *              Grep [rsx-11 C dist]            David Conroy
139  *		ed - text editor		Un*x Programmer's Manual
140  *		Advanced editing on Un*x	B. W. Kernighan
141  *		RegExp routines			Henry Spencer
142  *
143  * Notes:
144  *
145  *	This implementation uses a bit-set representation for character
146  *	classes for speed and compactness. Each character is represented
147  *	by one bit in a 128-bit block. Thus, CCL or NCL always takes a
148  *	constant 16 bytes in the internal dfa, and s_re_exec does a single
149  *	bit comparison to locate the character in the set.
150  *
151  * Examples:
152  *
153  *	pattern:	foo*.*
154  *	compile:	CHR f CHR o CLO CHR o END CLO ANY END END
155  *	matches:	fo foo fooo foobar fobar foxx ...
156  *
157  *	pattern:	fo[ob]a[rz]
158  *	compile:	CHR f CHR o CCL 2 o b CHR a CCL bitset END
159  *	matches:	fobar fooar fobaz fooaz
160  *
161  *	pattern:	foo\\+
162  *	compile:	CHR f CHR o CHR o CHR \ CLO CHR \ END END
163  *	matches:	foo\ foo\\ foo\\\  ...
164  *
165  *	pattern:	\(foo\)[1-3]\1	(same as foo[1-3]foo)
166  *	compile:	BOT 1 CHR f CHR o CHR o EOT 1 CCL bitset REF 1 END
167  *	matches:	foo1foo foo2foo foo3foo
168  *
169  *	pattern:	\(fo.*\)-\1
170  *	compile:	BOT 1 CHR f CHR o CLO ANY END EOT 1 CHR - REF 1 END
171  *	matches:	foo-foo fo-fo fob-fob foobar-foobar ...
172  *
173  */
174 
175 #define MAXDFA  1024
176 #define MAXTAG  10
177 
178 #define OKP     1
179 #define NOP     0
180 
181 #define CHR     1
182 #define ANY     2
183 #define CCL     3
184 #define NCL     4
185 #define BOL     5
186 #define EOL     6
187 #define BOT     7
188 #define EOT     8
189 #define BOW	9
190 #define EOW	10
191 #define REF     11
192 #define CLO     12
193 
194 #define END     0
195 
196 /*
197  * The following defines are not meant
198  * to be changeable. They are for readibility
199  * only.
200  *
201  */
202 #define MAXCHR	128
203 #define CHRBIT	8
204 #define BITBLK	MAXCHR/CHRBIT
205 #define BLKIND	0170
206 #define BITIND	07
207 
208 #define ASCIIB	0177
209 
210 typedef unsigned char CHAR;
211 
212 static int  tagstk[MAXTAG];             /* subpat tag stack..*/
213 static CHAR dfa[MAXDFA];		/* automaton..       */
214 static int  sta = NOP;               	/* status of lastpat */
215 
216 static CHAR bittab[BITBLK];		/* bit table for CCL */
217 
218 static void
chset(c)219 chset(c)
220 register CHAR c;
221 {
222     bittab[((int) ((c)&BLKIND))>>3] |= 1<<((c)& (unsigned) BITIND);
223 }
224 
225 #define badpat(x)	return(*dfa = END, x)
226 #define store(x)	*mp++ = x
227 
228 char *
s_re_comp(pat)229 s_re_comp(pat)
230 char *pat;
231 {
232 	register char *p;               /* pattern pointer   */
233 	register CHAR *mp=dfa;          /* dfa pointer       */
234 	register CHAR *lp;              /* saved pointer..   */
235 	register CHAR *sp=dfa;          /* another one..     */
236 
237 	register int tagi = 0;          /* tag stack index   */
238 	register int tagc = 1;          /* actual tag count  */
239 
240 	register int n;
241 	int c1, c2;
242 
243 	if (!pat || !*pat) {
244 		if (sta)
245 			return(0);
246 		else
247 			badpat("No previous regular expression");
248 	}
249 	sta = NOP;
250 
251 	for (p = pat; *p; p++) {
252 		lp = mp;
253 		switch(*p) {
254 
255 		case '.':               /* match any char..  */
256 			store(ANY);
257 			break;
258 
259 		case '^':               /* match beginning.. */
260 			if (p == pat)
261 				store(BOL);
262 			else {
263 				store(CHR);
264 				store(*p);
265 			}
266 			break;
267 
268 		case '$':               /* match endofline.. */
269 			if (!*(p+1))
270 				store(EOL);
271 			else {
272 				store(CHR);
273 				store(*p);
274 			}
275 			break;
276 
277 		case '[':               /* match char class..*/
278 
279 			if (*++p == '^') {
280 				store(NCL);
281 				p++;
282 			}
283 			else
284 				store(CCL);
285 
286 			if (*p == '-')		/* real dash */
287 				chset(*p++);
288 			if (*p == ']')		/* real brac */
289 				chset(*p++);
290 			while (*p && *p != ']') {
291 				if (*p == '-' && *(p+1) && *(p+1) != ']') {
292 					p++;
293 					c1 = *(p-2) + 1;
294 					c2 = *p++;
295 					while (c1 <= c2)
296 						chset(c1++);
297 				}
298 #ifdef EXTEND
299 				else if (*p == '\\' && *(p+1)) {
300 					p++;
301 					chset(*p++);
302 				}
303 #endif
304 				else
305 					chset(*p++);
306 			}
307 			if (!*p)
308 				badpat("Missing ]");
309 
310 			for (n = 0; n < BITBLK; bittab[n++] = (char) 0)
311 				store(bittab[n]);
312 
313 			break;
314 
315 		case '*':               /* match 0 or more.. */
316 		case '+':               /* match 1 or more.. */
317 			if (p == pat)
318 				badpat("Empty closure");
319 			lp = sp;                /* previous opcode */
320 			if (*lp == CLO)         /* equivalence..   */
321 				break;
322 			switch(*lp) {
323 
324 			case BOL:
325 			case BOT:
326 			case EOT:
327 			case BOW:
328 			case EOW:
329 			case REF:
330 				badpat("Illegal closure");
331 			default:
332 				break;
333 			}
334 
335 			if (*p == '+')
336 				for (sp = mp; lp < sp; lp++)
337 					store(*lp);
338 
339 			store(END);
340 			store(END);
341 			sp = mp;
342 			while (--mp > lp)
343 				*mp = mp[-1];
344 			store(CLO);
345 			mp = sp;
346 			break;
347 
348 		case '\\':              /* tags, backrefs .. */
349 			switch(*++p) {
350 
351 			case '(':
352 				if (tagc < MAXTAG) {
353 					tagstk[++tagi] = tagc;
354 					store(BOT);
355 					store(tagc++);
356 				}
357 				else
358 					badpat("Too many \\(\\) pairs");
359 				break;
360 			case ')':
361 				if (*sp == BOT)
362 					badpat("Null pattern inside \\(\\)");
363 				if (tagi > 0) {
364 					store(EOT);
365 					store(tagstk[tagi--]);
366 				}
367 				else
368 					badpat("Unmatched \\)");
369 				break;
370 			case '<':
371 				store(BOW);
372 				break;
373 			case '>':
374 				if (*sp == BOW)
375 					badpat("Null pattern inside \\<\\>");
376 				store(EOW);
377 				break;
378 			case '1':
379 			case '2':
380 			case '3':
381 			case '4':
382 			case '5':
383 			case '6':
384 			case '7':
385 			case '8':
386 			case '9':
387 				n = *p-'0';
388 				if (tagi > 0 && tagstk[tagi] == n)
389 					badpat("Cyclical reference");
390 				if (tagc > n) {
391 					store(REF);
392 					store(n);
393 				}
394 				else
395 					badpat("Undetermined reference");
396 				break;
397 #ifdef EXTEND
398 			case 'b':
399 				store(CHR);
400 				store('\b');
401 				break;
402 			case 'n':
403 				store(CHR);
404 				store('\n');
405 				break;
406 			case 'f':
407 				store(CHR);
408 				store('\f');
409 				break;
410 			case 'r':
411 				store(CHR);
412 				store('\r');
413 				break;
414 			case 't':
415 				store(CHR);
416 				store('\t');
417 				break;
418 #endif
419 			default:
420 				store(CHR);
421 				store(*p);
422 			}
423 			break;
424 
425 		default :               /* an ordinary char  */
426 			store(CHR);
427 			store(*p);
428 			break;
429 		}
430 		sp = lp;
431 	}
432 	if (tagi > 0)
433 		badpat("Unmatched \\(");
434 	store(END);
435 	sta = OKP;
436 	return(0);
437 }
438 
439 
440 static char *bol;
441 static char *bopat[MAXTAG];
442 static char *eopat[MAXTAG];
443 static char *pmatch();
444 
445 /*
446  * s_re_exec:
447  * 	execute dfa to find a match.
448  *
449  *	special cases: (dfa[0])
450  *		BOL
451  *			Match only once, starting from the
452  *			beginning.
453  *		CHR
454  *			First locate the character without
455  *			calling pmatch, and if found, call
456  *			pmatch for the remaining string.
457  *		END
458  *			s_re_comp failed, poor luser did not
459  *			check for it. Fail fast.
460  *
461  *	If a match is found, bopat[0] and eopat[0] are set
462  *	to the beginning and the end of the matched fragment,
463  *	respectively.
464  *
465  */
466 
467 int
s_re_exec(lp)468 s_re_exec(lp)
469 register char *lp;
470 {
471 	register char c;
472 	register char *ep = 0;
473 	register CHAR *ap = dfa;
474 
475 	bol = lp;
476 
477 	bopat[0] = 0;
478 	bopat[1] = 0;
479 	bopat[2] = 0;
480 	bopat[3] = 0;
481 	bopat[4] = 0;
482 	bopat[5] = 0;
483 	bopat[6] = 0;
484 	bopat[7] = 0;
485 	bopat[8] = 0;
486 	bopat[9] = 0;
487 
488 	switch(*ap) {
489 
490 	case BOL:			/* anchored: match from BOL only */
491 		ep = pmatch(lp,ap);
492 		break;
493 	case CHR:			/* ordinary char: locate it fast */
494 		c = *(ap+1);
495 		while (*lp && *lp != c)
496 			lp++;
497 		if (!*lp)		/* if EOS, fail, else fall thru. */
498 			return(0);
499 	default:			/* regular matching all the way. */
500 		while (*lp) {
501 			if ((ep = pmatch(lp,ap)))
502 				break;
503 			lp++;
504 		}
505 		break;
506 	case END:			/* munged automaton. fail always */
507 		return(0);
508 	}
509 	if (!ep)
510 		return(0);
511 
512 	bopat[0] = lp;
513 	eopat[0] = ep;
514 	return(1);
515 }
516 
517 /*
518  * pmatch:
519  *	internal routine for the hard part
520  *
521  * 	This code is mostly snarfed from an early
522  * 	grep written by David Conroy. The backref and
523  * 	tag stuff, and various other mods are by oZ.
524  *
525  *	special cases: (dfa[n], dfa[n+1])
526  *		CLO ANY
527  *			We KNOW ".*" will match ANYTHING
528  *			upto the end of line. Thus, go to
529  *			the end of line straight, without
530  *			calling pmatch recursively. As in
531  *			the other closure cases, the remaining
532  *			pattern must be matched by moving
533  *			backwards on the string recursively,
534  *			to find a match for xy (x is ".*" and
535  *			y is the remaining pattern) where
536  *			the match satisfies the LONGEST match
537  *			for x followed by a match for y.
538  *		CLO CHR
539  *			We can again scan the string forward
540  *			for the single char without recursion,
541  *			and at the point of failure, we execute
542  *			the remaining dfa recursively, as
543  *			described above.
544  *
545  *	At the end of a successful match, bopat[n] and eopat[n]
546  *	are set to the beginning and end of subpatterns matched
547  *	by tagged expressions (n = 1 to 9).
548  *
549  */
550 
551 extern void s_re_fail();
552 
553 /*
554  * character classification table for word boundary
555  * operators BOW and EOW. the reason for not using
556  * ctype macros is that we can let the user add into
557  * our own table. see s_re_modw. This table is not in
558  * the bitset form, since we may wish to extend it
559  * in the future for other character classifications.
560  *
561  *	TRUE for 0-9 A-Z a-z _
562  */
563 static char chrtyp[MAXCHR] = {
564 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 	0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
569 	1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
570 	0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
571 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
572 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
573 	1, 0, 0, 0, 0, 1, 0, 1, 1, 1,
574 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
575 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
576 	1, 1, 1, 0, 0, 0, 0, 0
577 	};
578 
579 #define inascii(x)	(0177&(x))
580 #define iswordc(x) 	chrtyp[inascii(x)]
581 #define isinset(x,y) 	((x)[((y)&BLKIND)>>3] & (1<<((y)&BITIND)))
582 
583 /*
584  * skip values for CLO XXX to skip past the closure
585  *
586  */
587 
588 #define ANYSKIP	2 	/* CLO ANY END ...	   */
589 #define CHRSKIP	3	/* CLO CHR chr END ...	   */
590 #define CCLSKIP 18	/* CLO CCL 16bytes END ... */
591 
592 static char *
pmatch(lp,ap)593 pmatch(lp, ap)
594 register char *lp;
595 register CHAR *ap;
596 {
597 	register char *e;		/* extra pointer for CLO */
598 	register char *bp;		/* beginning of subpat.. */
599 	register char *ep;		/* ending of subpat..	 */
600 	register int op, c, n;
601 	char *are;			/* to save the line ptr. */
602 
603 	while ((op = *ap++) != END)
604 		switch(op) {
605 
606 		case CHR:
607 			if ((unsigned char) *lp++ != *ap++)
608 				return(0);
609 			break;
610 		case ANY:
611 			if (!*lp++)
612 				return(0);
613 			break;
614 		case CCL:
615 			c = *lp++;
616 			if (!isinset(ap,c))
617 				return(0);
618 			ap += BITBLK;
619 			break;
620 		case NCL:
621 			c = *lp++;
622 			if (isinset(ap,c))
623 				return(0);
624 			ap += BITBLK;
625 			break;
626 		case BOL:
627 			if (lp != bol)
628 				return(0);
629 			break;
630 		case EOL:
631 			if (*lp)
632 				return(0);
633 			break;
634 		case BOT:
635 			bopat[*ap++] = lp;
636 			break;
637 		case EOT:
638 			eopat[*ap++] = lp;
639 			break;
640  		case BOW:
641 			if (!(lp!=bol && iswordc(lp[-1])) && iswordc(*lp))
642 				break;
643 			return(0);
644 		case EOW:
645 			if ((lp!=bol && iswordc(lp[-1])) && !iswordc(*lp))
646 				break;
647 			return(0);
648 		case REF:
649 			n = *ap++;
650 			bp = bopat[n];
651 			ep = eopat[n];
652 			while (bp < ep)
653 				if (*bp++ != *lp++)
654 					return(0);
655 			break;
656 		case CLO:
657 			are = lp;
658 			switch(*ap) {
659 
660 			case ANY:
661 				while (*lp)
662 					lp++;
663 				n = ANYSKIP;
664 				break;
665 			case CHR:
666 				c = *(ap+1);
667 				while (*lp && c == *lp)
668 					lp++;
669 				n = CHRSKIP;
670 				break;
671 			case CCL:
672 			case NCL:
673 				while (*lp && (e = pmatch(lp, ap)))
674 					lp = e;
675 				n = CCLSKIP;
676 				break;
677 			default:
678 				s_re_fail("closure: bad dfa.", *ap);
679 				return(0);
680 			}
681 
682 			ap += n;
683 
684 			while (lp >= are) {
685 				if ((e = pmatch(lp, ap)))
686 					return(e);
687 				--lp;
688 			}
689 			return(0);
690 		default:
691 			s_re_fail("s_re_exec: bad dfa.", op);
692 			return(0);
693 		}
694 	return(lp);
695 }
696 
697 /*
698  * s_re_modw:
699  *	add new characters into the word table to
700  *	change the s_re_exec's understanding of what
701  *	a word should look like. Note that we only
702  *	accept additions into the word definition.
703  *
704  *	If the string parameter is 0 or null string,
705  *	the table is reset back to the default, which
706  *	contains A-Z a-z 0-9 _. [We use the compact
707  *	bitset representation for the default table]
708  *
709  */
710 
711 static unsigned char deftab[16] = {
712 	0, 0, 0, 0, 0, 0, 0377, 003, 0376, 0377, 0377, 0207,
713 	0376, 0377, 0377, 007
714 };
715 
716 void
s_re_modw(s)717 s_re_modw(s)
718 register char *s;
719 {
720 	register int i;
721 
722 	if (!s || !*s) {
723 		for (i = 0; i < MAXCHR; i++)
724 			if (!isinset(deftab,i))
725 				iswordc(i) = 0;
726 	}
727 	else
728 		while(*s)
729 			iswordc(*s++) = 1;
730 }
731 
732 /*
733  * s_re_subs:
734  *	substitute the matched portions of the src in
735  *	dst.
736  *
737  *	&	substitute the entire matched pattern.
738  *
739  *	\digit	substitute a subpattern, with the given
740  *		tag number. Tags are numbered from 1 to
741  *		9. If the particular tagged subpattern
742  *		does not exist, null is substituted.
743  *
744  */
745 int
s_re_subs(src,dst)746 s_re_subs(src, dst)
747 register char *src;
748 register char *dst;
749 {
750 	register char c;
751 	register int  pin;
752 	register char *bp;
753 	register char *ep;
754 
755 	if (!*src || !bopat[0])
756 		return(0);
757 
758 	while ((c = *src++)) {
759 		switch(c) {
760 
761 		case '&':
762 			pin = 0;
763 			break;
764 
765 		case '\\':
766 			c = *src++;
767 			if (c >= '0' && c <= '9') {
768 				pin = c - '0';
769 				break;
770 			}
771 
772 		default:
773 			*dst++ = c;
774 			continue;
775 		}
776 
777 		if ((bp = bopat[pin]) && (ep = eopat[pin])) {
778 			while (*bp && bp < ep)
779 				*dst++ = *bp++;
780 			if (bp < ep)
781 				return(0);
782 		}
783 	}
784 	*dst = (char) 0;
785 	return(1);
786 }
787 
788 #ifdef DEBUG
789 /*
790  * symbolic - produce a symbolic dump of the
791  *            dfa
792  */
symbolic(s)793 symbolic(s)
794 char *s;
795 {
796 	printf("pattern: %s\n", s);
797 	printf("dfacode:\n");
798 	dfadump(dfa);
799 }
800 
801 static
dfadump(ap)802 dfadump(ap)
803 CHAR *ap;
804 {
805 	register int n;
806 
807 	while (*ap != END)
808 		switch(*ap++) {
809 		case CLO:
810 			printf("CLOSURE");
811 			dfadump(ap);
812 			switch(*ap) {
813 			case CHR:
814 				n = CHRSKIP;
815 				break;
816 			case ANY:
817 				n = ANYSKIP;
818 				break;
819 			case CCL:
820 			case NCL:
821 				n = CCLSKIP;
822 				break;
823 			}
824 			ap += n;
825 			break;
826 		case CHR:
827 			printf("\tCHR %c\n",*ap++);
828 			break;
829 		case ANY:
830 			printf("\tANY .\n");
831 			break;
832 		case BOL:
833 			printf("\tBOL -\n");
834 			break;
835 		case EOL:
836 			printf("\tEOL -\n");
837 			break;
838 		case BOT:
839 			printf("BOT: %d\n",*ap++);
840 			break;
841 		case EOT:
842 			printf("EOT: %d\n",*ap++);
843 			break;
844 		case BOW:
845 			printf("BOW\n");
846 			break;
847 		case EOW:
848 			printf("EOW\n");
849 			break;
850 		case REF:
851 			printf("REF: %d\n",*ap++);
852 			break;
853 		case CCL:
854 			printf("\tCCL [");
855 			for (n = 0; n < MAXCHR; n++)
856 				if (isinset(ap,(CHAR)n))
857 					printf("%c",n);
858 			printf("]\n");
859 			ap += BITBLK;
860 			break;
861 		case NCL:
862 			printf("\tNCL [");
863 			for (n = 0; n < MAXCHR; n++)
864 				if (isinset(ap,(CHAR)n))
865 					printf("%c",n);
866 			printf("]\n");
867 			ap += BITBLK;
868 			break;
869 		default:
870 			printf("bad dfa. opcode %o\n", ap[-1]);
871 			exit(1);
872 			break;
873 		}
874 }
875 #endif
876